diff options
491 files changed, 12440 insertions, 4093 deletions
@@ -627,6 +627,10 @@ S: 48287 Sawleaf S: Fremont, California 94539 S: USA +N: Tomas Cech +E: sleep_walker@suse.com +D: arm/palm treo support + N: Florent Chabaud E: florent.chabaud@polytechnique.org D: software suspend diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index f49aeef62d0c..cf8722f96090 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -369,7 +369,8 @@ No additional type data follow ``btf_type``. * ``name_off``: offset to a valid C identifier * ``info.kind_flag``: 0 * ``info.kind``: BTF_KIND_FUNC - * ``info.vlen``: 0 + * ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL + or BTF_FUNC_EXTERN) * ``type``: a BTF_KIND_FUNC_PROTO type No additional type data follow ``btf_type``. @@ -380,6 +381,9 @@ type. The BTF_KIND_FUNC may in turn be referenced by a func_info in the :ref:`BTF_Ext_Section` (ELF) or in the arguments to :ref:`BPF_Prog_Load` (ABI). +Currently, only linkage values of BTF_FUNC_STATIC and BTF_FUNC_GLOBAL are +supported in the kernel. + 2.2.13 BTF_KIND_FUNC_PROTO ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 96056a7447c7..1bc2c5c58bdb 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -19,6 +19,7 @@ that goes into great technical depth about the BPF Architecture. faq syscall_api helpers + kfuncs programs maps bpf_prog_run diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst new file mode 100644 index 000000000000..c0b7dae6dbf5 --- /dev/null +++ b/Documentation/bpf/kfuncs.rst @@ -0,0 +1,170 @@ +============================= +BPF Kernel Functions (kfuncs) +============================= + +1. Introduction +=============== + +BPF Kernel Functions or more commonly known as kfuncs are functions in the Linux +kernel which are exposed for use by BPF programs. Unlike normal BPF helpers, +kfuncs do not have a stable interface and can change from one kernel release to +another. Hence, BPF programs need to be updated in response to changes in the +kernel. + +2. Defining a kfunc +=================== + +There are two ways to expose a kernel function to BPF programs, either make an +existing function in the kernel visible, or add a new wrapper for BPF. In both +cases, care must be taken that BPF program can only call such function in a +valid context. To enforce this, visibility of a kfunc can be per program type. + +If you are not creating a BPF wrapper for existing kernel function, skip ahead +to :ref:`BPF_kfunc_nodef`. + +2.1 Creating a wrapper kfunc +---------------------------- + +When defining a wrapper kfunc, the wrapper function should have extern linkage. +This prevents the compiler from optimizing away dead code, as this wrapper kfunc +is not invoked anywhere in the kernel itself. It is not necessary to provide a +prototype in a header for the wrapper kfunc. + +An example is given below:: + + /* Disables missing prototype warnings */ + __diag_push(); + __diag_ignore_all("-Wmissing-prototypes", + "Global kfuncs as their definitions will be in BTF"); + + struct task_struct *bpf_find_get_task_by_vpid(pid_t nr) + { + return find_get_task_by_vpid(nr); + } + + __diag_pop(); + +A wrapper kfunc is often needed when we need to annotate parameters of the +kfunc. Otherwise one may directly make the kfunc visible to the BPF program by +registering it with the BPF subsystem. See :ref:`BPF_kfunc_nodef`. + +2.2 Annotating kfunc parameters +------------------------------- + +Similar to BPF helpers, there is sometime need for additional context required +by the verifier to make the usage of kernel functions safer and more useful. +Hence, we can annotate a parameter by suffixing the name of the argument of the +kfunc with a __tag, where tag may be one of the supported annotations. + +2.2.1 __sz Annotation +--------------------- + +This annotation is used to indicate a memory and size pair in the argument list. +An example is given below:: + + void bpf_memzero(void *mem, int mem__sz) + { + ... + } + +Here, the verifier will treat first argument as a PTR_TO_MEM, and second +argument as its size. By default, without __sz annotation, the size of the type +of the pointer is used. Without __sz annotation, a kfunc cannot accept a void +pointer. + +.. _BPF_kfunc_nodef: + +2.3 Using an existing kernel function +------------------------------------- + +When an existing function in the kernel is fit for consumption by BPF programs, +it can be directly registered with the BPF subsystem. However, care must still +be taken to review the context in which it will be invoked by the BPF program +and whether it is safe to do so. + +2.4 Annotating kfuncs +--------------------- + +In addition to kfuncs' arguments, verifier may need more information about the +type of kfunc(s) being registered with the BPF subsystem. To do so, we define +flags on a set of kfuncs as follows:: + + BTF_SET8_START(bpf_task_set) + BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) + BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) + BTF_SET8_END(bpf_task_set) + +This set encodes the BTF ID of each kfunc listed above, and encodes the flags +along with it. Ofcourse, it is also allowed to specify no flags. + +2.4.1 KF_ACQUIRE flag +--------------------- + +The KF_ACQUIRE flag is used to indicate that the kfunc returns a pointer to a +refcounted object. The verifier will then ensure that the pointer to the object +is eventually released using a release kfunc, or transferred to a map using a +referenced kptr (by invoking bpf_kptr_xchg). If not, the verifier fails the +loading of the BPF program until no lingering references remain in all possible +explored states of the program. + +2.4.2 KF_RET_NULL flag +---------------------- + +The KF_RET_NULL flag is used to indicate that the pointer returned by the kfunc +may be NULL. Hence, it forces the user to do a NULL check on the pointer +returned from the kfunc before making use of it (dereferencing or passing to +another helper). This flag is often used in pairing with KF_ACQUIRE flag, but +both are orthogonal to each other. + +2.4.3 KF_RELEASE flag +--------------------- + +The KF_RELEASE flag is used to indicate that the kfunc releases the pointer +passed in to it. There can be only one referenced pointer that can be passed in. +All copies of the pointer being released are invalidated as a result of invoking +kfunc with this flag. + +2.4.4 KF_KPTR_GET flag +---------------------- + +The KF_KPTR_GET flag is used to indicate that the kfunc takes the first argument +as a pointer to kptr, safely increments the refcount of the object it points to, +and returns a reference to the user. The rest of the arguments may be normal +arguments of a kfunc. The KF_KPTR_GET flag should be used in conjunction with +KF_ACQUIRE and KF_RET_NULL flags. + +2.4.5 KF_TRUSTED_ARGS flag +-------------------------- + +The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It +indicates that the all pointer arguments will always be refcounted, and have +their offset set to 0. It can be used to enforce that a pointer to a refcounted +object acquired from a kfunc or BPF helper is passed as an argument to this +kfunc without any modifications (e.g. pointer arithmetic) such that it is +trusted and points to the original object. This flag is often used for kfuncs +that operate (change some property, perform some operation) on an object that +was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to +ensure the integrity of the operation being performed on the expected object. + +2.5 Registering the kfuncs +-------------------------- + +Once the kfunc is prepared for use, the final step to making it visible is +registering it with the BPF subsystem. Registration is done per BPF program +type. An example is shown below:: + + BTF_SET8_START(bpf_task_set) + BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) + BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) + BTF_SET8_END(bpf_task_set) + + static const struct btf_kfunc_id_set bpf_task_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_task_set, + }; + + static int init_subsystem(void) + { + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set); + } + late_initcall(init_subsystem); diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst new file mode 100644 index 000000000000..e85120878b27 --- /dev/null +++ b/Documentation/bpf/map_hash.rst @@ -0,0 +1,185 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright (C) 2022 Red Hat, Inc. + +=============================================== +BPF_MAP_TYPE_HASH, with PERCPU and LRU Variants +=============================================== + +.. note:: + - ``BPF_MAP_TYPE_HASH`` was introduced in kernel version 3.19 + - ``BPF_MAP_TYPE_PERCPU_HASH`` was introduced in version 4.6 + - Both ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH`` + were introduced in version 4.10 + +``BPF_MAP_TYPE_HASH`` and ``BPF_MAP_TYPE_PERCPU_HASH`` provide general +purpose hash map storage. Both the key and the value can be structs, +allowing for composite keys and values. + +The kernel is responsible for allocating and freeing key/value pairs, up +to the max_entries limit that you specify. Hash maps use pre-allocation +of hash table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be +used to disable pre-allocation when it is too memory expensive. + +``BPF_MAP_TYPE_PERCPU_HASH`` provides a separate value slot per +CPU. The per-cpu values are stored internally in an array. + +The ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH`` +variants add LRU semantics to their respective hash tables. An LRU hash +will automatically evict the least recently used entries when the hash +table reaches capacity. An LRU hash maintains an internal LRU list that +is used to select elements for eviction. This internal LRU list is +shared across CPUs but it is possible to request a per CPU LRU list with +the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``. + +Usage +===== + +.. c:function:: + long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + +Hash entries can be added or updated using the ``bpf_map_update_elem()`` +helper. This helper replaces existing elements atomically. The ``flags`` +parameter can be used to control the update behaviour: + +- ``BPF_ANY`` will create a new element or update an existing element +- ``BPF_NOEXIST`` will create a new element only if one did not already + exist +- ``BPF_EXIST`` will update an existing element + +``bpf_map_update_elem()`` returns 0 on success, or negative error in +case of failure. + +.. c:function:: + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + +Hash entries can be retrieved using the ``bpf_map_lookup_elem()`` +helper. This helper returns a pointer to the value associated with +``key``, or ``NULL`` if no entry was found. + +.. c:function:: + long bpf_map_delete_elem(struct bpf_map *map, const void *key) + +Hash entries can be deleted using the ``bpf_map_delete_elem()`` +helper. This helper will return 0 on success, or negative error in case +of failure. + +Per CPU Hashes +-------------- + +For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH`` +the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers +automatically access the hash slot for the current CPU. + +.. c:function:: + void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + +The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the +value in the hash slot for a specific CPU. Returns value associated with +``key`` on ``cpu`` , or ``NULL`` if no entry was found or ``cpu`` is +invalid. + +Concurrency +----------- + +Values stored in ``BPF_MAP_TYPE_HASH`` can be accessed concurrently by +programs running on different CPUs. Since Kernel version 5.1, the BPF +infrastructure provides ``struct bpf_spin_lock`` to synchronise access. +See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``. + +Userspace +--------- + +.. c:function:: + int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key) + +In userspace, it is possible to iterate through the keys of a hash using +libbpf's ``bpf_map_get_next_key()`` function. The first key can be fetched by +calling ``bpf_map_get_next_key()`` with ``cur_key`` set to +``NULL``. Subsequent calls will fetch the next key that follows the +current key. ``bpf_map_get_next_key()`` returns 0 on success, -ENOENT if +cur_key is the last key in the hash, or negative error in case of +failure. + +Note that if ``cur_key`` gets deleted then ``bpf_map_get_next_key()`` +will instead return the *first* key in the hash table which is +undesirable. It is recommended to use batched lookup if there is going +to be key deletion intermixed with ``bpf_map_get_next_key()``. + +Examples +======== + +Please see the ``tools/testing/selftests/bpf`` directory for functional +examples. The code snippets below demonstrates API usage. + +This example shows how to declare an LRU Hash with a struct key and a +struct value. + +.. code-block:: c + + #include <linux/bpf.h> + #include <bpf/bpf_helpers.h> + + struct key { + __u32 srcip; + }; + + struct value { + __u64 packets; + __u64 bytes; + }; + + struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 32); + __type(key, struct key); + __type(value, struct value); + } packet_stats SEC(".maps"); + +This example shows how to create or update hash values using atomic +instructions: + +.. code-block:: c + + static void update_stats(__u32 srcip, int bytes) + { + struct key key = { + .srcip = srcip, + }; + struct value *value = bpf_map_lookup_elem(&packet_stats, &key); + + if (value) { + __sync_fetch_and_add(&value->packets, 1); + __sync_fetch_and_add(&value->bytes, bytes); + } else { + struct value newval = { 1, bytes }; + + bpf_map_update_elem(&packet_stats, &key, &newval, BPF_NOEXIST); + } + } + +Userspace walking the map elements from the map declared above: + +.. code-block:: c + + #include <bpf/libbpf.h> + #include <bpf/bpf.h> + + static void walk_hash_elements(int map_fd) + { + struct key *cur_key = NULL; + struct key next_key; + struct value value; + int err; + + for (;;) { + err = bpf_map_get_next_key(map_fd, cur_key, &next_key); + if (err) + break; + + bpf_map_lookup_elem(map_fd, &next_key, &value); + + // Use key and value here + + cur_key = &next_key; + } + } diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml index c388ae5da1e4..c9c346e6228e 100644 --- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml +++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-engine.yaml @@ -94,6 +94,7 @@ if: - allwinner,sun8i-a83t-display-engine - allwinner,sun8i-r40-display-engine - allwinner,sun9i-a80-display-engine + - allwinner,sun20i-d1-display-engine - allwinner,sun50i-a64-display-engine then: diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml index 5aac094fd217..445b2a553625 100644 --- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml @@ -23,6 +23,8 @@ properties: - brcm,bcm4345c5 - brcm,bcm43540-bt - brcm,bcm4335a0 + - brcm,bcm4349-bt + - infineon,cyw55572-bt shutdown-gpios: maxItems: 1 @@ -92,6 +94,13 @@ properties: pcm-sync-mode: slave, master pcm-clock-mode: slave, master + brcm,requires-autobaud-mode: + type: boolean + description: + Set this property if autobaud mode is required. Autobaud mode is required + if the device's initial baud rate in normal mode is not supported by the + host or if the device requires autobaud mode startup before loading FW. + interrupts: items: - description: Handle to the line HOST_WAKE used to wake @@ -108,6 +117,22 @@ properties: required: - compatible +dependencies: + brcm,requires-autobaud-mode: [ 'shutdown-gpios' ] + +if: + not: + properties: + compatible: + contains: + enum: + - brcm,bcm20702a1 + - brcm,bcm4329-bt + - brcm,bcm4330-bt +then: + properties: + reset-gpios: false + additionalProperties: false examples: diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 4d19b19bcc08..73a4176144b3 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -301,7 +301,7 @@ through which it can issue requests and negotiate:: void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); }; @@ -381,8 +381,10 @@ The operations are as follows: allocated/grabbed the folio to be modified to allow the filesystem to flush conflicting state before allowing it to be modified. - It should return 0 if everything is now fine, -EAGAIN if the folio should be - regrabbed and any other error code to abort the operation. + It may unlock and discard the folio it was given and set the caller's folio + pointer to NULL. It should return 0 if everything is now fine (``*foliop`` + left set) or the op should be retried (``*foliop`` cleared) and any other + error code to abort the operation. * ``done`` diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index ed7fa76e7a40..d742ba6bd211 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc. Driver development ================== -DSA switch drivers need to implement a dsa_switch_ops structure which will +DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will contain the various members described below. -``register_switch_driver()`` registers this dsa_switch_ops in its internal list -of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite. +Probing, registration and device lifetime +----------------------------------------- -Unless requested differently by setting the priv_size member accordingly, DSA -does not allocate any driver private context space. +DSA switches are regular ``device`` structures on buses (be they platform, SPI, +I2C, MDIO or otherwise). The DSA framework is not involved in their probing +with the device core. + +Switch registration from the perspective of a driver means passing a valid +``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the +switch driver's probing function. The following members must be valid in the +provided structure: + +- ``ds->dev``: will be used to parse the switch's OF node or platform data. + +- ``ds->num_ports``: will be used to create the port list for this switch, and + to validate the port indices provided in the OF node. + +- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA + method implementations. + +- ``ds->priv``: backpointer to a driver-private data structure which can be + retrieved in all further DSA method callbacks. + +In addition, the following flags in the ``dsa_switch`` structure may optionally +be configured to obtain driver-specific behavior from the DSA core. Their +behavior when set is documented through comments in ``include/net/dsa.h``. + +- ``ds->vlan_filtering_is_global`` + +- ``ds->needs_standalone_vlan_filtering`` + +- ``ds->configure_vlan_while_not_filtering`` + +- ``ds->untag_bridge_pvid`` + +- ``ds->assisted_learning_on_cpu_port`` + +- ``ds->mtu_enforcement_ingress`` + +- ``ds->fdb_isolation`` + +Internally, DSA keeps an array of switch trees (group of switches) global to +the kernel, and attaches a ``dsa_switch`` structure to a tree on registration. +The tree ID to which the switch is attached is determined by the first u32 +number of the ``dsa,member`` property of the switch's OF node (0 if missing). +The switch ID within the tree is determined by the second u32 number of the +same OF property (0 if missing). Registering multiple switches with the same +switch ID and tree ID is illegal and will cause an error. Using platform data, +a single switch and a single switch tree is permitted. + +In case of a tree with multiple switches, probing takes place asymmetrically. +The first N-1 callers of ``dsa_register_switch()`` only add their ports to the +port list of the tree (``dst->ports``), each port having a backpointer to its +associated switch (``dp->ds``). Then, these switches exit their +``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()`` +has determined that the tree is not yet complete (not all ports referenced by +DSA links are present in the tree's port list). The tree becomes complete when +the last switch calls ``dsa_register_switch()``, and this triggers the effective +continuation of initialization (including the call to ``ds->ops->setup()``) for +all switches within that tree, all as part of the calling context of the last +switch's probe function. + +The opposite of registration takes place when calling ``dsa_unregister_switch()``, +which removes a switch's ports from the port list of the tree. The entire tree +is torn down when the first switch unregisters. + +It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback +of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal +version of the full teardown performed by ``dsa_unregister_switch()``). +The reason is that DSA keeps a reference on the master net device, and if the +driver for the master device decides to unbind on shutdown, DSA's reference +will block that operation from finalizing. + +Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called, +but not both, and the device driver model permits the bus' ``remove()`` method +to be called even if ``shutdown()`` was already called. Therefore, drivers are +expected to implement a mutual exclusion method between ``remove()`` and +``shutdown()`` by setting their drvdata to NULL after any of these has run, and +checking whether the drvdata is NULL before proceeding to take any action. + +After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no +further callbacks via the provided ``dsa_switch_ops`` may take place, and the +driver may free the data structures associated with the ``dsa_switch``. Switch configuration -------------------- -- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported, - should be a valid value from the ``dsa_tag_protocol`` enum +- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is + supported, should be a valid value from the ``dsa_tag_protocol`` enum. + The returned information does not have to be static; the driver is passed the + CPU port number, as well as the tagging protocol of a possibly stacked + upstream switch, in case there are hardware limitations in terms of supported + tag formats. -- ``probe``: probe routine which will be invoked by the DSA platform device upon - registration to test for the presence/absence of a switch device. For MDIO - devices, it is recommended to issue a read towards internal registers using - the switch pseudo-PHY and return whether this is a supported device. For other - buses, return a non-NULL string +- ``change_tag_protocol``: when the default tagging protocol has compatibility + problems with the master or other issues, the driver may support changing it + at runtime, either through a device tree property or through sysfs. In that + case, further calls to ``get_tag_protocol`` should report the protocol in + current use. - ``setup``: setup function for the switch, this function is responsible for setting up the ``dsa_switch_ops`` private structure with all it needs: register maps, @@ -535,7 +617,17 @@ Switch configuration fully configured and ready to serve any kind of request. It is recommended to issue a software reset of the switch during this setup function in order to avoid relying on what a previous software agent such as a bootloader/firmware - may have previously configured. + may have previously configured. The method responsible for undoing any + applicable allocations or operations done here is ``teardown``. + +- ``port_setup`` and ``port_teardown``: methods for initialization and + destruction of per-port data structures. It is mandatory for some operations + such as registering and unregistering devlink port regions to be done from + these methods, otherwise they are optional. A port will be torn down only if + it has been previously set up. It is possible for a port to be set up during + probing only to be torn down immediately afterwards, for example in case its + PHY cannot be found. In this case, probing of the DSA switch continues + without that particular port. PHY devices and link management ------------------------------- @@ -635,26 +727,198 @@ Power management ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is disabled while being a bridge member +Address databases +----------------- + +Switching hardware is expected to have a table for FDB entries, however not all +of them are active at the same time. An address database is the subset (partition) +of FDB entries that is active (can be matched by address learning on RX, or FDB +lookup on TX) depending on the state of the port. An address database may +occasionally be called "FID" (Filtering ID) in this document, although the +underlying implementation may choose whatever is available to the hardware. + +For example, all ports that belong to a VLAN-unaware bridge (which is +*currently* VLAN-unaware) are expected to learn source addresses in the +database associated by the driver with that bridge (and not with other +VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a +VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having +the same MAC DA as the packet, which is present on another port member of the +same bridge. At the same time, the FDB lookup process must be able to not find +an FDB entry having the same MAC DA as the packet, if that entry points towards +a port which is a member of a different VLAN-unaware bridge (and is therefore +associated with a different address database). + +Similarly, each VLAN of each offloaded VLAN-aware bridge should have an +associated address database, which is shared by all ports which are members of +that VLAN, but not shared by ports belonging to different bridges that are +members of the same VID. + +In this context, a VLAN-unaware database means that all packets are expected to +match on it irrespective of VLAN ID (only MAC address lookup), whereas a +VLAN-aware database means that packets are supposed to match based on the VLAN +ID from the classified 802.1Q header (or the pvid if untagged). + +At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0, +whereas VLAN-aware FDB entries have non-zero VID values. Note that a +VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a +VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the +software bridge keeps separate address databases, and offloads to hardware the +FDB entries belonging to these databases, through switchdev, asynchronously +relative to the moment when the databases become active or inactive. + +When a user port operates in standalone mode, its driver should configure it to +use a separate database called a port private database. This is different from +the databases described above, and should impede operation as standalone port +(packet in, packet out to the CPU port) as little as possible. For example, +on ingress, it should not attempt to learn the MAC SA of ingress traffic, since +learning is a bridging layer service and this is a standalone port, therefore +it would consume useless space. With no address learning, the port private +database should be empty in a naive implementation, and in this case, all +received packets should be trivially flooded to the CPU port. + +DSA (cascade) and CPU ports are also called "shared" ports because they service +multiple address databases, and the database that a packet should be associated +to is usually embedded in the DSA tag. This means that the CPU port may +simultaneously transport packets coming from a standalone port (which were +classified by hardware in one address database), and from a bridge port (which +were classified to a different address database). + +Switch drivers which satisfy certain criteria are able to optimize the naive +configuration by removing the CPU port from the flooding domain of the switch, +and just program the hardware with FDB entries pointing towards the CPU port +for which it is known that software is interested in those MAC addresses. +Packets which do not match a known FDB entry will not be delivered to the CPU, +which will save CPU cycles required for creating an skb just to drop it. + +DSA is able to perform host address filtering for the following kinds of +addresses: + +- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are + associated with the port private database of the respective user port, + and the driver is notified to install them through ``port_fdb_add`` towards + the CPU port. + +- Secondary unicast and multicast MAC addresses of ports (addresses added + through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated + with the port private database of the respective user port. + +- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC + addresses of the bridge ports, for which packets must be terminated locally + and not forwarded. They are associated with the address database for that + bridge. + +- Static bridge FDB entries installed towards foreign (non-DSA) interfaces + present in the same bridge as some DSA switch ports. These are also + associated with the address database for that bridge. + +- Dynamically learned FDB entries on foreign interfaces present in the same + bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port`` + is set to true by the driver. These are associated with the address database + for that bridge. + +For various operations detailed below, DSA provides a ``dsa_db`` structure +which can be of the following types: + +- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to + the port private database of user port ``db->dp``. +- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge + ``db->bridge``. Separation between the VLAN-unaware database and the per-VID + databases of this bridge is expected to be done by the driver. +- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``. + Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future. + +The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``, +``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true. + +DSA associates each offloaded bridge and each offloaded LAG with a one-based ID +(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of +refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering +scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id`` or may +implement their own. + +Only the drivers which declare support for FDB isolation are notified of FDB +entries on the CPU port belonging to ``DSA_DB_PORT`` databases. +For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to +drivers even if they do not support FDB isolation. However, ``db->bridge.num`` +and ``db->lag.id`` are always set to 0 in that case (to denote the lack of +isolation, for refcounting purposes). + +Note that it is not mandatory for a switch driver to implement physically +separate address databases for each standalone user port. Since FDB entries in +the port private databases will always point to the CPU port, there is no risk +for incorrect forwarding decisions. In this case, all standalone ports may +share the same database, but the reference counting of host-filtered addresses +(not deleting the FDB entry for a port's MAC address if it's still in use by +another port) becomes the responsibility of the driver, because DSA is unaware +that the port databases are in fact shared. This can be achieved by calling +``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``. +The down side is that the RX filtering lists of each user port are in fact +shared, which means that user port A may accept a packet with a MAC DA it +shouldn't have, only because that MAC address was in the RX filtering list of +user port B. These packets will still be dropped in software, however. + Bridge layer ------------ +Offloading the bridge forwarding plane is optional and handled by the methods +below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may +be non-zero and exceeded, and in this case, joining a bridge port is still +possible, but the packet forwarding will take place in software, and the ports +under a software bridge must remain configured in the same way as for +standalone operation, i.e. have all bridging service functions (address +learning etc) disabled, and send all received packets to the CPU port only. + +Concretely, a port starts offloading the forwarding plane of a bridge once it +returns success to the ``port_bridge_join`` method, and stops doing so after +``port_bridge_leave`` has been called. Offloading the bridge means autonomously +learning FDB entries in accordance with the software bridge port's state, and +autonomously forwarding (or flooding) received packets without CPU intervention. +This is optional even when offloading a bridge port. Tagging protocol drivers +are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which +have already been autonomously forwarded in the forwarding domain of the +ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all +switch ports part of the same tree ID to be part of the same bridge forwarding +domain (capable of autonomous forwarding to each other). + +Offloading the TX forwarding process of a bridge is a distinct concept from +simply offloading its forwarding plane, and refers to the ability of certain +driver and tag protocol combinations to transmit a single skb coming from the +bridge device's transmit function to potentially multiple egress ports (and +thereby avoid its cloning in software). + +Packets for which the bridge requests this behavior are called data plane +packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol +driver's ``xmit`` function. Data plane packets are subject to FDB lookup, +hardware learning on the CPU port, and do not override the port STP state. +Additionally, replication of data plane packets (multicast, flooding) is +handled in hardware and the bridge driver will transmit a single skb for each +packet that may or may not need replication. + +When the TX forwarding offload is enabled, the tag protocol driver is +responsible to inject packets into the data plane of the hardware towards the +correct bridging domain (FID) that the port is a part of. The port may be +VLAN-unaware, and in this case the FID must be equal to the FID used by the +driver for its VLAN-unaware address database associated with that bridge. +Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed +that the packet is also VLAN-tagged with the VLAN ID that the bridge processed +this packet in. It is the responsibility of the hardware to untag the VID on +the egress-untagged ports, or keep the tag on the egress-tagged ones. + - ``port_bridge_join``: bridge layer function invoked when a given switch port is added to a bridge, this function should do what's necessary at the switch level to permit the joining port to be added to the relevant logical domain for it to ingress/egress traffic with other members of the bridge. + By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process + of this bridge is also offloaded. - ``port_bridge_leave``: bridge layer function invoked when a given switch port is removed from a bridge, this function should do what's necessary at the switch level to deny the leaving port from ingress/egress traffic from the - remaining bridge members. When the port leaves the bridge, it should be aged - out at the switch hardware for the switch to (re) learn MAC addresses behind - this port. + remaining bridge members. - ``port_stp_state_set``: bridge layer function invoked when a given switch port STP state is computed by the bridge layer and should be propagated to switch - hardware to forward/block/learn traffic. The switch driver is responsible for - computing a STP state change based on current and asked parameters and perform - the relevant ageing based on the intersection results + hardware to forward/block/learn traffic. - ``port_bridge_flags``: bridge layer function invoked when a port must configure its settings for e.g. flooding of unknown traffic or source address @@ -667,21 +931,11 @@ Bridge layer CPU port, and flooding towards the CPU port should also be enabled, due to a lack of an explicit address filtering mechanism in the DSA core. -- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after - ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to - a non-zero value. Returning success in this function activates the TX - forwarding offload bridge feature for this port, which enables the tagging - protocol driver to inject data plane packets towards the bridging domain that - the port is a part of. Data plane packets are subject to FDB lookup, hardware - learning on the CPU port, and do not override the port STP state. - Additionally, replication of data plane packets (multicast, flooding) is - handled in hardware and the bridge driver will transmit a single skb for each - packet that needs replication. The method is provided as a configuration - point for drivers that need to configure the hardware for enabling this - feature. - -- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver - leaves a bridge port which had the TX forwarding offload feature enabled. +- ``port_fast_age``: bridge layer function invoked when flushing the + dynamically learned FDB entries on the port is necessary. This is called when + transitioning from an STP state where learning should take place to an STP + state where it shouldn't, or when leaving a bridge, or when address learning + is turned off via ``port_bridge_flags``. Bridge VLAN filtering --------------------- @@ -697,55 +951,44 @@ Bridge VLAN filtering allowed. - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured - (tagged or untagged) for the given switch port. If the operation is not - supported by the hardware, this function should return ``-EOPNOTSUPP`` to - inform the bridge code to fallback to a software implementation. + (tagged or untagged) for the given switch port. The CPU port becomes a member + of a VLAN only if a foreign bridge port is also a member of it (and + forwarding needs to take place in software), or the VLAN is installed to the + VLAN group of the bridge device itself, for termination purposes + (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are + reference counted and removed when there is no user left. Drivers do not need + to manually install a VLAN on the CPU port. - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the given switch port -- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each VLAN the given port is a member - of. A switchdev object is used to carry the VID and bridge flags. - - ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a Forwarding Database entry, the switch hardware should be programmed with the specified address in the specified VLAN Id in the forwarding database - associated with this VLAN ID. If the operation is not supported, this - function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to - a software implementation. - -.. note:: VLAN ID 0 corresponds to the port private database, which, in the context - of DSA, would be its port-based VLAN, used by the associated bridge device. + associated with this VLAN ID. - ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a Forwarding Database entry, the switch hardware should be programmed to delete the specified MAC address from the specified VLAN ID if it was mapped into this port forwarding database -- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each MAC address known to be behind - the given port. A switchdev object is used to carry the VID and FDB info. +- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the + physical DSA port interfaces. Since DSA does not attempt to keep in sync its + hardware FDB entries with the software bridge, this method is implemented as + a means to view the entries visible on user ports in the hardware database. + The entries reported by this function have the ``self`` flag in the output of + the ``bridge fdb show`` command. - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install - a multicast database entry. If the operation is not supported, this function - should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a - software implementation. The switch hardware should be programmed with the + a multicast database entry. The switch hardware should be programmed with the specified address in the specified VLAN ID in the forwarding database associated with this VLAN ID. -.. note:: VLAN ID 0 corresponds to the port private database, which, in the context - of DSA, would be its port-based VLAN, used by the associated bridge device. - - ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a multicast database entry, the switch hardware should be programmed to delete the specified MAC address from the specified VLAN ID if it was mapped into this port forwarding database. -- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each MAC address known to be behind - the given port. A switchdev object is used to carry the VID and MDB info. - Link aggregation ---------------- diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 1c3897a4e60f..5879ef3bc2cb 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1058,11 +1058,7 @@ udp_rmem_min - INTEGER Default: 4K udp_wmem_min - INTEGER - Minimal size of send buffer used by UDP sockets in moderation. - Each UDP socket is able to use the size for sending data, even if - total pages of UDP sockets exceed udp_mem pressure. The unit is byte. - - Default: 4K + UDP does not have tx memory accounting and this tunable has no effect. RAW variables ============= diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 11e00a46c610..6e090fb96a0e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5657,6 +5657,7 @@ by a string of size ``name_size``. #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) + #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_BASE_SHIFT 8 @@ -5702,14 +5703,13 @@ Bits 0-3 of ``flags`` encode the type: by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``) is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity - value.) The bucket value indicates how many samples fell in the bucket's range. + value.) * ``KVM_STATS_TYPE_LOG_HIST`` The statistic is reported as a logarithmic histogram. The number of buckets is specified by the ``size`` field. The range of the first bucket is [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF). Otherwise, The Nth bucket (1 < N < ``size``) covers - [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell - in the bucket's range. + [pow(2, N-2), pow(2, N-1)). Bits 4-7 of ``flags`` encode the unit: @@ -5724,6 +5724,15 @@ Bits 4-7 of ``flags`` encode the unit: It indicates that the statistics data is used to measure time or latency. * ``KVM_STATS_UNIT_CYCLES`` It indicates that the statistics data is used to measure CPU clock cycles. + * ``KVM_STATS_UNIT_BOOLEAN`` + It indicates that the statistic will always be either 0 or 1. Boolean + statistics of "peak" type will never go back from 1 to 0. Boolean + statistics can be linear histograms (with two buckets) but not logarithmic + histograms. + +Note that, in the case of histograms, the unit applies to the bucket +ranges, while the bucket value indicates how many samples fell in the +bucket's range. Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the unit: @@ -5746,7 +5755,7 @@ the corresponding statistics data. The ``bucket_size`` field is used as a parameter for histogram statistics data. It is only used by linear histogram statistics data, specifying the size of a -bucket. +bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``. The ``name`` field is the name string of the statistics data. The name string starts at the end of ``struct kvm_stats_desc``. The maximum length including diff --git a/MAINTAINERS b/MAINTAINERS index 66738c8330db..46b345ddc67c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2497,10 +2497,8 @@ F: drivers/power/reset/oxnas-restart.c N: oxnas ARM/PALM TREO SUPPORT -M: Tomas Cech <sleep_walker@suse.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -W: http://hackndev.com +S: Orphan F: arch/arm/mach-pxa/palmtreo.* ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT @@ -14377,7 +14375,8 @@ S: Maintained F: drivers/net/phy/nxp-c45-tja11xx.c NXP FSPI DRIVER -M: Ashish Kumar <ashish.kumar@nxp.com> +M: Han Xu <han.xu@nxp.com> +M: Haibo Chen <haibo.chen@nxp.com> R: Yogesh Gaur <yogeshgaur.83@gmail.com> L: linux-spi@vger.kernel.org S: Maintained @@ -17300,12 +17299,15 @@ N: riscv K: riscv RISC-V/MICROCHIP POLARFIRE SOC SUPPORT -M: Lewis Hanly <lewis.hanly@microchip.com> M: Conor Dooley <conor.dooley@microchip.com> +M: Daire McNamara <daire.mcnamara@microchip.com> L: linux-riscv@lists.infradead.org S: Supported F: arch/riscv/boot/dts/microchip/ +F: drivers/char/hw_random/mpfs-rng.c +F: drivers/clk/microchip/clk-mpfs.c F: drivers/mailbox/mailbox-mpfs.c +F: drivers/pci/controller/pcie-microchip-host.c F: drivers/soc/microchip/ F: include/soc/microchip/mpfs.h @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index fcf9a41a4ef5..71b9272acb28 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE config MMU_GATHER_NO_RANGE bool + select MMU_GATHER_MERGE_VMAS + +config MMU_GATHER_NO_FLUSH_CACHE + bool + +config MMU_GATHER_MERGE_VMAS + bool config MMU_GATHER_NO_GATHER bool diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index fded07f370b3..d6ba4b2a60f6 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index 15621e03fa4d..2c3ae715c683 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -166,7 +166,7 @@ atmel_mxt_ts: touchscreen@4a { compatible = "atmel,maxtouch"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_atmel_conn>; + pinctrl-0 = <&pinctrl_atmel_conn &pinctrl_atmel_snvs_conn>; reg = <0x4a>; interrupt-parent = <&gpio5>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */ @@ -331,7 +331,6 @@ pinctrl_atmel_conn: atmelconngrp { fsl,pins = < MX6UL_PAD_JTAG_MOD__GPIO1_IO10 0xb0a0 /* SODIMM 106 */ - MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ >; }; @@ -684,6 +683,12 @@ }; &iomuxc_snvs { + pinctrl_atmel_snvs_conn: atmelsnvsconngrp { + fsl,pins = < + MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */ + >; + }; + pinctrl_snvs_gpio1: snvsgpio1grp { fsl,pins = < MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06 0x110a0 /* SODIMM 93 */ diff --git a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi index 4cab1b3b3b29..725dcf707b31 100644 --- a/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi +++ b/arch/arm/boot/dts/lan966x-kontron-kswitch-d10-mmt.dtsi @@ -87,22 +87,22 @@ phy4: ethernet-phy@5 { reg = <5>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy5: ethernet-phy@6 { reg = <6>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy6: ethernet-phy@7 { reg = <7>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; phy7: ethernet-phy@8 { reg = <8>; - coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>; + coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 814ad0b46232..c3b8a6d63027 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -506,6 +506,8 @@ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp1_uart2_default>; status = "disabled"; }; @@ -581,6 +583,9 @@ interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>; clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_uart1_default>; + pinctrl-1 = <&blsp2_uart1_sleep>; status = "disabled"; }; @@ -599,6 +604,8 @@ interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_UART4_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default"; + pinctrl-0 = <&blsp2_uart4_default>; status = "disabled"; }; @@ -639,6 +646,9 @@ interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp2_i2c6_default>; + pinctrl-1 = <&blsp2_i2c6_sleep>; #address-cells = <1>; #size-cells = <0>; }; @@ -1256,7 +1266,7 @@ }; }; - blsp1_uart2_active: blsp1-uart2-active { + blsp1_uart2_default: blsp1-uart2-default { rx { pins = "gpio5"; function = "blsp_uart2"; @@ -1272,7 +1282,7 @@ }; }; - blsp2_uart1_active: blsp2-uart1-active { + blsp2_uart1_default: blsp2-uart1-default { tx-rts { pins = "gpio41", "gpio44"; function = "blsp_uart7"; @@ -1295,7 +1305,7 @@ bias-pull-down; }; - blsp2_uart4_active: blsp2-uart4-active { + blsp2_uart4_default: blsp2-uart4-default { tx-rts { pins = "gpio53", "gpio56"; function = "blsp_uart10"; @@ -1406,7 +1416,19 @@ bias-pull-up; }; - /* BLSP2_I2C6 info is missing - nobody uses it though? */ + blsp2_i2c6_default: blsp2-i2c6-default { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-disable; + }; + + blsp2_i2c6_sleep: blsp2-i2c6-sleep { + pins = "gpio87", "gpio88"; + function = "blsp_i2c12"; + drive-strength = <2>; + bias-pull-up; + }; spi8_default: spi8_default { mosi { diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 89c71d419f82..659a17fc755c 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1124,7 +1124,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d..3706216ffb40 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index 87389d9456b9..30d781d80fe0 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -311,7 +311,7 @@ void __init rockchip_suspend_init(void) &match); if (!match) { pr_err("Failed to find PMU node\n"); - return; + goto out_put; } pm_data = (struct rockchip_pm_data *) match->data; @@ -320,9 +320,12 @@ void __init rockchip_suspend_init(void) if (ret) { pr_err("%s: matches init error %d\n", __func__, ret); - return; + goto out_put; } } suspend_set_ops(pm_data->ops); + +out_put: + of_node_put(np); } diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d553524..d084c33d5ca8 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -9,6 +9,14 @@ /delete-node/ cpu@3; }; + timer { + compatible = "arm,armv8-timer"; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>; + }; + pmu { compatible = "arm,cortex-a53-pmu"; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index a4be040a00c0..967d2cd3c3ce 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -29,6 +29,8 @@ device_type = "cpu"; compatible = "brcm,brahma-b53"; reg = <0x0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0xfff8>; next-level-cache = <&l2>; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 92465f777603..d5cdd77e5a95 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -224,9 +224,12 @@ little-endian; }; - efuse@1e80000 { + sfp: efuse@1e80000 { compatible = "fsl,ls1028a-sfp"; reg = <0x0 0x1e80000 0x0 0x10000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(4)>; + clock-names = "sfp"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 913d845eb51a..1977103a5ef4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -376,7 +376,8 @@ camera: &i2c7 { <&cru ACLK_VIO>, <&cru ACLK_GIC_PRE>, <&cru PCLK_DDR>, - <&cru ACLK_HDCP>; + <&cru ACLK_HDCP>, + <&cru ACLK_VDU>; assigned-clock-rates = <600000000>, <1600000000>, <1000000000>, @@ -388,6 +389,7 @@ camera: &i2c7 { <400000000>, <200000000>, <200000000>, + <400000000>, <400000000>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index fbd0346624e6..9d5b0e8c9cca 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1462,7 +1462,8 @@ <&cru HCLK_PERILP1>, <&cru PCLK_PERILP1>, <&cru ACLK_VIO>, <&cru ACLK_HDCP>, <&cru ACLK_GIC_PRE>, - <&cru PCLK_DDR>; + <&cru PCLK_DDR>, + <&cru ACLK_VDU>; assigned-clock-rates = <594000000>, <800000000>, <1000000000>, @@ -1473,7 +1474,8 @@ <100000000>, <50000000>, <400000000>, <400000000>, <200000000>, - <200000000>; + <200000000>, + <400000000>; }; grf: syscon@ff770000 { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 1534e11a9ad1..fa953b736642 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -687,6 +687,7 @@ }; &usb_host0_xhci { + dr_mode = "host"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 7bdcecc0dfe4..02d5f5a8ca03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -133,7 +133,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m1_miim diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6aa2dc836db1..834bff720582 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -510,6 +510,9 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, unsigned int imm, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 695d7368fadc..49e972beeac7 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -323,7 +323,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, return insn; } -static inline long branch_imm_common(unsigned long pc, unsigned long addr, +static inline long label_imm_common(unsigned long pc, unsigned long addr, long range) { long offset; @@ -354,7 +354,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * ARM64 virtual address arrangement guarantees all kernel and module * texts are within +/-128M. */ - offset = branch_imm_common(pc, addr, SZ_128M); + offset = label_imm_common(pc, addr, SZ_128M); if (offset >= SZ_128M) return AARCH64_BREAK_FAULT; @@ -382,7 +382,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); if (offset >= SZ_1M) return AARCH64_BREAK_FAULT; @@ -421,7 +421,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); insn = aarch64_insn_get_bcond_value(); @@ -543,6 +543,28 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); } +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit) +{ + u32 insn; + long offset; + + offset = label_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_ldr_lit_value(); + + if (is64bit) + insn |= BIT(30); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, + offset >> 2); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 194c95ccc1cf..a6acb94ea3d6 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -80,6 +80,12 @@ #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) +/* LDR (literal) */ +#define A64_LDR32LIT(Wt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Wt, false) +#define A64_LDR64LIT(Xt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Xt, true) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ @@ -270,6 +276,7 @@ #define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) +#define A64_NOP A64_HINT(AARCH64_INSN_HINT_NOP) /* DMB */ #define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index f08a4447d363..7ca8779ae34f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/memory.h> #include <linux/printk.h> #include <linux/slab.h> @@ -18,6 +19,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/insn.h> +#include <asm/patching.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -78,6 +80,15 @@ struct jit_ctx { int fpb_offset; }; +struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { if (ctx->image != NULL) @@ -140,6 +151,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address @@ -159,6 +176,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } +static inline void emit_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp = bpf2a64[TMP_REG_1]; + + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { @@ -235,13 +260,30 @@ static bool is_lsi_offset(int offset, int scale) return true; } +/* generated prologue: + * bti c // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp x19, x20, [sp, #-16]! + * stp x21, x22, [sp, #-16]! + * stp x25, x26, [sp, #-16]! + * stp x27, x28, [sp, #-16]! + * mov x25, sp + * mov tcc, #0 + * // PROLOGUE_OFFSET + */ + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) +#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) + /* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ - IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) -#define PROLOGUE_OFFSET 9 -#else -#define PROLOGUE_OFFSET 8 -#endif +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -280,12 +322,14 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + emit_bti(A64_BTI_C, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); + /* Sign lr */ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) emit(A64_PACIASP, ctx); - /* BTI landing pad */ - else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_C, ctx); /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); @@ -312,8 +356,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) } /* BTI landing pad for the tail call, done with a BR */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_J, ctx); + emit_bti(A64_BTI_J, ctx); } emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); @@ -557,6 +600,53 @@ static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .global dummy_tramp\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, x30\n" +" mov x30, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. + */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + static void build_epilogue(struct jit_ctx *ctx) { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -991,8 +1081,7 @@ emit_cond_jmp: &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -1336,6 +1425,13 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1; @@ -1356,7 +1452,7 @@ struct arm64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { - int image_size, prog_size, extable_size; + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -1426,13 +1522,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + build_plt(&ctx); + extable_align = __alignof__(struct exception_table_entry); extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); /* Now we know the actual image size. */ prog_size = sizeof(u32) * ctx.idx; - image_size = prog_size + extable_size; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1444,7 +1544,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.image = (__le32 *)image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; @@ -1458,9 +1558,10 @@ skip_init_ctx: } build_epilogue(&ctx); + build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; @@ -1537,3 +1638,583 @@ bool bpf_jit_supports_subprog_tailcalls(void) { return true; } + +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, + bool save_ret) +{ + u32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + if (p->aux->sleepable) { + enter_prog = (u64)__bpf_prog_enter_sleepable; + exit_prog = (u64)__bpf_prog_exit_sleepable; + } else { + enter_prog = (u64)__bpf_prog_enter; + exit_prog = (u64)__bpf_prog_exit; + } + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); + } else { + emit_a64_mov_i64(A64_R(10), l->cookie, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), + ctx); + } + + /* save p to callee saved register x19 to avoid loading p with mov_i64 + * each time. + */ + emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: &run_ctx */ + emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); + + emit_call(enter_prog, ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + + emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + + if (ctx->image) { + int offset = &ctx->image[ctx->idx] - branch; + *branch = A64_CBZ(1, A64_R(0), offset); + } + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); + /* arg3: &run_ctx */ + emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); + + emit_call(exit_prog, ctx); +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, + u32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. + */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_STR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_LDR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). + * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *orig_call, + int nargs, u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int args_off; + int nargs_off; + int ip_off; + int run_ctx_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + u32 **branches = NULL; + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * [ x20 ] callee saved reg x20 + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * + * [ argN ] + * [ ... ] + * SP + args_off [ arg1 ] + * + * SP + nargs_off [ args count ] + * + * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * SP + run_ctx_off [ bpf_tramp_run_ctx ] + */ + + stack_size = 0; + run_ctx_off = stack_size; + /* room for bpf_tramp_run_ctx */ + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + + ip_off = stack_size; + /* room for IP address argument */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; + + nargs_off = stack_size; + /* room for args count */ + stack_size += 8; + + args_off = stack_size; + /* room for args */ + stack_size += nargs * 8; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently x19 and x20 are used */ + regs_off = stack_size; + stack_size += 16; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* frame for patched function */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + if (flags & BPF_TRAMP_F_IP_ARG) { + /* save ip address of the traced function */ + emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); + } + + /* save args count*/ + emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx); + + /* save args */ + save_args(ctx, args_off, nargs); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + invoke_bpf_prog(ctx, fentry->links[i], args_off, + retval_off, run_ctx_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, + run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, nargs); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_BLR(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = A64_CBNZ(1, A64_R(10), offset); + } + + for (i = 0; i < fexit->nr_links; i++) + invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, + run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, nargs); + + /* restore callee saved register x19 and x20 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + + if (ctx->image) + bpf_flush_icache(ctx->image, ctx->image + ctx->idx); + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *orig_call) +{ + int ret; + int nargs = m->nr_args; + int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + + /* the first 8 arguments are passed by registers */ + if (nargs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + if (ret < 0) + return ret; + + if (ret > max_insns) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(image_end - image)); + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) + ret = -EINVAL; + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + return ret; +} + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. Since kernel function + * entry is set up by ftrace, we reply on ftrace to poke kernel + * functions. + */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. + */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +} diff --git a/arch/csky/include/asm/tlb.h b/arch/csky/include/asm/tlb.h index 3498e65f59f8..702861c68874 100644 --- a/arch/csky/include/asm/tlb.h +++ b/arch/csky/include/asm/tlb.h @@ -4,21 +4,6 @@ #define __ASM_CSKY_TLB_H #include <asm/cacheflush.h> - -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \ - } while (0) - -#define tlb_end_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \ - } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #endif /* __ASM_CSKY_TLB_H */ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 53a912befb62..b57daee98b89 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -108,6 +108,7 @@ config LOONGARCH select TRACE_IRQFLAGS_SUPPORT select USE_PERCPU_NUMA_NODE_ID select ZONE_DMA32 + select MMU_GATHER_MERGE_VMAS if MMU config 32BIT bool diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index 4f629ae9d5a9..dd24f5898f65 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr) ); } -/* - * LoongArch doesn't need any special per-pte or per-vma handling, except - * we need to flush cache for area to be unmapped. - */ -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) static void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7aa12e88c580..c235648fae23 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -256,6 +256,7 @@ config PPC select IRQ_FORCED_THREADING select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64 diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 09a9ae5f3656..b3de6102a907 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -19,8 +19,6 @@ #include <linux/pagemap.h> -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry #define tlb_flush tlb_flush diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index 0ec9a143dfd4..3c83e98e82e4 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -50,6 +50,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu1_intc: interrupt-controller { @@ -77,6 +78,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu2_intc: interrupt-controller { @@ -104,6 +106,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu3_intc: interrupt-controller { @@ -131,6 +134,7 @@ riscv,isa = "rv64imafdc"; clocks = <&clkcfg CLK_CPU>; tlb-split; + next-level-cache = <&cctrllr>; status = "okay"; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 672f02b21ce0..1031038423e7 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -111,6 +111,7 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin, cpu_apply_errata |= tmp; } } - if (cpu_apply_errata != cpu_req_errata) + if (stage != RISCV_ALTERNATIVES_MODULE && + cpu_apply_errata != cpu_req_errata) warn_miss_errata(cpu_req_errata - cpu_apply_errata); } diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 5c2aba5efbd0..dc42375c2357 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -175,7 +175,7 @@ static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) static inline unsigned long _pud_pfn(pud_t pud) { - return pud_val(pud) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pud_val(pud)); } static inline pmd_t *pud_pgtable(pud_t pud) @@ -278,13 +278,13 @@ static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot) static inline unsigned long _p4d_pfn(p4d_t p4d) { - return p4d_val(p4d) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(p4d_val(p4d)); } static inline pud_t *p4d_pgtable(p4d_t p4d) { if (pgtable_l4_enabled) - return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d))); return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); } @@ -292,7 +292,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline struct page *p4d_page(p4d_t p4d) { - return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(p4d_val(p4d))); } #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) @@ -347,7 +347,7 @@ static inline void pgd_clear(pgd_t *pgd) static inline p4d_t *pgd_pgtable(pgd_t pgd) { if (pgtable_l5_enabled) - return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd))); return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) }); } @@ -355,7 +355,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd) static inline struct page *pgd_page(pgd_t pgd) { - return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT); + return pfn_to_page(__page_val_to_pfn(pgd_val(pgd))); } #define pgd_page(pgd) pgd_page(pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 1d1be9d9419c..5dbd6610729b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -261,7 +261,7 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) static inline unsigned long _pgd_pfn(pgd_t pgd) { - return pgd_val(pgd) >> _PAGE_PFN_SHIFT; + return __page_val_to_pfn(pgd_val(pgd)); } static inline struct page *pmd_page(pmd_t pmd) @@ -590,14 +590,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE)); } -#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT) static inline unsigned long pmd_pfn(pmd_t pmd) { return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT); } -#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT) +#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT) static inline unsigned long pud_pfn(pud_t pud) { diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 1c00695ebee7..9826073fbc67 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -54,7 +54,7 @@ static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) static inline unsigned long gstage_pte_page_vaddr(pte_t pte) { - return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); + return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 7f4ad5e4373a..f3455dc013fa 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -781,9 +781,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, (!vcpu->arch.power_off) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); + kvm_vcpu_srcu_read_lock(vcpu); if (vcpu->arch.power_off || vcpu->arch.pause) { /* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8cd9e56c629b..5a1a8dfda6f8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -204,6 +204,7 @@ config S390 select IOMMU_SUPPORT if PCI select MMU_GATHER_NO_GATHER select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 495c68a4df1e..4cb5d17e7ead 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -82,7 +82,7 @@ endif ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN - KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern else @@ -163,6 +163,12 @@ vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h) + +ifdef CONFIG_EXPOLINE_EXTERN +modules_prepare: expoline_prepare +expoline_prepare: + $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o +endif endif # Don't use tabs in echo arguments diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index d910d71b5bb5..7e9e99523e95 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -2,8 +2,6 @@ #ifndef _ASM_S390_NOSPEC_ASM_H #define _ASM_S390_NOSPEC_ASM_H -#include <asm/alternative-asm.h> -#include <asm/asm-offsets.h> #include <asm/dwarf.h> #ifdef __ASSEMBLY__ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fe6407f0eb1b..3a5c8fb590e5 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) - #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb #define pmd_free_tlb pmd_free_tlb diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5d415b3db6d1..580d2e3265cb 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,7 +7,6 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o -obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o @@ -22,3 +21,5 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/ diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile new file mode 100644 index 000000000000..854631d9cb03 --- /dev/null +++ b/arch/s390/lib/expoline/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += expoline.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline/expoline.S index 92ed8409a7a4..92ed8409a7a4 100644 --- a/arch/s390/lib/expoline.S +++ b/arch/s390/lib/expoline/expoline.S diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ba449c47effd..4f7d1dfbc608 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -67,6 +67,8 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select MMU_GATHER_RCU_TABLE_FREE if SMP + select MMU_GATHER_MERGE_VMAS + select MMU_GATHER_NO_FLUSH_CACHE select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index 779a5a0f0608..3037187482db 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm); void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *); void flush_tlb_pending(void); -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() /* diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0760e24f2eba..9838967d0b2f 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -432,6 +432,10 @@ void apply_retpolines(s32 *start, s32 *end) { } +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e58798f636d4..7fff10e15969 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -245,6 +245,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 285e043a3e40..9953d966d124 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -727,7 +727,6 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ - UNTRAIN_RET movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 1bfe979bb9bc..580636cdc257 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -2,9 +2,6 @@ #ifndef _ASM_X86_TLB_H #define _ASM_X86_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) - #define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 734b96454896..8d8752b44f11 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -16,6 +16,12 @@ bool cpc_supported_by_cpu(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: case X86_VENDOR_HYGON: + if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) || + (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f))) + return true; + else if (boot_cpu_data.x86 == 0x17 && + boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f) + return true; return boot_cpu_has(X86_FEATURE_CPPC); } return false; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0dd04713434b..aa34f908c39f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -793,7 +793,7 @@ enum retbleed_mitigation_cmd { RETBLEED_CMD_IBPB, }; -const char * const retbleed_strings[] = { +static const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_NONE] = "Vulnerable", [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", @@ -1181,7 +1181,7 @@ spectre_v2_user_select_mitigation(void) if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { if (mode != SPECTRE_V2_USER_STRICT && mode != SPECTRE_V2_USER_STRICT_PREFERRED) - pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); mode = SPECTRE_V2_USER_STRICT_PREFERRED; } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index eb8656bac99b..9b7acc9c7874 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include <asm/cpufeatures.h> #include <asm/percpu.h> #include <asm/nops.h> +#include <asm/nospec-branch.h> #include <asm/bootparam.h> #include <asm/export.h> #include <asm/pgtable_32.h> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index db96bf7d1122..f8382abe22ff 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,9 +189,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) - struct opcode { u64 flags; u8 intercept; @@ -306,9 +303,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -442,11 +445,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] * INT3 [1 byte; CONFIG_SLS] */ -#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ - IS_ENABLED(CONFIG_SLS)) -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) -#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) -static_assert(SETCC_LENGTH <= SETCC_ALIGN); +#define SETCC_ALIGN 16 #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3a4e895269d7..ab135f9ef52f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2278,7 +2278,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING | SECONDARY_EXEC_DESC); if (nested_cpu_has(vmcs12, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26d0cac32f73..143e37298d8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -298,7 +298,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_COUNTER(VCPU, preemption_reported), STATS_DESC_COUNTER(VCPU, preemption_other), - STATS_DESC_ICOUNTER(VCPU, guest_mode) + STATS_DESC_IBOOLEAN(VCPU, guest_mode) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -9143,15 +9143,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) { - struct kvm_lapic_irq lapic_irq; - - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d8cfce221275..57ba5502aecf 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -77,10 +77,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7e95697a6459..c1f6c1c51d99 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1950,23 +1950,6 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, return 0; } -static bool is_valid_bpf_tramp_flags(unsigned int flags) -{ - if ((flags & BPF_TRAMP_F_RESTORE_REGS) && - (flags & BPF_TRAMP_F_SKIP_FRAME)) - return false; - - /* - * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, - * and it must be used alone. - */ - if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) && - (flags & ~BPF_TRAMP_F_RET_FENTRY_RET)) - return false; - - return true; -} - /* Example: * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); * its 'struct btf_func_model' will be nr_args=2 @@ -2045,9 +2028,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (nr_args > 6) return -ENOTSUPP; - if (!is_valid_bpf_tramp_flags(flags)) - return -EINVAL; - /* Generated trampoline stack layout: * * RBP + 8 [ return address ] @@ -2153,10 +2133,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { restore_regs(m, &prog, nr_args, regs_off); - /* call original function */ - if (emit_call(&prog, orig_call, prog)) { - ret = -EINVAL; - goto cleanup; + if (flags & BPF_TRAMP_F_ORIG_STACK) { + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT2(0xff, 0xd0); /* call *rax */ + } else { + /* call original function */ + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); @@ -2520,3 +2505,28 @@ bool bpf_jit_supports_subprog_tailcalls(void) { return true; } + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct x64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_jit_binary_pack_finalize(prog, jit_data->header, + jit_data->rw_header); + kvfree(jit_data->addrs); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 9ffe2bad27d5..4e5257a4811b 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -23,6 +23,7 @@ #include <linux/objtool.h> #include <asm/page_types.h> #include <asm/segment.h> +#include <asm/nospec-branch.h> .text .code64 @@ -75,7 +76,9 @@ STACK_FRAME_NON_STANDARD __efi64_thunk 1: movq 0x20(%rsp), %rsp pop %rbx pop %rbp - RET + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS diff --git a/block/blk-merge.c b/block/blk-merge.c index 7771dacc99cb..f5e6527ebc9c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -345,6 +345,7 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; + blkcg_bio_issue_init(split); bio_chain(split, *bio); trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 43177c20ce4f..eaea733b368a 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,7 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); -static bool has_backlight; +static bool may_report_brightness_keys; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1224,7 +1224,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_find_cap(data); if (data->cap._BCM && data->cap._BCL) - has_backlight = true; + may_report_brightness_keys = true; mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); @@ -1693,6 +1693,9 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data) break; } + if (keycode) + may_report_brightness_keys = true; + acpi_notifier_call_chain(device, event, 0); if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) { @@ -2253,7 +2256,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; - has_backlight = false; + may_report_brightness_keys = false; } mutex_unlock(®ister_count_mutex); } @@ -2275,7 +2278,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - return has_backlight && + return may_report_brightness_keys && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 76fbb046bdbe..3006e2a0f37e 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -403,6 +403,13 @@ static int btbcm_read_info(struct hci_dev *hdev) bt_dev_info(hdev, "BCM: chip id %u", skb->data[1]); kfree_skb(skb); + return 0; +} + +static int btbcm_print_controller_features(struct hci_dev *hdev) +{ + struct sk_buff *skb; + /* Read Controller Features */ skb = btbcm_read_controller_features(hdev); if (IS_ERR(skb)) @@ -454,6 +461,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = { { 0x6606, "BCM4345C5" }, /* 003.006.006 */ { 0x230f, "BCM4356A2" }, /* 001.003.015 */ { 0x220e, "BCM20702A1" }, /* 001.002.014 */ + { 0x420d, "BCM4349B1" }, /* 002.002.013 */ + { 0x420e, "BCM4349B1" }, /* 002.002.014 */ { 0x4217, "BCM4329B1" }, /* 002.002.023 */ { 0x6106, "BCM4359C0" }, /* 003.001.006 */ { 0x4106, "BCM4335A0" }, /* 002.001.006 */ @@ -514,7 +523,7 @@ static const char *btbcm_get_board_name(struct device *dev) #endif } -int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done) +int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode) { u16 subver, rev, pid, vid; struct sk_buff *skb; @@ -551,9 +560,16 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done) if (err) return err; } - err = btbcm_print_local_name(hdev); - if (err) - return err; + + if (!use_autobaud_mode) { + err = btbcm_print_controller_features(hdev); + if (err) + return err; + + err = btbcm_print_local_name(hdev); + if (err) + return err; + } bcm_subver_table = (hdev->bus == HCI_USB) ? bcm_usb_subver_table : bcm_uart_subver_table; @@ -636,13 +652,13 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done) } EXPORT_SYMBOL_GPL(btbcm_initialize); -int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done) +int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode) { int err; /* Re-initialize if necessary */ if (*fw_load_done) { - err = btbcm_initialize(hdev, fw_load_done); + err = btbcm_initialize(hdev, fw_load_done, use_autobaud_mode); if (err) return err; } @@ -658,15 +674,16 @@ EXPORT_SYMBOL_GPL(btbcm_finalize); int btbcm_setup_patchram(struct hci_dev *hdev) { bool fw_load_done = false; + bool use_autobaud_mode = false; int err; /* Initialize */ - err = btbcm_initialize(hdev, &fw_load_done); + err = btbcm_initialize(hdev, &fw_load_done, use_autobaud_mode); if (err) return err; /* Re-initialize after loading Patch */ - return btbcm_finalize(hdev, &fw_load_done); + return btbcm_finalize(hdev, &fw_load_done, use_autobaud_mode); } EXPORT_SYMBOL_GPL(btbcm_setup_patchram); diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h index 8bf01565fdfc..b4cb24231a20 100644 --- a/drivers/bluetooth/btbcm.h +++ b/drivers/bluetooth/btbcm.h @@ -62,8 +62,8 @@ int btbcm_write_pcm_int_params(struct hci_dev *hdev, int btbcm_setup_patchram(struct hci_dev *hdev); int btbcm_setup_apple(struct hci_dev *hdev); -int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done); -int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done); +int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode); +int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode); #else @@ -104,12 +104,12 @@ static inline int btbcm_setup_apple(struct hci_dev *hdev) return 0; } -static inline int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done) +static inline int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode) { return 0; } -static inline int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done) +static inline int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done, bool use_autobaud_mode) { return 0; } diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index d6700efcfe8c..f9a3444753c2 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1282,6 +1282,13 @@ err: hci_reset_dev(hdev); } +static bool btmtksdio_sdio_inband_wakeup(struct hci_dev *hdev) +{ + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + + return device_may_wakeup(bdev->dev); +} + static bool btmtksdio_sdio_wakeup(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); @@ -1349,6 +1356,14 @@ static int btmtksdio_probe(struct sdio_func *func, hdev->shutdown = btmtksdio_shutdown; hdev->send = btmtksdio_send_frame; hdev->wakeup = btmtksdio_sdio_wakeup; + /* + * If SDIO controller supports wake on Bluetooth, sending a wakeon + * command is not necessary. + */ + if (device_can_wakeup(func->card->host->parent)) + hdev->wakeup = btmtksdio_sdio_inband_wakeup; + else + hdev->wakeup = btmtksdio_sdio_wakeup; hdev->set_bdaddr = btmtk_set_bdaddr; SET_HCIDEV_DEV(hdev, &func->dev); diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 47c28fd8f006..fb52313a1d45 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -330,7 +330,7 @@ static int rtlbt_parse_firmware(struct hci_dev *hdev, /* Loop from the end of the firmware parsing instructions, until * we find an instruction that identifies the "project ID" for the * hardware supported by this firwmare file. - * Once we have that, we double-check that that project_id is suitable + * Once we have that, we double-check that project_id is suitable * for the hardware we are working with. */ while (fwptr >= btrtl_dev->fw_data + (sizeof(*epatch_info) + 3)) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e25fcd49db70..15caa6469538 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -427,6 +427,18 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + /* Realtek 8852CE Bluetooth devices */ + { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), .driver_info = BTUSB_REALTEK }, @@ -477,6 +489,12 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x0489, 0xe0e2), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, @@ -893,11 +911,21 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count) hci_skb_expect(skb) -= len; if (skb->len == HCI_ACL_HDR_SIZE) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __le16 dlen = hci_acl_hdr(skb)->dlen; + __u8 type; /* Complete ACL header */ hci_skb_expect(skb) = __le16_to_cpu(dlen); + /* Detect if ISO packet has been sent over bulk */ + if (hci_conn_num(data->hdev, ISO_LINK)) { + type = hci_conn_lookup_type(data->hdev, + hci_handle(handle)); + if (type == ISO_LINK) + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + } + if (skb_tailroom(skb) < hci_skb_expect(skb)) { kfree_skb(skb); skb = NULL; @@ -1762,6 +1790,13 @@ static int btusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) hdev->stat.sco_tx++; return submit_tx_urb(hdev, urb); + + case HCI_ISODATA_PKT: + urb = alloc_bulk_urb(hdev, skb); + if (IS_ERR(urb)) + return PTR_ERR(urb); + + return submit_or_queue_tx_urb(hdev, urb); } return -EILSEQ; @@ -2069,7 +2104,6 @@ static int btusb_setup_csr(struct hci_dev *hdev) * without these the controller will lock up. */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); @@ -2255,6 +2289,13 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) hdev->stat.sco_tx++; return submit_tx_urb(hdev, urb); + + case HCI_ISODATA_PKT: + urb = alloc_bulk_urb(hdev, skb); + if (IS_ERR(urb)) + return PTR_ERR(urb); + + return submit_or_queue_tx_urb(hdev, urb); } return -EILSEQ; @@ -3352,7 +3393,6 @@ static int btusb_setup_qca(struct hci_dev *hdev) * work with the likes of HSP/HFP mSBC. */ set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); - set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); return 0; } @@ -3795,6 +3835,7 @@ static int btusb_probe(struct usb_interface *intf, hdev->manufacturer = 70; hdev->cmd_timeout = btusb_mtk_cmd_timeout; hdev->set_bdaddr = btmtk_set_bdaddr; + set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); data->recv_acl = btusb_recv_acl_mtk; } diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 785f445dd60d..d7e0b75db8a6 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -53,10 +53,12 @@ * struct bcm_device_data - device specific data * @no_early_set_baudrate: Disallow set baudrate before driver setup() * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it + * @max_autobaud_speed: max baudrate supported by device in autobaud mode */ struct bcm_device_data { bool no_early_set_baudrate; bool drive_rts_on_open; + u32 max_autobaud_speed; }; /** @@ -99,6 +101,8 @@ struct bcm_device_data { * @no_early_set_baudrate: don't set_baudrate before setup() * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it * @pcm_int_params: keep the initial PCM configuration + * @use_autobaud_mode: start Bluetooth device in autobaud mode + * @max_autobaud_speed: max baudrate supported by device in autobaud mode */ struct bcm_device { /* Must be the first member, hci_serdev.c expects this. */ @@ -136,7 +140,9 @@ struct bcm_device { #endif bool no_early_set_baudrate; bool drive_rts_on_open; + bool use_autobaud_mode; u8 pcm_int_params[5]; + u32 max_autobaud_speed; }; /* generic bcm uart resources */ @@ -472,15 +478,20 @@ static int bcm_open(struct hci_uart *hu) out: if (bcm->dev) { - if (bcm->dev->drive_rts_on_open) + if (bcm->dev->use_autobaud_mode) + hci_uart_set_flow_control(hu, false); /* Assert BT_UART_CTS_N */ + else if (bcm->dev->drive_rts_on_open) hci_uart_set_flow_control(hu, true); - hu->init_speed = bcm->dev->init_speed; + if (bcm->dev->use_autobaud_mode && bcm->dev->max_autobaud_speed) + hu->init_speed = min(bcm->dev->oper_speed, bcm->dev->max_autobaud_speed); + else + hu->init_speed = bcm->dev->init_speed; /* If oper_speed is set, ldisc/serdev will set the baudrate * before calling setup() */ - if (!bcm->dev->no_early_set_baudrate) + if (!bcm->dev->no_early_set_baudrate && !bcm->dev->use_autobaud_mode) hu->oper_speed = bcm->dev->oper_speed; err = bcm_gpio_set_power(bcm->dev, true); @@ -564,6 +575,7 @@ static int bcm_setup(struct hci_uart *hu) { struct bcm_data *bcm = hu->priv; bool fw_load_done = false; + bool use_autobaud_mode = (bcm->dev ? bcm->dev->use_autobaud_mode : 0); unsigned int speed; int err; @@ -572,7 +584,7 @@ static int bcm_setup(struct hci_uart *hu) hu->hdev->set_diag = bcm_set_diag; hu->hdev->set_bdaddr = btbcm_set_bdaddr; - err = btbcm_initialize(hu->hdev, &fw_load_done); + err = btbcm_initialize(hu->hdev, &fw_load_done, use_autobaud_mode); if (err) return err; @@ -580,8 +592,8 @@ static int bcm_setup(struct hci_uart *hu) return 0; /* Init speed if any */ - if (hu->init_speed) - speed = hu->init_speed; + if (bcm->dev && bcm->dev->init_speed) + speed = bcm->dev->init_speed; else if (hu->proto->init_speed) speed = hu->proto->init_speed; else @@ -616,7 +628,7 @@ static int bcm_setup(struct hci_uart *hu) btbcm_write_pcm_int_params(hu->hdev, ¶ms); } - err = btbcm_finalize(hu->hdev, &fw_load_done); + err = btbcm_finalize(hu->hdev, &fw_load_done, use_autobaud_mode); if (err) return err; @@ -1197,6 +1209,8 @@ static int bcm_acpi_probe(struct bcm_device *dev) static int bcm_of_probe(struct bcm_device *bdev) { + bdev->use_autobaud_mode = device_property_read_bool(bdev->dev, + "brcm,requires-autobaud-mode"); device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed); device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params", bdev->pcm_int_params, 5); @@ -1512,6 +1526,7 @@ static int bcm_serdev_probe(struct serdev_device *serdev) data = device_get_match_data(bcmdev->dev); if (data) { + bcmdev->max_autobaud_speed = data->max_autobaud_speed; bcmdev->no_early_set_baudrate = data->no_early_set_baudrate; bcmdev->drive_rts_on_open = data->drive_rts_on_open; } @@ -1535,6 +1550,10 @@ static struct bcm_device_data bcm43438_device_data = { .drive_rts_on_open = true, }; +static struct bcm_device_data cyw55572_device_data = { + .max_autobaud_speed = 921600, +}; + static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm20702a1" }, { .compatible = "brcm,bcm4329-bt" }, @@ -1544,8 +1563,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm43430a0-bt" }, { .compatible = "brcm,bcm43430a1-bt" }, { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data }, + { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data }, { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, + { .compatible = "infineon,cyw55572-bt", .data = &cyw55572_device_data }, { }, }; MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match); diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 7249b91d9b91..78afb9a348e7 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -1217,7 +1217,11 @@ static struct platform_driver intel_driver = { int __init intel_init(void) { - platform_driver_register(&intel_driver); + int err; + + err = platform_driver_register(&intel_driver); + if (err) + return err; return hci_uart_register_proto(&intel_proto); } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index eab34e24d944..8df11016fd51 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1588,7 +1588,7 @@ static bool qca_wakeup(struct hci_dev *hdev) wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent); bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup); - return !wakeup; + return wakeup; } static int qca_regulator_init(struct hci_uart *hu) diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 4cda890ce647..c0e5f42ec6b7 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -231,6 +231,15 @@ static int hci_uart_setup(struct hci_dev *hdev) return 0; } +/* Check if the device is wakeable */ +static bool hci_uart_wakeup(struct hci_dev *hdev) +{ + /* HCI UART devices are assumed to be wakeable by default. + * Implement wakeup callback to override this behavior. + */ + return true; +} + /** hci_uart_write_wakeup - transmit buffer wakeup * @serdev: serial device * @@ -342,6 +351,8 @@ int hci_uart_register_device(struct hci_uart *hu, hdev->flush = hci_uart_flush; hdev->send = hci_uart_send_frame; hdev->setup = hci_uart_setup; + if (!hdev->wakeup) + hdev->wakeup = hci_uart_wakeup; SET_HCIDEV_DEV(hdev, &hu->serdev->dev); if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags)) diff --git a/drivers/char/random.c b/drivers/char/random.c index e3dd1dd3dd22..a1af90bacc9f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1174,7 +1174,7 @@ static void __cold entropy_timer(struct timer_list *timer) */ static void __cold try_to_generate_entropy(void) { - enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = 32 }; + enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 30 }; struct entropy_timer_state stack; unsigned int i, num_different = 0; unsigned long last = random_get_entropy(); diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 37a1eb20f5ba..76f6b3884e6b 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -439,9 +439,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) /* Both presence and absence of sram regulator are valid cases. */ info->sram_reg = regulator_get_optional(cpu_dev, "sram"); - if (IS_ERR(info->sram_reg)) + if (IS_ERR(info->sram_reg)) { + ret = PTR_ERR(info->sram_reg); + if (ret == -EPROBE_DEFER) + goto out_free_resources; + info->sram_reg = NULL; - else { + } else { ret = regulator_enable(info->sram_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c index 73089a24f04b..ceae84c19d22 100644 --- a/drivers/firmware/efi/reboot.c +++ b/drivers/firmware/efi/reboot.c @@ -6,7 +6,7 @@ #include <linux/efi.h> #include <linux/reboot.h> -static void (*orig_pm_power_off)(void); +static struct sys_off_handler *efi_sys_off_handler; int efi_reboot_quirk_mode = -1; @@ -51,15 +51,11 @@ bool __weak efi_poweroff_required(void) return false; } -static void efi_power_off(void) +static int efi_power_off(struct sys_off_data *data) { efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); - /* - * The above call should not return, if it does fall back to - * the original power off method (typically ACPI poweroff). - */ - if (orig_pm_power_off) - orig_pm_power_off(); + + return NOTIFY_DONE; } static int __init efi_shutdown_init(void) @@ -68,8 +64,13 @@ static int __init efi_shutdown_init(void) return -ENODEV; if (efi_poweroff_required()) { - orig_pm_power_off = pm_power_off; - pm_power_off = efi_power_off; + /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */ + efi_sys_off_handler = + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_FIRMWARE + 1, + efi_power_off, NULL); + if (IS_ERR(efi_sys_off_handler)) + return PTR_ERR(efi_sys_off_handler); } return 0; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 98109839102f..1020c2feb249 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -991,28 +991,22 @@ static struct configfs_attribute *gpio_sim_device_config_attrs[] = { }; struct gpio_sim_chip_name_ctx { - struct gpio_sim_device *dev; + struct fwnode_handle *swnode; char *page; }; static int gpio_sim_emit_chip_name(struct device *dev, void *data) { struct gpio_sim_chip_name_ctx *ctx = data; - struct fwnode_handle *swnode; - struct gpio_sim_bank *bank; /* This would be the sysfs device exported in /sys/class/gpio. */ if (dev->class) return 0; - swnode = dev_fwnode(dev); + if (device_match_fwnode(dev, ctx->swnode)) + return sprintf(ctx->page, "%s\n", dev_name(dev)); - list_for_each_entry(bank, &ctx->dev->bank_list, siblings) { - if (bank->swnode == swnode) - return sprintf(ctx->page, "%s\n", dev_name(dev)); - } - - return -ENODATA; + return 0; } static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item, @@ -1020,7 +1014,7 @@ static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item, { struct gpio_sim_bank *bank = to_gpio_sim_bank(item); struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank); - struct gpio_sim_chip_name_ctx ctx = { dev, page }; + struct gpio_sim_chip_name_ctx ctx = { bank->swnode, page }; int ret; mutex_lock(&dev->lock); diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 0c9a63becfef..b26e64338376 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -421,6 +421,10 @@ out_free_lh: * @work: the worker that implements software debouncing * @sw_debounced: flag indicating if the software debouncer is active * @level: the current debounced physical level of the line + * @hdesc: the Hardware Timestamp Engine (HTE) descriptor + * @raw_level: the line level at the time of event + * @total_discard_seq: the running counter of the discarded events + * @last_seqno: the last sequence number before debounce period expires */ struct line { struct gpio_desc *desc; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e88c497fa010..f65656df3619 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -256,7 +256,6 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE - select DRM_BUDDY help Choose this option if you have a recent AMD Radeon graphics card. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,15 +30,12 @@ #include <drm/ttm/ttm_resource.h> #include <drm/ttm/ttm_range_manager.h> -#include "amdgpu_vram_mgr.h" - /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_t start; uint64_t size; uint64_t remaining; - void *node; - uint32_t mem_type; + struct drm_mm_node *node; }; /** @@ -55,63 +52,27 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { - struct drm_buddy_block *block; - struct list_head *head, *next; struct drm_mm_node *node; - if (!res) - goto fallback; - - BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - - cur->mem_type = res->mem_type; - - switch (cur->mem_type) { - case TTM_PL_VRAM: - head = &to_amdgpu_vram_mgr_resource(res)->blocks; - - block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!block) - goto fallback; - - while (start >= amdgpu_vram_mgr_block_size(block)) { - start -= amdgpu_vram_mgr_block_size(block); - - next = block->link.next; - if (next != head) - block = list_entry(next, struct drm_buddy_block, link); - } - - cur->start = amdgpu_vram_mgr_block_start(block) + start; - cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size); - cur->remaining = size; - cur->node = block; - break; - case TTM_PL_TT: - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; - - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); + if (!res || res->mem_type == TTM_PL_SYSTEM) { + cur->start = start; + cur->size = size; cur->remaining = size; - cur->node = node; - break; - default: - goto fallback; + cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + return; } - return; + BUG_ON(start + size > res->num_pages << PAGE_SHIFT); -fallback: - cur->start = start; - cur->size = size; + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; + + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; + cur->node = node; } /** @@ -124,9 +85,7 @@ fallback: */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_buddy_block *block; - struct drm_mm_node *node; - struct list_head *next; + struct drm_mm_node *node = cur->node; BUG_ON(size > cur->remaining); @@ -140,27 +99,9 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) return; } - switch (cur->mem_type) { - case TTM_PL_VRAM: - block = cur->node; - - next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); - - cur->node = block; - cur->start = amdgpu_vram_mgr_block_start(block); - cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); - break; - case TTM_PL_TT: - node = cur->node; - - cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); - break; - default: - return; - } + cur->node = ++node; + cur->start = node->start << PAGE_SHIFT; + cur->size = min(node->size << PAGE_SHIFT, cur->remaining); } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..9120ae80ef52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,7 +26,6 @@ #include <linux/dma-direction.h> #include <drm/gpu_scheduler.h> -#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -39,6 +38,15 @@ #define AMDGPU_POISON 0xd0bed0be +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_mm mm; + spinlock_t lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; +}; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 49e4092f447f..0a7611648573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,10 +32,8 @@ #include "atom.h" struct amdgpu_vram_reservation { - u64 start; - u64 size; - struct list_head allocated; - struct list_head blocks; + struct list_head node; + struct drm_mm_node mm_node; }; static inline struct amdgpu_vram_mgr * @@ -188,18 +186,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible block size + * amdgpu_vram_mgr_vis_size - Calculate visible node size * * @adev: amdgpu_device pointer - * @block: DRM BUDDY block structure + * @node: MM node structure * - * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM + * Calculate how many bytes of the MM node are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_buddy_block *block) + struct drm_mm_node *node) { - u64 start = amdgpu_vram_mgr_block_start(block); - u64 end = start + amdgpu_vram_mgr_block_size(block); + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; if (start >= adev->gmc.visible_vram_size) return 0; @@ -220,9 +218,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; - u64 usage = 0; + unsigned pages = res->num_pages; + struct drm_mm_node *mm; + u64 usage; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -230,8 +228,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - list_for_each_entry(block, &vres->blocks, link) - usage += amdgpu_vram_mgr_vis_size(adev, block); + mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; + for (usage = 0; pages; pages -= mm->size, mm++) + usage += amdgpu_vram_mgr_vis_size(adev, mm); return usage; } @@ -241,30 +240,23 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; + struct drm_mm *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; - struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { - if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, - rsv->size, mm->chunk_size, &rsv->allocated, - DRM_BUDDY_RANGE_ALLOCATION)) - continue; - - block = amdgpu_vram_mgr_first_block(&rsv->allocated); - if (!block) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { + if (drm_mm_reserve_node(mm, &rsv->mm_node)) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->start, rsv->size); + rsv->mm_node.start, rsv->mm_node.size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, block); + vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); atomic64_add(vis_usage, &mgr->vis_usage); spin_lock(&man->bdev->lru_lock); - man->usage += rsv->size; + man->usage += rsv->mm_node.size << PAGE_SHIFT; spin_unlock(&man->bdev->lru_lock); - list_move(&rsv->blocks, &mgr->reserved_pages); + list_move(&rsv->node, &mgr->reserved_pages); } } @@ -286,16 +278,14 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->allocated); - INIT_LIST_HEAD(&rsv->blocks); + INIT_LIST_HEAD(&rsv->node); + rsv->mm_node.start = start >> PAGE_SHIFT; + rsv->mm_node.size = size >> PAGE_SHIFT; - rsv->start = start; - rsv->size = size; - - mutex_lock(&mgr->lock); - list_add_tail(&rsv->blocks, &mgr->reservations_pending); + spin_lock(&mgr->lock); + list_add_tail(&rsv->node, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return 0; } @@ -317,19 +307,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - mutex_lock(&mgr->lock); + spin_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = 0; goto out; } @@ -337,11 +327,33 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return ret; } /** + * amdgpu_vram_mgr_virt_start - update virtual start address + * + * @mem: ttm_resource to update + * @node: just allocated node + * + * Calculate a virtual BO start address to easily check if everything is CPU + * accessible. + */ +static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, + struct drm_mm_node *node) +{ + unsigned long start; + + start = node->start + node->size; + if (start > mem->num_pages) + start -= mem->num_pages; + else + start = 0; + mem->start = max(mem->start, start); +} + +/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -356,44 +368,46 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - u64 vis_usage = 0, max_bytes, cur_size, min_block_size; + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct amdgpu_vram_mgr_resource *vres; - u64 size, remaining_size, lpfn, fpfn; - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; - unsigned long pages_per_block; + uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct ttm_range_mgr_node *node; + struct drm_mm *mm = &mgr->mm; + enum drm_mm_insert_mode mode; + unsigned i; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = place->lpfn; if (!lpfn) - lpfn = man->size; - - fpfn = place->fpfn << PAGE_SHIFT; + lpfn = man->size >> PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; + mem_bytes = tbo->base.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_block = ~0ul; + pages_per_node = ~0ul; + num_nodes = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_block = HPAGE_PMD_NR; + pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_block = 2UL << (20UL - PAGE_SHIFT); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, - tbo->page_alignment); + pages_per_node = max_t(uint32_t, pages_per_node, + tbo->page_alignment); + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); } - vres = kzalloc(sizeof(*vres), GFP_KERNEL); - if (!vres) + node = kvmalloc(struct_size(node, mm_nodes, num_nodes), + GFP_KERNEL | __GFP_ZERO); + if (!node) return -ENOMEM; - ttm_resource_init(tbo, place, &vres->base); + ttm_resource_init(tbo, place, &node->base); /* bail out quickly if there's likely not enough VRAM for this BO */ if (ttm_resource_manager_usage(man) > max_bytes) { @@ -401,130 +415,66 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - INIT_LIST_HEAD(&vres->blocks); - + mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) - vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - - if (fpfn || lpfn != man->size) - /* Allocate blocks in desired range */ - vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - - remaining_size = vres->base.num_pages << PAGE_SHIFT; - - mutex_lock(&mgr->lock); - while (remaining_size) { - if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; - else - min_block_size = mgr->default_page_size; - - BUG_ON(min_block_size < mm->chunk_size); - - /* Limit maximum size to 2GiB due to SG table limitations */ - size = min(remaining_size, 2ULL << 30); - - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; - - cur_size = size; - - if (fpfn + size != place->lpfn << PAGE_SHIFT) { - /* - * Except for actual range allocation, modify the size and - * min_block_size conforming to continuous flag enablement - */ - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* - * Modify the size value if size is not - * aligned with min_block_size - */ - } else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); + mode = DRM_MM_INSERT_HIGH; + + pages_left = node->base.num_pages; + + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); + + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { + uint32_t alignment = tbo->page_alignment; + + if (pages >= pages_per_node) + alignment = pages_per_node; + + r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, + alignment, 0, place->fpfn, + lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; } + goto error_free; } - r = drm_buddy_alloc_blocks(mm, fpfn, - lpfn, - size, - min_block_size, - &vres->blocks, - vres->flags); - if (unlikely(r)) - goto error_free_blocks; - - if (size > remaining_size) - remaining_size = 0; - else - remaining_size -= size; - } - mutex_unlock(&mgr->lock); - - if (cur_size != size) { - struct drm_buddy_block *block; - struct list_head *trim_list; - u64 original_size; - LIST_HEAD(temp); - - trim_list = &vres->blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; - - /* - * If size value is rounded up to min_block_size, trim the last - * block to the required size - */ - if (!list_is_singular(&vres->blocks)) { - block = list_last_entry(&vres->blocks, typeof(*block), link); - list_move_tail(&block->link, &temp); - trim_list = &temp; - /* - * Compute the original_size value by subtracting the - * last block size with (aligned size - original size) - */ - original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); - } + vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); + amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); + pages_left -= pages; + ++i; - mutex_lock(&mgr->lock); - drm_buddy_block_trim(mm, - original_size, - trim_list); - mutex_unlock(&mgr->lock); - - if (!list_empty(&temp)) - list_splice_tail(trim_list, &vres->blocks); - } - - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - - block = amdgpu_vram_mgr_first_block(&vres->blocks); - if (!block) { - r = -EINVAL; - goto error_fini; + if (pages > pages_left) + pages = pages_left; } + spin_unlock(&mgr->lock); - vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; - - if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) - vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (i == 1) + node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; if (adev->gmc.xgmi.connected_to_cpu) - vres->base.bus.caching = ttm_cached; + node->base.bus.caching = ttm_cached; else - vres->base.bus.caching = ttm_write_combined; + node->base.bus.caching = ttm_write_combined; atomic64_add(vis_usage, &mgr->vis_usage); - *res = &vres->base; + *res = &node->base; return 0; -error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); +error_free: + while (i--) + drm_mm_remove_node(&node->mm_nodes[i]); + spin_unlock(&mgr->lock); error_fini: - ttm_resource_fini(man, &vres->base); - kfree(vres); + ttm_resource_fini(man, &node->base); + kvfree(node); return r; } @@ -540,26 +490,27 @@ error_fini: static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; uint64_t vis_usage = 0; + unsigned i, pages; - mutex_lock(&mgr->lock); - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + spin_lock(&mgr->lock); + for (i = 0, pages = res->num_pages; pages; + pages -= node->mm_nodes[i].size, ++i) { + struct drm_mm_node *mm = &node->mm_nodes[i]; + drm_mm_remove_node(mm); + vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); + } amdgpu_vram_mgr_do_reserve(man); - - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); ttm_resource_fini(man, res); - kfree(vres); + kvfree(node); } /** @@ -591,7 +542,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_BUDDY blocks to export */ + /* Determine the number of DRM_MM nodes to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -607,10 +558,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_BUDDY blocks to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_BUDDY block + * Walk down DRM_MM nodes to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_MM node * and the number of bytes from it. Access the following - * DRM_BUDDY block(s) if more buffer needs to exported + * DRM_MM node(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -697,22 +648,13 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); - mutex_lock(&mgr->lock); - drm_printf(printer, "default_page_size: %lluKiB\n", - mgr->default_page_size >> 10); - - drm_buddy_print(mm, printer); - - drm_printf(printer, "reserved:\n"); - list_for_each_entry(block, &mgr->reserved_pages, link) - drm_buddy_block_print(mm, block, printer); - mutex_unlock(&mgr->lock); + spin_lock(&mgr->lock); + drm_mm_print(&mgr->mm, printer); + spin_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -732,21 +674,16 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; - int err; ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - - mutex_init(&mgr->lock); + drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT); + spin_lock_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); - mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -774,16 +711,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - mutex_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); kfree(rsv); } - drm_buddy_fini(&mgr->mm); - mutex_unlock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h deleted file mode 100644 index 9a2db87186c7..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: MIT - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __AMDGPU_VRAM_MGR_H__ -#define __AMDGPU_VRAM_MGR_H__ - -#include <drm/drm_buddy.h> - -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_buddy mm; - /* protects access to buffer objects */ - struct mutex lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t vis_usage; - u64 default_page_size; -}; - -struct amdgpu_vram_mgr_resource { - struct ttm_resource base; - struct list_head blocks; - unsigned long flags; -}; - -static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) -{ - return drm_buddy_block_offset(block); -} - -static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) -{ - return PAGE_SIZE << drm_buddy_block_order(block); -} - -static inline struct drm_buddy_block * -amdgpu_vram_mgr_first_block(struct list_head *list) -{ - return list_first_entry_or_null(list, struct drm_buddy_block, link); -} - -static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) -{ - struct drm_buddy_block *block; - u64 start, size; - - block = amdgpu_vram_mgr_first_block(head); - if (!block) - return false; - - while (head != block->link.next) { - start = amdgpu_vram_mgr_block_start(block); - size = amdgpu_vram_mgr_block_size(block); - - block = list_entry(block->link.next, struct drm_buddy_block, link); - if (start + size != amdgpu_vram_mgr_block_start(block)) - return false; - } - - return true; -} - -static inline struct amdgpu_vram_mgr_resource * -to_amdgpu_vram_mgr_resource(struct ttm_resource *res) -{ - return container_of(res, struct amdgpu_vram_mgr_resource, base); -} - -#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index bf4200457772..a08769c5e94b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -184,6 +184,8 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi2x+, Navi1x+ */ if (gc_version == IP_VERSION(10, 3, 6)) kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version == IP_VERSION(10, 3, 7)) + kfd->device_info.no_atomic_fw_version = 3; else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1c2984bbda51..93ac33a8de9a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -72,6 +72,7 @@ #include <linux/pci.h> #include <linux/firmware.h> #include <linux/component.h> +#include <linux/dmi.h> #include <drm/display/drm_dp_mst_helper.h> #include <drm/display/drm_hdmi_helper.h> @@ -462,6 +463,26 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } +static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -500,7 +521,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) * if a pageflip happened inside front-porch. */ if (vrr_active) { - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ if (acrtc->dm_irq_params.stream && @@ -552,7 +573,7 @@ static void dm_crtc_high_irq(void *interrupt_params) * to dm_vupdate_high_irq after end of front-porch. */ if (!vrr_active) - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /** * Following stuff must happen at start of vblank, for crc @@ -1382,6 +1403,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } +static const struct dmi_system_id hpd_disconnect_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + {} +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + const struct dmi_system_id *dmi_id; + + dm->aux_hpd_discon_quirk = false; + + dmi_id = dmi_first_match(hpd_disconnect_quirk_table); + if (dmi_id) { + dm->aux_hpd_discon_quirk = true; + DRM_INFO("aux_hpd_discon_quirk attached\n"); + } +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1508,6 +1564,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } INIT_LIST_HEAD(&adev->dm.da_list); + + retrieve_dmi_info(&adev->dm); + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); @@ -5407,7 +5466,7 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state, } } - if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) *pre_multiplied_alpha = false; } @@ -9135,6 +9194,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct amdgpu_bo *abo; uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); + bool cursor_update = false; bool pflip_present = false; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; @@ -9170,8 +9230,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); /* Cursor plane is handled after stream updates */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if ((fb && crtc == pcrtc) || + (old_plane_state->fb && old_plane_state->crtc == pcrtc)) + cursor_update = true; + continue; + } if (!fb || !crtc || pcrtc != crtc) continue; @@ -9334,6 +9399,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.vrr_infopacket = &acrtc_state->stream->vrr_infopacket; } + } else if (cursor_update && acrtc_state->active_planes > 0 && + !acrtc_state->force_dpms_off && + acrtc_attach->base.state->event) { + drm_crtc_vblank_get(pcrtc); + + spin_lock_irqsave(&pcrtc->dev->event_lock, flags); + + acrtc_attach->event = acrtc_attach->base.state->event; + acrtc_attach->base.state->event = NULL; + + spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } /* Update the planes if changed or disable if we don't have any. */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index aa34c0068f41..e80ef93f6550 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,14 @@ struct amdgpu_display_manager { * last successfully applied backlight values. */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; + + /** + * @aux_hpd_discon_quirk: + * + * quirk for hpd discon while aux is on-going. + * occurred on certain intel platform + */ + bool aux_hpd_discon_quirk; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9221b6690a4a..2b9b095e5f03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -56,6 +56,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, ssize_t result = 0; struct aux_payload payload; enum aux_return_code_type operation_result; + struct amdgpu_device *adev; + struct ddc_service *ddc; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +76,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); + /* + * w/a on certain intel platform where hpd is unexpected to pull low during + * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON + * aux transaction is succuess in such case, therefore bypass the error + */ + ddc = TO_DM_AUX(aux)->ddc_service; + adev = ddc->ctx->driver_context; + if (adev->dm.aux_hpd_discon_quirk) { + if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && + operation_result == AUX_RET_ERROR_HPD_DISCON) { + result = 0; + operation_result = AUX_RET_SUCCESS; + } + } + if (payload.write && result >= 0) result = msg->size; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 6774dd8bb53e..3fe3fbac1e63 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1117,12 +1117,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * on certain displays, such as the Sharp 4k. 36bpp is needed * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc - * precision on at least DCN display engines. However, at least - * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, - * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 - * did not show such problems, so this seems to be the exception. + * precision on DCN display engines, but apparently not for DCE, as + * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have + * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, + * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel + * passthrough). Therefore only use 36 bpp on DCN where it is actually needed. */ - if (plane_state->ctx->dce_version > DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_MAX) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5f8809f6990d..2fbd2926a531 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1228,6 +1228,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (speed == 0) + return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f46ee16a323a..a4fb577eceb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -60,6 +60,8 @@ __i915_gem_object_create_region(struct intel_memory_region *mem, if (page_size) default_page_size = page_size; + /* We should be able to fit a page within an sg entry */ + GEM_BUG_ON(overflows_type(default_page_size, u32)); GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); GEM_BUG_ON(default_page_size < PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..8f1bb6a4b7d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + u32 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); + page_alignment = bo->page_alignment << PAGE_SHIFT; + if (!page_alignment) + page_alignment = obj->mm.region->min_page_size; + /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. @@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct i915_refct_sgt *rsgt; rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - res); + res, + page_alignment); if (IS_ERR(rsgt)) return rsgt; @@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, + page_alignment); } static int i915_ttm_truncate(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 319936f91ac5..e6e01c2a74a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -9,6 +9,7 @@ #include <linux/jiffies.h> #include "gt/intel_engine.h" +#include "gt/intel_rps.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); } +static void +i915_gem_object_boost(struct dma_resv *resv, unsigned int flags) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + /* + * Prescan all fences for potential boosting before we begin waiting. + * + * When we wait, we wait on outstanding fences serially. If the + * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced + * form 1:2, then as we look at each wait in turn we see that each + * request is currently executing and not worthy of boosting. But if + * we only happen to look at the final fence in the sequence (because + * of request coalescing or splitting between read/write arrays by + * the iterator), then we would boost. As such our decision to boost + * or not is delicately balanced on the order we wait on fences. + * + * So instead of looking for boosts sequentially, look for all boosts + * upfront and then wait on the outstanding fences. + */ + + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); + dma_resv_for_each_fence_unlocked(&cursor, fence) + if (dma_fence_is_i915(fence) && + !i915_request_started(to_request(fence))) + intel_rps_boost(to_request(fence)); + dma_resv_iter_end(&cursor); +} + static long i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, @@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; + i915_gem_object_boost(resv, flags); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 51a0fe60c050..531af6ad7007 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1209,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -1223,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index b9eb75a2b400..1c35a41620ae 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 159571b9bd24..dcc081874ec8 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from a struct drm_mm_node, * taking a maximum segment length into account, splitting into segments @@ -77,22 +78,25 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start) + u64 region_start, + u32 page_alignment) { - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ - u64 segment_pages = max_segment >> PAGE_SHIFT; + const u32 max_segment = round_down(UINT_MAX, page_alignment); + const u32 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; + GEM_BUG_ON(!max_segment); + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) return ERR_PTR(-ENOMEM); i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); st = &rsgt->table; - if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); @@ -112,12 +116,14 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; @@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from struct i915_buddy_block list, * taking a maximum segment length into account, splitting into segments @@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start) + u64 region_start, + u32 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + const u32 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); + GEM_BUG_ON(!max_segment); rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) @@ -191,12 +200,14 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c6a1684081..9ddb3e743a3e 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start); + u64 region_start, + u32 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start); + u64 region_start, + u32 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..575d67bc6ffe 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) */ struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res) + struct ttm_resource *res, + u32 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + mem->region.start, + page_alignment); } else { - return i915_rsgt_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start, + page_alignment); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index cf9d86dcf409..5bb8d8b582ae 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res); + struct ttm_resource *res, + u32 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8633bec18fa7..ab9f17fc85bc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm, u64 addr; for (addr = round_up(hole_start + min_alignment, step) - min_alignment; - addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; + hole_end > addr && hole_end - addr >= 2 * min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 73eb53edb8de..3b18e5905c86 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -451,7 +451,6 @@ out_put: static int igt_mock_max_segment(void *arg) { - const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; @@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg) struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; + I915_RND_STATE(prng); LIST_HEAD(objects); + unsigned int max_segment; + unsigned int ps; u64 size; int err = 0; @@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); + ps = PAGE_SIZE; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + max_segment = round_down(UINT_MAX, ps); + + mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg) } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + dma_addr_t daddr = sg_dma_address(sg); + if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } + + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_close; + } } out_close: diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 670557ce1024..bac21fe84ca5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) return PTR_ERR(obj->mm.res); obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - obj->mm.res); + obj->mm.res, + obj->mm.region->min_page_size); if (IS_ERR(obj->mm.rsgt)) { err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 638bf4a1ed94..646fa8677490 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -4231,10 +4231,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; - struct irdma_sc_vsi *vsi = &iwdev->vsi; - struct irdma_sc_qp *sc_qp; - struct irdma_qp *qp; - int i; INIT_LIST_HEAD(&teardown_list); @@ -4251,52 +4247,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } - if (!iwdev->roce_mode) - return; - - INIT_LIST_HEAD(&teardown_list); - for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - mutex_lock(&vsi->qos[i].qos_mutex); - list_for_each_safe (list_node, list_core_temp, - &vsi->qos[i].qplist) { - u32 qp_ip[4]; - - sc_qp = container_of(list_node, struct irdma_sc_qp, - list); - if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC) - continue; - - qp = sc_qp->qp_uk.back_qp; - if (!disconnect_all) { - if (nfo->ipv4) - qp_ip[0] = qp->udp_info.local_ipaddr[3]; - else - memcpy(qp_ip, - &qp->udp_info.local_ipaddr[0], - sizeof(qp_ip)); - } - - if (disconnect_all || - (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) && - !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) { - spin_lock(&iwdev->rf->qptable_lock); - if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) { - irdma_qp_add_ref(&qp->ibqp); - list_add(&qp->teardown_entry, - &teardown_list); - } - spin_unlock(&iwdev->rf->qptable_lock); - } - } - mutex_unlock(&vsi->qos[i].qos_mutex); - } - - list_for_each_safe (list_node, list_core_temp, &teardown_list) { - qp = container_of(list_node, struct irdma_qp, teardown_entry); - attr.qp_state = IB_QPS_ERR; - irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); - irdma_qp_rem_ref(&qp->ibqp); - } } /** diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index e46fc110004d..50299f58b6b3 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -201,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD; dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M; dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE; dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES; diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index cf53b17510cd..5986fd906308 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->cqp_db = dev->hw_regs[IRDMA_CQPDB]; dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK]; dev->irq_ops = &icrdma_irq_ops; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G; dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE; dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 46c12334c735..4789e85d717b 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -127,6 +127,7 @@ struct irdma_hw_attrs { u64 max_hw_outbound_msg_size; u64 max_hw_inbound_msg_size; u64 max_mr_size; + u64 page_size_cap; u32 min_hw_qp_id; u32 min_hw_aeq_size; u32 max_hw_aeq_size; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index c4412ece5a6d..96135a228f26 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -32,7 +32,7 @@ static int irdma_query_device(struct ib_device *ibdev, props->vendor_part_id = pcidev->device; props->hw_ver = rf->pcidev->revision; - props->page_size_cap = SZ_4K | SZ_2M | SZ_1G; + props->page_size_cap = hw_attrs->page_size_cap; props->max_mr_size = hw_attrs->max_mr_size; props->max_qp = rf->max_qp - rf->used_qps; props->max_qp_wr = hw_attrs->max_qp_wr; @@ -2781,7 +2781,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, - SZ_4K | SZ_2M | SZ_1G, + iwdev->rf->sc_dev.hw_attrs.page_size_cap, virt); if (unlikely(!iwmr->page_size)) { kfree(iwmr); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 3ad9870db108..aa45a9fee6a0 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -900,6 +900,11 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } else { dev_warn(dev, "Unexpected ACPI resources: gpio_count %d, gpio_int_idx %d\n", ts->gpio_count, ts->gpio_int_idx); + /* + * On some devices _PS0 does a reset for us and + * sometimes this is necessary for things to work. + */ + acpi_device_fix_up_power(ACPI_COMPANION(dev)); return -EINVAL; } diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index 43c521f50c85..3dda6eaabdab 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -1654,6 +1654,9 @@ static int usbtouch_probe(struct usb_interface *intf, if (id->driver_info == DEVTYPE_IGNORE) return -ENODEV; + if (id->driver_info >= ARRAY_SIZE(usbtouch_dev_info)) + return -ENODEV; + endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting); if (!endpoint) return -ENXIO; diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 2757c7768ffe..f51ab5614532 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -758,7 +758,9 @@ batt_err: static int wm97xx_mfd_remove(struct platform_device *pdev) { - return wm97xx_remove(&pdev->dev); + wm97xx_remove(&pdev->dev); + + return 0; } static int __maybe_unused wm97xx_suspend(struct device *dev) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 732f4c0daa73..febfcf2d92af 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt) ihv3->nsrcs = 0; ihv3->resv = 0; ihv3->suppress = false; - ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv; + ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); ihv3->csum = 0; csum = &ihv3->csum; csum_start = (void *)ihv3; @@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt) return skb; } -static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status, - bool validate) +static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status, + bool validate) { if (validate && amt->status >= status) return; netdev_dbg(amt->dev, "Update GW status %s -> %s", status_str[amt->status], status_str[status]); - amt->status = status; + WRITE_ONCE(amt->status, status); } static void __amt_update_relay_status(struct amt_tunnel_list *tunnel, @@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel, tunnel->status = status; } -static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status, - bool validate) -{ - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, status, validate); - spin_unlock_bh(&amt->lock); -} - static void amt_update_relay_status(struct amt_tunnel_list *tunnel, enum amt_status status, bool validate) { @@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt) if (unlikely(net_xmit_eval(err))) amt->dev->stats.tx_errors++; - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true); - spin_unlock_bh(&amt->lock); + amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true); out: rcu_read_unlock(); } @@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel) } #endif +static bool amt_queue_event(struct amt_dev *amt, enum amt_event event, + struct sk_buff *skb) +{ + int index; + + spin_lock_bh(&amt->lock); + if (amt->nr_events >= AMT_MAX_EVENTS) { + spin_unlock_bh(&amt->lock); + return 1; + } + + index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS; + amt->events[index].event = event; + amt->events[index].skb = skb; + amt->nr_events++; + amt->event_idx %= AMT_MAX_EVENTS; + queue_work(amt_wq, &amt->event_wq); + spin_unlock_bh(&amt->lock); + + return 0; +} + static void amt_secret_work(struct work_struct *work) { struct amt_dev *amt = container_of(to_delayed_work(work), @@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work) msecs_to_jiffies(AMT_SECRET_TIMEOUT)); } -static void amt_discovery_work(struct work_struct *work) +static void amt_event_send_discovery(struct amt_dev *amt) { - struct amt_dev *amt = container_of(to_delayed_work(work), - struct amt_dev, - discovery_wq); - - spin_lock_bh(&amt->lock); if (amt->status > AMT_STATUS_SENT_DISCOVERY) goto out; get_random_bytes(&amt->nonce, sizeof(__be32)); - spin_unlock_bh(&amt->lock); amt_send_discovery(amt); - spin_lock_bh(&amt->lock); out: mod_delayed_work(amt_wq, &amt->discovery_wq, msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); - spin_unlock_bh(&amt->lock); } -static void amt_req_work(struct work_struct *work) +static void amt_discovery_work(struct work_struct *work) { struct amt_dev *amt = container_of(to_delayed_work(work), struct amt_dev, - req_wq); + discovery_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL)) + mod_delayed_work(amt_wq, &amt->discovery_wq, + msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); +} + +static void amt_event_send_request(struct amt_dev *amt) +{ u32 exp; - spin_lock_bh(&amt->lock); if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT) goto out; if (amt->req_cnt > AMT_MAX_REQ_COUNT) { netdev_dbg(amt->dev, "Gateway is not ready"); amt->qi = AMT_INIT_REQ_TIMEOUT; - amt->ready4 = false; - amt->ready6 = false; + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); amt->remote_ip = 0; - __amt_update_gw_status(amt, AMT_STATUS_INIT, false); + amt_update_gw_status(amt, AMT_STATUS_INIT, false); amt->req_cnt = 0; + amt->nonce = 0; goto out; } - spin_unlock_bh(&amt->lock); + + if (!amt->req_cnt) { + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); + get_random_bytes(&amt->nonce, sizeof(__be32)); + } amt_send_request(amt, false); amt_send_request(amt, true); - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true); + amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true); amt->req_cnt++; out: exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT); mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000)); - spin_unlock_bh(&amt->lock); +} + +static void amt_req_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(to_delayed_work(work), + struct amt_dev, + req_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL)) + mod_delayed_work(amt_wq, &amt->req_wq, + msecs_to_jiffies(100)); } static bool amt_send_membership_update(struct amt_dev *amt, @@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev) /* Gateway only passes IGMP/MLD packets */ if (!report) goto free; - if ((!v6 && !amt->ready4) || (v6 && !amt->ready6)) + if ((!v6 && !READ_ONCE(amt->ready4)) || + (v6 && !READ_ONCE(amt->ready6))) goto free; if (amt_send_membership_update(amt, skb, v6)) goto free; @@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb) ipv4_is_zeronet(amta->ip4)) return true; + if (amt->status != AMT_STATUS_SENT_DISCOVERY || + amt->nonce != amta->nonce) + return true; + amt->remote_ip = amta->ip4; netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip); mod_delayed_work(amt_wq, &amt->req_wq, 0); @@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb) struct ethhdr *eth; struct iphdr *iph; + if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE) + return true; + hdr_size = sizeof(*amtmd) + sizeof(struct udphdr); if (!pskb_may_pull(skb, hdr_size)) return true; @@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, if (amtmq->reserved || amtmq->version) return true; + if (amtmq->nonce != amt->nonce) + return true; + hdr_size -= sizeof(*eth); if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false)) return true; @@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, iph = ip_hdr(skb); if (iph->version == 4) { + if (READ_ONCE(amt->ready4)) + return true; + if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3))) return true; @@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt, ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS); skb_reset_transport_header(skb); skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS); - spin_lock_bh(&amt->lock); - amt->ready4 = true; + WRITE_ONCE(amt->ready4, true); amt->mac = amtmq->response_mac; amt->req_cnt = 0; amt->qi = ihv3->qqic; - spin_unlock_bh(&amt->lock); skb->protocol = htons(ETH_P_IP); eth->h_proto = htons(ETH_P_IP); ip_eth_mc_map(iph->daddr, eth->h_dest); @@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, struct mld2_query *mld2q; struct ipv6hdr *ip6h; + if (READ_ONCE(amt->ready6)) + return true; + if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS + sizeof(*mld2q))) return true; @@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt, mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); skb_reset_transport_header(skb); skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); - spin_lock_bh(&amt->lock); - amt->ready6 = true; + WRITE_ONCE(amt->ready6, true); amt->mac = amtmq->response_mac; amt->req_cnt = 0; amt->qi = mld2q->mld2q_qqic; - spin_unlock_bh(&amt->lock); skb->protocol = htons(ETH_P_IPV6); eth->h_proto = htons(ETH_P_IPV6); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); @@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt, skb->pkt_type = PACKET_MULTICAST; skb->ip_summed = CHECKSUM_NONE; len = skb->len; + local_bh_disable(); if (__netif_rx(skb) == NET_RX_SUCCESS) { amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true); dev_sw_netstats_rx_add(amt->dev, len); } else { amt->dev->stats.rx_dropped++; } + local_bh_enable(); return false; } @@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) if (tunnel->ip4 == iph->saddr) goto send; + spin_lock_bh(&amt->lock); if (amt->nr_tunnels >= amt->max_tunnels) { + spin_unlock_bh(&amt->lock); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); return true; } @@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) tunnel = kzalloc(sizeof(*tunnel) + (sizeof(struct hlist_head) * amt->hash_buckets), GFP_ATOMIC); - if (!tunnel) + if (!tunnel) { + spin_unlock_bh(&amt->lock); return true; + } tunnel->source_port = udph->source; tunnel->ip4 = iph->saddr; @@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire); - spin_lock_bh(&amt->lock); list_add_tail_rcu(&tunnel->list, &amt->tunnel_list); tunnel->key = amt->key; - amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true); + __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true); amt->nr_tunnels++; mod_delayed_work(amt_wq, &tunnel->gc_wq, msecs_to_jiffies(amt_gmi(amt))); @@ -2688,6 +2732,38 @@ send: return false; } +static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb) +{ + int type = amt_parse_type(skb); + int err = 1; + + if (type == -1) + goto drop; + + if (amt->mode == AMT_MODE_GATEWAY) { + switch (type) { + case AMT_MSG_ADVERTISEMENT: + err = amt_advertisement_handler(amt, skb); + break; + case AMT_MSG_MEMBERSHIP_QUERY: + err = amt_membership_query_handler(amt, skb); + if (!err) + return; + break; + default: + netdev_dbg(amt->dev, "Invalid type of Gateway\n"); + break; + } + } +drop: + if (err) { + amt->dev->stats.rx_dropped++; + kfree_skb(skb); + } else { + consume_skb(skb); + } +} + static int amt_rcv(struct sock *sk, struct sk_buff *skb) { struct amt_dev *amt; @@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) err = true; goto drop; } - err = amt_advertisement_handler(amt, skb); - break; + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; + goto drop; + } + goto out; case AMT_MSG_MULTICAST_DATA: if (iph->saddr != amt->remote_ip) { netdev_dbg(amt->dev, "Invalid Relay IP\n"); @@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) err = true; goto drop; } - err = amt_membership_query_handler(amt, skb); - if (err) + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; goto drop; - else - goto out; + } + goto out; default: err = true; netdev_dbg(amt->dev, "Invalid type of Gateway\n"); @@ -2780,6 +2861,46 @@ out: return 0; } +static void amt_event_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(work, struct amt_dev, event_wq); + struct sk_buff *skb; + u8 event; + int i; + + for (i = 0; i < AMT_MAX_EVENTS; i++) { + spin_lock_bh(&amt->lock); + if (amt->nr_events == 0) { + spin_unlock_bh(&amt->lock); + return; + } + event = amt->events[amt->event_idx].event; + skb = amt->events[amt->event_idx].skb; + amt->events[amt->event_idx].event = AMT_EVENT_NONE; + amt->events[amt->event_idx].skb = NULL; + amt->nr_events--; + amt->event_idx++; + amt->event_idx %= AMT_MAX_EVENTS; + spin_unlock_bh(&amt->lock); + + switch (event) { + case AMT_EVENT_RECEIVE: + amt_gw_rcv(amt, skb); + break; + case AMT_EVENT_SEND_DISCOVERY: + amt_event_send_discovery(amt); + break; + case AMT_EVENT_SEND_REQUEST: + amt_event_send_request(amt); + break; + default: + if (skb) + kfree_skb(skb); + break; + } + } +} + static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) { struct amt_dev *amt; @@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) break; case AMT_MSG_REQUEST: case AMT_MSG_MEMBERSHIP_UPDATE: - if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT) + if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT) mod_delayed_work(amt_wq, &amt->req_wq, 0); break; default: @@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev) amt->ready4 = false; amt->ready6 = false; + amt->event_idx = 0; + amt->nr_events = 0; err = amt_socket_create(amt); if (err) @@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev) amt->req_cnt = 0; amt->remote_ip = 0; + amt->nonce = 0; get_random_bytes(&amt->key, sizeof(siphash_key_t)); amt->status = AMT_STATUS_INIT; @@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev) struct amt_dev *amt = netdev_priv(dev); struct amt_tunnel_list *tunnel, *tmp; struct socket *sock; + struct sk_buff *skb; + int i; cancel_delayed_work_sync(&amt->req_wq); cancel_delayed_work_sync(&amt->discovery_wq); @@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev) if (sock) udp_tunnel_sock_release(sock); + cancel_work_sync(&amt->event_wq); + for (i = 0; i < AMT_MAX_EVENTS; i++) { + skb = amt->events[i].skb; + if (skb) + kfree_skb(skb); + amt->events[i].event = AMT_EVENT_NONE; + amt->events[i].skb = NULL; + } + amt->ready4 = false; amt->ready6 = false; amt->req_cnt = 0; @@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev, goto err; } if (amt->mode == AMT_MODE_RELAY) { - amt->qrv = amt->net->ipv4.sysctl_igmp_qrv; + amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); amt->qri = 10; dev->needed_headroom = amt->stream_dev->needed_headroom + AMT_RELAY_HLEN; @@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev, INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work); INIT_DELAYED_WORK(&amt->req_wq, amt_req_work); INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work); + INIT_WORK(&amt->event_wq, amt_event_work); INIT_LIST_HEAD(&amt->tunnel_list); - return 0; err: dev_put(amt->stream_dev); @@ -3280,7 +3415,7 @@ static int __init amt_init(void) if (err < 0) goto unregister_notifier; - amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1); + amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0); if (!amt_wq) { err = -ENOMEM; goto rtnl_unregister; diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 50f6719b3aa4..32804fed116c 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -489,6 +489,7 @@ static void pch_can_error(struct net_device *ndev, u32 status) if (!skb) return; + errc = ioread32(&priv->regs->errc); if (status & PCH_BUS_OFF) { pch_can_set_tx_all(priv, 0); pch_can_set_rx_all(priv, 0); @@ -502,7 +503,6 @@ static void pch_can_error(struct net_device *ndev, u32 status) cf->data[7] = (errc & PCH_REC) >> 8; } - errc = ioread32(&priv->regs->errc); /* Warning interrupt. */ if (status & PCH_EWARN) { state = CAN_STATE_ERROR_WARNING; diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index e3382284e172..d3e569a02b4d 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev) of_child = of_get_child_by_name(pdev->dev.of_node, name); if (of_child && of_device_is_available(of_child)) channels_mask |= BIT(i); + of_node_put(of_child); } if (chip_id != RENESAS_RZG2L) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index f4e174cadd4e..7fc86ed405c6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1691,8 +1691,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv) u32 osc; int err; - /* The OSC_LPMEN is only supported on MCP2518FD, so use it to - * autodetect the model. + /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863, + * so use it to autodetect the model. */ err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC, MCP251XFD_REG_OSC_LPMEN, @@ -1704,10 +1704,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv) if (err) return err; - if (osc & MCP251XFD_REG_OSC_LPMEN) - devtype_data = &mcp251xfd_devtype_data_mcp2518fd; - else + if (osc & MCP251XFD_REG_OSC_LPMEN) { + /* We cannot distinguish between MCP2518FD and + * MCP251863. If firmware specifies MCP251863, keep + * it, otherwise set to MCP2518FD. + */ + if (mcp251xfd_is_251863(priv)) + devtype_data = &mcp251xfd_devtype_data_mcp251863; + else + devtype_data = &mcp251xfd_devtype_data_mcp2518fd; + } else { devtype_data = &mcp251xfd_devtype_data_mcp2517fd; + } if (!mcp251xfd_is_251XFD(priv) && priv->devtype_data.model != devtype_data->model) { diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 26bd239f4f07..fd12a68c1dcd 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1579,18 +1579,21 @@ int ksz_switch_register(struct ksz_device *dev) ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports"); if (!ports) ports = of_get_child_by_name(dev->dev->of_node, "ports"); - if (ports) + if (ports) { for_each_available_child_of_node(ports, port) { if (of_property_read_u32(port, "reg", &port_num)) continue; if (!(dev->port_mask & BIT(port_num))) { of_node_put(port); + of_node_put(ports); return -EINVAL; } of_get_phy_mode(port, &dev->ports[port_num].interface); } + of_node_put(ports); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); dev->synclko_disable = of_property_read_bool(dev->dev->of_node, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b253e27bcfb4..b03d0d0c3dbf 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, sja1105_dt_ids); +static const struct spi_device_id sja1105_spi_ids[] = { + { "sja1105e" }, + { "sja1105t" }, + { "sja1105p" }, + { "sja1105q" }, + { "sja1105r" }, + { "sja1105s" }, + { "sja1110a" }, + { "sja1110b" }, + { "sja1110c" }, + { "sja1110d" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, sja1105_spi_ids); + static struct spi_driver sja1105_driver = { .driver = { .name = "sja1105", .owner = THIS_MODULE, .of_match_table = of_match_ptr(sja1105_dt_ids), }, + .id_table = sja1105_spi_ids, .probe = sja1105_probe, .remove = sja1105_remove, .shutdown = sja1105_shutdown, diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 3110895358d8..97a92e6da60d 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = { }; MODULE_DEVICE_TABLE(of, vsc73xx_of_match); +static const struct spi_device_id vsc73xx_spi_ids[] = { + { "vsc7385" }, + { "vsc7388" }, + { "vsc7395" }, + { "vsc7398" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids); + static struct spi_driver vsc73xx_spi_driver = { .probe = vsc73xx_spi_probe, .remove = vsc73xx_spi_remove, .shutdown = vsc73xx_spi_shutdown, + .id_table = vsc73xx_spi_ids, .driver = { .name = "vsc73xx-spi", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 7c760aa65540..ddfe9208529a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - sock_net(newsk)-> - ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling), tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk, #endif } if (req->tcpopt.wsf <= 14 && - sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 528eb0f223b1..b4f5e57d0285 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2287,7 +2287,7 @@ err: /* Uses sync mcc */ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data) + u8 page_num, u32 off, u32 len, u8 *data) { struct be_dma_mem cmd; struct be_mcc_wrb *wrb; @@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, req->port = cpu_to_le32(adapter->hba_port_num); req->page_num = cpu_to_le32(page_num); status = be_mcc_notify_wait(adapter); - if (!status) { + if (!status && len > 0) { struct be_cmd_resp_port_type *resp = cmd.va; - memcpy(data, resp->page_data, PAGE_DATA_LEN); + memcpy(data, resp->page_data + off, len); } err: mutex_unlock(&adapter->mcc_lock); @@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { switch (adapter->phy.interface_type) { case PHY_TYPE_QSFP: @@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { strlcpy(adapter->phy.vendor_name, page_data + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index db1f3b908582..e2085c68c0ee 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state); int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data); + u8 page_num, u32 off, u32 len, u8 *data); int be_cmd_query_cable_type(struct be_adapter *adapter); int be_cmd_query_sfp_info(struct be_adapter *adapter); int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index dfa784339781..bd0df189d871 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev, return -EOPNOTSUPP; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { if (!page_data[SFP_PLUS_SFF_8472_COMP]) { modinfo->type = ETH_MODULE_SFF_8079; @@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev, { struct be_adapter *adapter = netdev_priv(netdev); int status; + u32 begin, end; if (!check_privilege(adapter, MAX_PRIVILEGES)) return -EOPNOTSUPP; - status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - data); - if (status) - goto err; + begin = eeprom->offset; + end = eeprom->offset + eeprom->len; + + if (begin < PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, + min_t(u32, end, PAGE_DATA_LEN) - begin, + data); + if (status) + goto err; + + data += PAGE_DATA_LEN - begin; + begin = PAGE_DATA_LEN; + } - if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { - status = be_cmd_read_port_transceiver_data(adapter, - TR_PAGE_A2, - data + - PAGE_DATA_LEN); + if (end > PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, + begin - PAGE_DATA_LEN, + end - begin, data); if (status) goto err; } - if (eeprom->offset) - memcpy(data, data + eeprom->offset, eeprom->len); err: return be_cmd_status(status); } diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 13382df2f2ef..bcf680e83811 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -630,7 +630,6 @@ struct e1000_phy_info { bool disable_polarity_correction; bool is_mdix; bool polarity_correction; - bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index e6c8e6d5234f..9466f65a6da7 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) bool blocked = false; int i = 0; - /* Check the PHY (LCD) reset flag */ - if (hw->phy.reset_disable) - return true; - while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) usleep_range(10000, 11000); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 638a3ddd7ada..2504b11c3169 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -271,7 +271,6 @@ #define I217_CGFREG_ENABLE_MTA_RESET 0x0002 #define I217_MEMPWR PHY_REG(772, 26) #define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010 -#define I217_MEMPWR_MOEM 0x1000 /* Receive Address Initial CRC Calculation */ #define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4)) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 38e60def8520..70d933f52e93 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && hw->mac.type >= e1000_pch_adp) { + /* Keep the GPT clock enabled for CSME */ + mac_data = er32(FEXTNVM); + mac_data |= BIT(3); + ew32(FEXTNVM, mac_data); /* Request ME unconfigure the device from S0ix */ mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; @@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data |= I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Disable LCD reset */ - hw->phy.reset_disable = true; - } - e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); @@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; /* Introduce S0ix implementation */ @@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) if (rc) return rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data &= ~I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Enable LCD reset */ - hw->phy.reset_disable = false; - } - return e1000e_pm_thaw(dev); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 151e9b6b9df4..fe6f25537110 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10773,7 +10773,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { - int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; @@ -10781,13 +10781,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - i40e_check_recovery_mode(pf)) { + is_recovery_mode_reported) i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); - } if (test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RECOVERY_MODE, pf->state) && - !old_recovery_mode_bit) + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -10814,13 +10812,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ - if (test_bit(__I40E_RECOVERY_MODE, pf->state) || - old_recovery_mode_bit) { + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; - if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index fda1198d2c00..69703801d1b0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -64,7 +64,6 @@ struct iavf_vsi { u16 id; DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__); int base_vector; - u16 work_limit; u16 qs_handle; void *priv; /* client driver data reference. */ }; @@ -160,8 +159,12 @@ struct iavf_vlan { struct iavf_vlan_filter { struct list_head list; struct iavf_vlan vlan; - bool remove; /* filter needs to be removed */ - bool add; /* filter needs to be added */ + struct { + u8 is_new_vlan:1; /* filter is new, wait for PF answer */ + u8 remove:1; /* filter needs to be removed */ + u8 add:1; /* filter needs to be added */ + u8 padding:5; + }; }; #define IAVF_MAX_TRAFFIC_CLASS 4 @@ -464,6 +467,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state) return "__IAVF_INIT_VERSION_CHECK"; case __IAVF_INIT_GET_RESOURCES: return "__IAVF_INIT_GET_RESOURCES"; + case __IAVF_INIT_EXTENDED_CAPS: + return "__IAVF_INIT_EXTENDED_CAPS"; + case __IAVF_INIT_CONFIG_ADAPTER: + return "__IAVF_INIT_CONFIG_ADAPTER"; case __IAVF_INIT_SW: return "__IAVF_INIT_SW"; case __IAVF_INIT_FAILED: @@ -523,6 +530,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter); int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter); int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter); void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter); +u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter); void iavf_irq_enable(struct iavf_adapter *adapter, bool flush); void iavf_configure_queues(struct iavf_adapter *adapter); void iavf_deconfigure_queues(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 3bb56714beb0..e535d4c3da49 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_vsi *vsi = &adapter->vsi; struct iavf_ring *rx_ring, *tx_ring; - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; - /* Rx and Tx usecs per queue value. If user doesn't specify the * queue, return queue 0's value to represent. */ @@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_vsi *vsi = &adapter->vsi; int i; - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); @@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir, static const struct ethtool_ops iavf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_MAX_FRAMES_IRQ | ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = iavf_get_drvinfo, .get_link = ethtool_op_get_link, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 69ade653f5d4..7dddf9800e2f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter) * iavf_get_num_vlans_added - get number of VLANs added * @adapter: board private structure */ -static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter) +u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter) { return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) + bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID); @@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev, if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto)))) return -ENOMEM; - if (proto == cpu_to_be16(ETH_P_8021Q)) - set_bit(vid, adapter->vsi.active_cvlans); - else - set_bit(vid, adapter->vsi.active_svlans); - return 0; } @@ -2331,7 +2326,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; - adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK; vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { @@ -3044,6 +3038,9 @@ continue_reset: adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; iavf_misc_irq_enable(adapter); + bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID); + bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2); /* We were running when the reset started, so we need to restore some diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 978f651c6b09..06d18797d25a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; - unsigned int budget = vsi->work_limit; + unsigned int budget = IAVF_DEFAULT_IRQ_WORK; tx_buf = &tx_ring->tx_bi[i]; tx_desc = IAVF_TX_DESC(tx_ring, i); @@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; - if (!size) - return NULL; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); + if (!size) + return rx_buffer; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 0d22bba6a5f2..15ee85dc33bd 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -628,6 +628,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter) } /** + * iavf_vlan_add_reject + * @adapter: adapter structure + * + * Remove VLAN filters from list based on PF response. + **/ +static void iavf_vlan_add_reject(struct iavf_adapter *adapter) +{ + struct iavf_vlan_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + if (f->is_new_vlan) { + if (f->vlan.tpid == ETH_P_8021Q) + clear_bit(f->vlan.vid, + adapter->vsi.active_cvlans); + else + clear_bit(f->vlan.vid, + adapter->vsi.active_svlans); + + list_del(&f->list); + kfree(f); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** * iavf_add_vlans * @adapter: adapter structure * @@ -684,6 +711,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vvfl->vlan_id[i] = f->vlan.vid; i++; f->add = false; + f->is_new_vlan = true; if (i == count) break; } @@ -696,10 +724,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); kfree(vvfl); } else { + u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters; + u16 current_vlans = iavf_get_num_vlans_added(adapter); struct virtchnl_vlan_filter_list_v2 *vvfl_v2; adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2; + if ((count + current_vlans) > max_vlans && + current_vlans < max_vlans) { + count = max_vlans - iavf_get_num_vlans_added(adapter); + more = true; + } + len = sizeof(*vvfl_v2) + ((count - 1) * sizeof(struct virtchnl_vlan_filter)); if (len > IAVF_MAX_AQ_BUF_SIZE) { @@ -726,6 +762,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter) &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; + if (i == count) + break; + /* give priority over outer if it's enabled */ if (filtering_support->outer) vlan = &vvfl_v2->filters[i].outer; @@ -737,8 +776,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) i++; f->add = false; - if (i == count) - break; + f->is_new_vlan = true; } } @@ -2082,6 +2120,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, */ iavf_netdev_features_vlan_strip_set(netdev, true); break; + case VIRTCHNL_OP_ADD_VLAN_V2: + iavf_vlan_add_reject(adapter); + dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n", + iavf_stat_str(&adapter->hw, v_retval)); + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, iavf_stat_str(&adapter->hw, v_retval), @@ -2385,6 +2428,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->adv_rss_lock); } break; + case VIRTCHNL_OP_ADD_VLAN_V2: { + struct iavf_vlan_filter *f; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->is_new_vlan) { + f->is_new_vlan = false; + if (f->vlan.tpid == ETH_P_8021Q) + set_bit(f->vlan.vid, + adapter->vsi.active_cvlans); + else + set_bit(f->vlan.vid, + adapter->vsi.active_svlans); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + } + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: /* PF enabled vlan strip on this VF. * Update netdev->features if needed to be in sync with ethtool. diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4758bdbe5df3..ebff0e04045d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6182,6 +6182,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; + if (IGC_REMOVED(hw_addr)) + return ~value; + value = readl(&hw_addr[reg]); /* reads should not return all F's */ diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index d06018150764..c0d8214148d1 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -303,7 +303,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - writel((val), &hw_addr[(reg)]); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) @@ -315,4 +316,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 48444ab9e0b1..5369a97ff5ec 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -813,6 +813,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ + spinlock_t vfs_lock; }; static inline int ixgbe_determine_xdp_q_idx(int cpu) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0493326a0d6b..d1e430b8c8aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6445,6 +6445,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); + #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 67e49aac50fe..29cc60988071 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + unsigned long flags; int rss; + spin_lock_irqsave(&adapter->vfs_lock, flags); /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); /* put the reference to all of the vf devices */ for (vf = 0; vf < num_vfs; ++vf) { @@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ if (!ixgbe_check_for_rst(hw, vf)) @@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) if (!ixgbe_check_for_ack(hw, vf)) ixgbe_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index b3526962eac8..19d3b55c578e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -168,12 +168,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule, } port = netdev_priv(ingress_dev); - mask = htons(0x1FFF); - key = htons(port->hw_id); + mask = htons(0x1FFF << 3); + key = htons(port->hw_id << 3); rule_match_set(r_match->key, SYS_PORT, key); rule_match_set(r_match->mask, SYS_PORT, mask); - mask = htons(0x1FF); + mask = htons(0x3FF); key = htons(port->dev_id); rule_match_set(r_match->key, SYS_DEV, key); rule_match_set(r_match->mask, SYS_DEV, mask); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 3c8116f16b4d..58f4e44d5ad7 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -389,8 +389,8 @@ static int __prestera_inetaddr_event(struct prestera_switch *sw, unsigned long event, struct netlink_ext_ack *extack) { - if (!prestera_netdev_check(dev) || netif_is_bridge_port(dev) || - netif_is_lag_port(dev) || netif_is_ovs_port(dev)) + if (!prestera_netdev_check(dev) || netif_is_any_bridge_port(dev) || + netif_is_lag_port(dev)) return 0; return __prestera_inetaddr_port_event(dev, event, extack); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 90e7dfd011c9..5d457bc9acc1 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i }; struct net_device_path path = {}; + if (!ctx.dev) + return -ENODEV; + memcpy(ctx.daddr, addr, sizeof(ctx.daddr)); if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 8f0cd3196aac..29be2fcafea3 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) * WDMA RX. */ - BUG_ON(idx > ARRAY_SIZE(dev->tx_ring)); + BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring)); if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE)) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 61eb96b93889..1b61bc8f59a2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -2296,8 +2296,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, devl_resources_unregister(devlink); mlxsw_core->bus->fini(mlxsw_core->bus_priv); if (!reload) { - devlink_free(devlink); devl_unlock(devlink); + devlink_free(devlink); } return; @@ -2305,8 +2305,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, reload_fail_deinit: mlxsw_core_params_unregister(mlxsw_core); devl_resources_unregister(devlink); - devlink_free(devlink); devl_unlock(devlink); + devlink_free(devlink); } EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 23d526f13f1c..2c4443c6b964 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5292,7 +5292,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, { const struct fib_nh *nh = fib_info_nh(fi, 0); - return nh->fib_nh_scope == RT_SCOPE_LINK || + return nh->fib_nh_gw_family || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } @@ -8590,9 +8590,7 @@ static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, unsigned long event, struct netlink_ext_ack *extack) { - if (netif_is_bridge_port(port_dev) || - netif_is_lag_port(port_dev) || - netif_is_ovs_port(port_dev)) + if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev)) return 0; return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, @@ -10144,7 +10142,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, unsigned long *fields = config->fields; u32 hash_fields; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: mlxsw_sp_mp4_hash_outer_addr(config); break; @@ -10162,7 +10160,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_mp_hash_inner_l3(config); break; case 3: - hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); /* Outer */ MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); @@ -10343,13 +10341,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { struct net *net = mlxsw_sp_net(mlxsw_sp); - bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; + bool usp; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index 005e56ea5da1..5893770bfd94 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid, unsigned int vid, enum macaccess_entry_type type) { + int ret; + + spin_lock(&lan966x->mac_lock); lan966x_mac_select(lan966x, mac, vid); /* Issue a write command */ @@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid, ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN), lan966x, ANA_MACACCESS); - return lan966x_mac_wait_for_completion(lan966x); + ret = lan966x_mac_wait_for_completion(lan966x); + spin_unlock(&lan966x->mac_lock); + + return ret; } /* The mask of the front ports is encoded inside the mac parameter via a call @@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port, return __lan966x_mac_learn(lan966x, port, false, mac, vid, type); } -int lan966x_mac_forget(struct lan966x *lan966x, - const unsigned char mac[ETH_ALEN], - unsigned int vid, - enum macaccess_entry_type type) +static int lan966x_mac_forget_locked(struct lan966x *lan966x, + const unsigned char mac[ETH_ALEN], + unsigned int vid, + enum macaccess_entry_type type) { + lockdep_assert_held(&lan966x->mac_lock); + lan966x_mac_select(lan966x, mac, vid); /* Issue a forget command */ @@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x, return lan966x_mac_wait_for_completion(lan966x); } +int lan966x_mac_forget(struct lan966x *lan966x, + const unsigned char mac[ETH_ALEN], + unsigned int vid, + enum macaccess_entry_type type) +{ + int ret; + + spin_lock(&lan966x->mac_lock); + ret = lan966x_mac_forget_locked(lan966x, mac, vid, type); + spin_unlock(&lan966x->mac_lock); + + return ret; +} + int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid) { return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED); @@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma { struct lan966x_mac_entry *mac_entry; - mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL); + mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC); if (!mac_entry) return NULL; @@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x, struct lan966x_mac_entry *res = NULL; struct lan966x_mac_entry *mac_entry; - spin_lock(&lan966x->mac_lock); list_for_each_entry(mac_entry, &lan966x->mac_entries, list) { if (mac_entry->vid == vid && ether_addr_equal(mac, mac_entry->mac) && @@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x, break; } } - spin_unlock(&lan966x->mac_lock); return res; } @@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port, { struct lan966x_mac_entry *mac_entry; - if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) + spin_lock(&lan966x->mac_lock); + if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) { + spin_unlock(&lan966x->mac_lock); return 0; + } /* In case the entry already exists, don't add it again to SW, * just update HW, but we need to look in the actual HW because @@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port, * add the entry but without the extern_learn flag. */ mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port); - if (mac_entry) - return lan966x_mac_learn(lan966x, port->chip_port, - addr, vid, ENTRYTYPE_LOCKED); + if (mac_entry) { + spin_unlock(&lan966x->mac_lock); + goto mac_learn; + } mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port); - if (!mac_entry) + if (!mac_entry) { + spin_unlock(&lan966x->mac_lock); return -ENOMEM; + } - spin_lock(&lan966x->mac_lock); list_add_tail(&mac_entry->list, &lan966x->mac_entries); spin_unlock(&lan966x->mac_lock); - lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED); lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev); +mac_learn: + lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED); + return 0; } @@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr, list) { if (mac_entry->vid == vid && ether_addr_equal(addr, mac_entry->mac)) { - lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, - ENTRYTYPE_LOCKED); + lan966x_mac_forget_locked(lan966x, mac_entry->mac, + mac_entry->vid, + ENTRYTYPE_LOCKED); list_del(&mac_entry->list); kfree(mac_entry); @@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x) spin_lock(&lan966x->mac_lock); list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) { - lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, - ENTRYTYPE_LOCKED); + lan966x_mac_forget_locked(lan966x, mac_entry->mac, + mac_entry->vid, ENTRYTYPE_LOCKED); list_del(&mac_entry->list); kfree(mac_entry); @@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, { struct lan966x_mac_entry *mac_entry, *tmp; unsigned char mac[ETH_ALEN] __aligned(2); + struct list_head mac_deleted_entries; u32 dest_idx; u32 column; u16 vid; + INIT_LIST_HEAD(&mac_deleted_entries); + spin_lock(&lan966x->mac_lock); list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) { bool found = false; @@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, } if (!found) { - /* Notify the bridge that the entry doesn't exist - * anymore in the HW and remove the entry from the SW - * list - */ - lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, - mac_entry->mac, mac_entry->vid, - lan966x->ports[mac_entry->port_index]->dev); - list_del(&mac_entry->list); - kfree(mac_entry); + /* Move the entry from SW list to a tmp list such that + * it would be deleted later + */ + list_add_tail(&mac_entry->list, &mac_deleted_entries); } } spin_unlock(&lan966x->mac_lock); + list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) { + /* Notify the bridge that the entry doesn't exist + * anymore in the HW + */ + lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, + mac_entry->mac, mac_entry->vid, + lan966x->ports[mac_entry->port_index]->dev); + list_del(&mac_entry->list); + kfree(mac_entry); + } + /* Now go to the list of columns and see if any entry was not in the SW * list, then that means that the entry is new so it needs to notify the * bridge. @@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) continue; + spin_lock(&lan966x->mac_lock); + mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx); + if (mac_entry) { + spin_unlock(&lan966x->mac_lock); + continue; + } + mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); - if (!mac_entry) + if (!mac_entry) { + spin_unlock(&lan966x->mac_lock); return; + } mac_entry->row = row; - - spin_lock(&lan966x->mac_lock); list_add_tail(&mac_entry->list, &lan966x->mac_entries); spin_unlock(&lan966x->mac_lock); @@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x) lan966x, ANA_MACTINDX); while (1) { + spin_lock(&lan966x->mac_lock); lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT), ANA_MACACCESS_MAC_TABLE_CMD, lan966x, ANA_MACACCESS); @@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x) stop = false; if (column == LAN966X_MAC_COLUMNS - 1 && - index == 0 && stop) + index == 0 && stop) { + spin_unlock(&lan966x->mac_lock); break; + } entry[column].mach = lan_rd(lan966x, ANA_MACHDATA); entry[column].macl = lan_rd(lan966x, ANA_MACLDATA); entry[column].maca = lan_rd(lan966x, ANA_MACACCESS); + spin_unlock(&lan966x->mac_lock); /* Once all the columns are read process them */ if (column == LAN966X_MAC_COLUMNS - 1) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 7453cc5ba832..3c7220a4603e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip4_dst_hoplimit(&rt->dst); ip_rt_put(rt); } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); } } diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index b9298031ea51..4c759488fc77 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -8,7 +8,7 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ ef100.o ef100_nic.o ef100_netdev.o \ ef100_ethtool.o ef100_rx.o ef100_tx.o sfc-$(CONFIG_SFC_MTD) += mtd.o -sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o +sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o mae.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 060392ddd612..9e65de1ab889 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -85,6 +85,7 @@ static int ef100_net_stop(struct net_device *net_dev) netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", raw_smp_processor_id()); + efx_detach_reps(efx); netif_stop_queue(net_dev); efx_stop_all(efx); efx_mcdi_mac_fini_stats(efx); @@ -176,6 +177,8 @@ static int ef100_net_open(struct net_device *net_dev) mutex_unlock(&efx->mac_lock); efx->state = STATE_NET_UP; + if (netif_running(efx->net_dev)) + efx_attach_reps(efx); return 0; @@ -195,6 +198,15 @@ static netdev_tx_t ef100_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { struct efx_nic *efx = efx_netdev_priv(net_dev); + + return __ef100_hard_start_xmit(skb, efx, net_dev, NULL); +} + +netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, + struct efx_nic *efx, + struct net_device *net_dev, + struct efx_rep *efv) +{ struct efx_tx_queue *tx_queue; struct efx_channel *channel; int rc; @@ -209,7 +221,7 @@ static netdev_tx_t ef100_hard_start_xmit(struct sk_buff *skb, } tx_queue = &channel->tx_queue[0]; - rc = ef100_enqueue_skb(tx_queue, skb); + rc = __ef100_enqueue_skb(tx_queue, skb, efv); if (rc == 0) return NETDEV_TX_OK; @@ -312,7 +324,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data) unregister_netdevice_notifier(&efx->netdev_notifier); #if defined(CONFIG_SFC_SRIOV) if (!efx->type->is_vf) - efx_ef100_pci_sriov_disable(efx); + efx_ef100_pci_sriov_disable(efx, true); #endif ef100_unregister_netdev(efx); diff --git a/drivers/net/ethernet/sfc/ef100_netdev.h b/drivers/net/ethernet/sfc/ef100_netdev.h index 38b032ba0953..86bf985e0951 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.h +++ b/drivers/net/ethernet/sfc/ef100_netdev.h @@ -10,7 +10,12 @@ */ #include <linux/netdevice.h> +#include "ef100_rep.h" +netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, + struct efx_nic *efx, + struct net_device *net_dev, + struct efx_rep *efv); int ef100_netdev_event(struct notifier_block *this, unsigned long event, void *ptr); int ef100_probe_netdev(struct efx_probe_data *probe_data); diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index f89e695cf8ac..4625d35269e6 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -946,6 +946,7 @@ static int ef100_probe_main(struct efx_nic *efx) unsigned int bar_size = resource_size(&efx->pci_dev->resource[efx->mem_bar]); struct ef100_nic_data *nic_data; char fw_version[32]; + u32 priv_mask = 0; int i, rc; if (WARN_ON(bar_size == 0)) @@ -1027,6 +1028,12 @@ static int ef100_probe_main(struct efx_nic *efx) efx_mcdi_print_fwver(efx, fw_version, sizeof(fw_version)); pci_dbg(efx->pci_dev, "Firmware version %s\n", fw_version); + rc = efx_mcdi_get_privilege_mask(efx, &priv_mask); + if (rc) /* non-fatal, and priv_mask will still be 0 */ + pci_info(efx->pci_dev, + "Failed to get privilege mask from FW, rc %d\n", rc); + nic_data->grp_mae = !!(priv_mask & MC_CMD_PRIVILEGE_MASK_IN_GRP_MAE); + if (compare_versions(fw_version, "1.1.0.1000") < 0) { pci_info(efx->pci_dev, "Firmware uses old event descriptors\n"); rc = -EINVAL; diff --git a/drivers/net/ethernet/sfc/ef100_nic.h b/drivers/net/ethernet/sfc/ef100_nic.h index 744dbbdb4adc..40f84a275057 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.h +++ b/drivers/net/ethernet/sfc/ef100_nic.h @@ -72,6 +72,7 @@ struct ef100_nic_data { u8 port_id[ETH_ALEN]; DECLARE_BITMAP(evq_phases, EFX_MAX_CHANNELS); u64 stats[EF100_STAT_COUNT]; + bool grp_mae; /* MAE Privilege */ u16 tso_max_hdr_len; u16 tso_max_payload_num_segs; u16 tso_max_frames; diff --git a/drivers/net/ethernet/sfc/ef100_regs.h b/drivers/net/ethernet/sfc/ef100_regs.h index 710bbdb19885..982b6ab1eb62 100644 --- a/drivers/net/ethernet/sfc/ef100_regs.h +++ b/drivers/net/ethernet/sfc/ef100_regs.h @@ -2,7 +2,7 @@ /**************************************************************************** * Driver for Solarflare network controllers and boards * Copyright 2018 Solarflare Communications Inc. - * Copyright 2019-2020 Xilinx Inc. + * Copyright 2019-2022 Xilinx Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -181,12 +181,6 @@ /* RHEAD_BASE_EVENT */ #define ESF_GZ_E_TYPE_LBN 60 #define ESF_GZ_E_TYPE_WIDTH 4 -#define ESE_GZ_EF100_EV_DRIVER 5 -#define ESE_GZ_EF100_EV_MCDI 4 -#define ESE_GZ_EF100_EV_CONTROL 3 -#define ESE_GZ_EF100_EV_TX_TIMESTAMP 2 -#define ESE_GZ_EF100_EV_TX_COMPLETION 1 -#define ESE_GZ_EF100_EV_RX_PKTS 0 #define ESF_GZ_EV_EVQ_PHASE_LBN 59 #define ESF_GZ_EV_EVQ_PHASE_WIDTH 1 #define ESE_GZ_RHEAD_BASE_EVENT_STRUCT_SIZE 64 @@ -369,14 +363,18 @@ #define ESF_GZ_RX_PREFIX_VLAN_STRIP_TCI_WIDTH 16 #define ESF_GZ_RX_PREFIX_CSUM_FRAME_LBN 144 #define ESF_GZ_RX_PREFIX_CSUM_FRAME_WIDTH 16 -#define ESF_GZ_RX_PREFIX_INGRESS_VPORT_LBN 128 -#define ESF_GZ_RX_PREFIX_INGRESS_VPORT_WIDTH 16 +#define ESF_GZ_RX_PREFIX_INGRESS_MPORT_LBN 128 +#define ESF_GZ_RX_PREFIX_INGRESS_MPORT_WIDTH 16 #define ESF_GZ_RX_PREFIX_USER_MARK_LBN 96 #define ESF_GZ_RX_PREFIX_USER_MARK_WIDTH 32 #define ESF_GZ_RX_PREFIX_RSS_HASH_LBN 64 #define ESF_GZ_RX_PREFIX_RSS_HASH_WIDTH 32 -#define ESF_GZ_RX_PREFIX_PARTIAL_TSTAMP_LBN 32 -#define ESF_GZ_RX_PREFIX_PARTIAL_TSTAMP_WIDTH 32 +#define ESF_GZ_RX_PREFIX_PARTIAL_TSTAMP_LBN 34 +#define ESF_GZ_RX_PREFIX_PARTIAL_TSTAMP_WIDTH 30 +#define ESF_GZ_RX_PREFIX_VSWITCH_STATUS_LBN 33 +#define ESF_GZ_RX_PREFIX_VSWITCH_STATUS_WIDTH 1 +#define ESF_GZ_RX_PREFIX_VLAN_STRIPPED_LBN 32 +#define ESF_GZ_RX_PREFIX_VLAN_STRIPPED_WIDTH 1 #define ESF_GZ_RX_PREFIX_CLASS_LBN 16 #define ESF_GZ_RX_PREFIX_CLASS_WIDTH 16 #define ESF_GZ_RX_PREFIX_USER_FLAG_LBN 15 @@ -454,12 +452,8 @@ #define ESF_GZ_M2M_TRANSLATE_ADDR_WIDTH 1 #define ESF_GZ_M2M_RSVD_LBN 120 #define ESF_GZ_M2M_RSVD_WIDTH 2 -#define ESF_GZ_M2M_ADDR_SPC_LBN 108 -#define ESF_GZ_M2M_ADDR_SPC_WIDTH 12 -#define ESF_GZ_M2M_ADDR_SPC_PASID_LBN 86 -#define ESF_GZ_M2M_ADDR_SPC_PASID_WIDTH 22 -#define ESF_GZ_M2M_ADDR_SPC_MODE_LBN 84 -#define ESF_GZ_M2M_ADDR_SPC_MODE_WIDTH 2 +#define ESF_GZ_M2M_ADDR_SPC_ID_LBN 84 +#define ESF_GZ_M2M_ADDR_SPC_ID_WIDTH 36 #define ESF_GZ_M2M_LEN_MINUS_1_LBN 64 #define ESF_GZ_M2M_LEN_MINUS_1_WIDTH 20 #define ESF_GZ_M2M_ADDR_LBN 0 @@ -492,12 +486,8 @@ #define ESF_GZ_TX_SEG_TRANSLATE_ADDR_WIDTH 1 #define ESF_GZ_TX_SEG_RSVD2_LBN 120 #define ESF_GZ_TX_SEG_RSVD2_WIDTH 2 -#define ESF_GZ_TX_SEG_ADDR_SPC_LBN 108 -#define ESF_GZ_TX_SEG_ADDR_SPC_WIDTH 12 -#define ESF_GZ_TX_SEG_ADDR_SPC_PASID_LBN 86 -#define ESF_GZ_TX_SEG_ADDR_SPC_PASID_WIDTH 22 -#define ESF_GZ_TX_SEG_ADDR_SPC_MODE_LBN 84 -#define ESF_GZ_TX_SEG_ADDR_SPC_MODE_WIDTH 2 +#define ESF_GZ_TX_SEG_ADDR_SPC_ID_LBN 84 +#define ESF_GZ_TX_SEG_ADDR_SPC_ID_WIDTH 36 #define ESF_GZ_TX_SEG_RSVD_LBN 80 #define ESF_GZ_TX_SEG_RSVD_WIDTH 4 #define ESF_GZ_TX_SEG_LEN_LBN 64 @@ -583,6 +573,12 @@ #define ESE_GZ_SF_TX_TSO_DSC_FMT_STRUCT_SIZE 124 +/* Enum D2VIO_MSG_OP */ +#define ESE_GZ_QUE_JBDNE 3 +#define ESE_GZ_QUE_EVICT 2 +#define ESE_GZ_QUE_EMPTY 1 +#define ESE_GZ_NOP 0 + /* Enum DESIGN_PARAMS */ #define ESE_EF100_DP_GZ_RX_MAX_RUNT 17 #define ESE_EF100_DP_GZ_VI_STRIDES 16 @@ -630,6 +626,19 @@ #define ESE_GZ_PCI_BASE_CONFIG_SPACE_SIZE 256 #define ESE_GZ_PCI_EXPRESS_XCAP_HDR_SIZE 4 +/* Enum RH_DSC_TYPE */ +#define ESE_GZ_TX_TOMB 0xF +#define ESE_GZ_TX_VIO 0xE +#define ESE_GZ_TX_TSO_OVRRD 0x8 +#define ESE_GZ_TX_D2CMP 0x7 +#define ESE_GZ_TX_DATA 0x6 +#define ESE_GZ_TX_D2M 0x5 +#define ESE_GZ_TX_M2M 0x4 +#define ESE_GZ_TX_SEG 0x3 +#define ESE_GZ_TX_TSO 0x2 +#define ESE_GZ_TX_OVRRD 0x1 +#define ESE_GZ_TX_SEND 0x0 + /* Enum RH_HCLASS_L2_CLASS */ #define ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN 1 #define ESE_GZ_RH_HCLASS_L2_CLASS_OTHER 0 @@ -666,6 +675,25 @@ #define ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN 1 #define ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE 0 +/* Enum SF_CTL_EVENT_SUBTYPE */ +#define ESE_GZ_EF100_CTL_EV_EVQ_TIMEOUT 0x3 +#define ESE_GZ_EF100_CTL_EV_FLUSH 0x2 +#define ESE_GZ_EF100_CTL_EV_TIME_SYNC 0x1 +#define ESE_GZ_EF100_CTL_EV_UNSOL_OVERFLOW 0x0 + +/* Enum SF_EVENT_TYPE */ +#define ESE_GZ_EF100_EV_DRIVER 0x5 +#define ESE_GZ_EF100_EV_MCDI 0x4 +#define ESE_GZ_EF100_EV_CONTROL 0x3 +#define ESE_GZ_EF100_EV_TX_TIMESTAMP 0x2 +#define ESE_GZ_EF100_EV_TX_COMPLETION 0x1 +#define ESE_GZ_EF100_EV_RX_PKTS 0x0 + +/* Enum SF_EW_EVENT_TYPE */ +#define ESE_GZ_EF100_EWEV_VIRTQ_DESC 0x2 +#define ESE_GZ_EF100_EWEV_TXQ_DESC 0x1 +#define ESE_GZ_EF100_EWEV_64BIT 0x0 + /* Enum TX_DESC_CSO_PARTIAL_EN */ #define ESE_GZ_TX_DESC_CSO_PARTIAL_EN_TCP 2 #define ESE_GZ_TX_DESC_CSO_PARTIAL_EN_UDP 1 @@ -681,6 +709,15 @@ #define ESE_GZ_TX_DESC_IP4_ID_INC_MOD16 2 #define ESE_GZ_TX_DESC_IP4_ID_INC_MOD15 1 #define ESE_GZ_TX_DESC_IP4_ID_NO_OP 0 + +/* Enum VIRTIO_NET_HDR_F */ +#define ESE_GZ_NEEDS_CSUM 0x1 + +/* Enum VIRTIO_NET_HDR_GSO */ +#define ESE_GZ_TCPV6 0x4 +#define ESE_GZ_UDP 0x3 +#define ESE_GZ_TCPV4 0x1 +#define ESE_GZ_NONE 0x0 /**************************************************************************/ #define ESF_GZ_EV_DEBUG_EVENT_GEN_FLAGS_LBN 44 diff --git a/drivers/net/ethernet/sfc/ef100_rep.c b/drivers/net/ethernet/sfc/ef100_rep.c new file mode 100644 index 000000000000..d07539f091b8 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef100_rep.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * Copyright 2020-2022 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "ef100_rep.h" +#include "ef100_netdev.h" +#include "ef100_nic.h" +#include "mae.h" + +#define EFX_EF100_REP_DRIVER "efx_ef100_rep" + +static int efx_ef100_rep_init_struct(struct efx_nic *efx, struct efx_rep *efv, + unsigned int i) +{ + efv->parent = efx; + efv->idx = i; + INIT_LIST_HEAD(&efv->list); + efv->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFDOWN | + NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR | NETIF_MSG_HW; + return 0; +} + +static netdev_tx_t efx_ef100_rep_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct efx_rep *efv = netdev_priv(dev); + struct efx_nic *efx = efv->parent; + netdev_tx_t rc; + + /* __ef100_hard_start_xmit() will always return success even in the + * case of TX drops, where it will increment efx's tx_dropped. The + * efv stats really only count attempted TX, not success/failure. + */ + atomic64_inc(&efv->stats.tx_packets); + atomic64_add(skb->len, &efv->stats.tx_bytes); + netif_tx_lock(efx->net_dev); + rc = __ef100_hard_start_xmit(skb, efx, dev, efv); + netif_tx_unlock(efx->net_dev); + return rc; +} + +static int efx_ef100_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct efx_rep *efv = netdev_priv(dev); + struct efx_nic *efx = efv->parent; + struct ef100_nic_data *nic_data; + + nic_data = efx->nic_data; + /* nic_data->port_id is a u8[] */ + ppid->id_len = sizeof(nic_data->port_id); + memcpy(ppid->id, nic_data->port_id, sizeof(nic_data->port_id)); + return 0; +} + +static int efx_ef100_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct efx_rep *efv = netdev_priv(dev); + struct efx_nic *efx = efv->parent; + struct ef100_nic_data *nic_data; + int ret; + + nic_data = efx->nic_data; + ret = snprintf(buf, len, "p%upf%uvf%u", efx->port_num, + nic_data->pf_index, efv->idx); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static const struct net_device_ops efx_ef100_rep_netdev_ops = { + .ndo_start_xmit = efx_ef100_rep_xmit, + .ndo_get_port_parent_id = efx_ef100_rep_get_port_parent_id, + .ndo_get_phys_port_name = efx_ef100_rep_get_phys_port_name, +}; + +static void efx_ef100_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strscpy(drvinfo->driver, EFX_EF100_REP_DRIVER, sizeof(drvinfo->driver)); +} + +static u32 efx_ef100_rep_ethtool_get_msglevel(struct net_device *net_dev) +{ + struct efx_rep *efv = netdev_priv(net_dev); + + return efv->msg_enable; +} + +static void efx_ef100_rep_ethtool_set_msglevel(struct net_device *net_dev, + u32 msg_enable) +{ + struct efx_rep *efv = netdev_priv(net_dev); + + efv->msg_enable = msg_enable; +} + +static const struct ethtool_ops efx_ef100_rep_ethtool_ops = { + .get_drvinfo = efx_ef100_rep_get_drvinfo, + .get_msglevel = efx_ef100_rep_ethtool_get_msglevel, + .set_msglevel = efx_ef100_rep_ethtool_set_msglevel, +}; + +static struct efx_rep *efx_ef100_rep_create_netdev(struct efx_nic *efx, + unsigned int i) +{ + struct net_device *net_dev; + struct efx_rep *efv; + int rc; + + net_dev = alloc_etherdev_mq(sizeof(*efv), 1); + if (!net_dev) + return ERR_PTR(-ENOMEM); + + efv = netdev_priv(net_dev); + rc = efx_ef100_rep_init_struct(efx, efv, i); + if (rc) + goto fail1; + efv->net_dev = net_dev; + rtnl_lock(); + spin_lock_bh(&efx->vf_reps_lock); + list_add_tail(&efv->list, &efx->vf_reps); + spin_unlock_bh(&efx->vf_reps_lock); + if (netif_running(efx->net_dev) && efx->state == STATE_NET_UP) { + netif_device_attach(net_dev); + netif_carrier_on(net_dev); + } else { + netif_carrier_off(net_dev); + netif_tx_stop_all_queues(net_dev); + } + rtnl_unlock(); + + net_dev->netdev_ops = &efx_ef100_rep_netdev_ops; + net_dev->ethtool_ops = &efx_ef100_rep_ethtool_ops; + net_dev->min_mtu = EFX_MIN_MTU; + net_dev->max_mtu = EFX_MAX_MTU; + net_dev->features |= NETIF_F_LLTX; + net_dev->hw_features |= NETIF_F_LLTX; + return efv; +fail1: + free_netdev(net_dev); + return ERR_PTR(rc); +} + +static int efx_ef100_configure_rep(struct efx_rep *efv) +{ + struct efx_nic *efx = efv->parent; + u32 selector; + int rc; + + /* Construct mport selector for corresponding VF */ + efx_mae_mport_vf(efx, efv->idx, &selector); + /* Look up actual mport ID */ + rc = efx_mae_lookup_mport(efx, selector, &efv->mport); + if (rc) + return rc; + pci_dbg(efx->pci_dev, "VF %u has mport ID %#x\n", efv->idx, efv->mport); + /* mport label should fit in 16 bits */ + WARN_ON(efv->mport >> 16); + + return 0; +} + +static void efx_ef100_rep_destroy_netdev(struct efx_rep *efv) +{ + struct efx_nic *efx = efv->parent; + + rtnl_lock(); + spin_lock_bh(&efx->vf_reps_lock); + list_del(&efv->list); + spin_unlock_bh(&efx->vf_reps_lock); + rtnl_unlock(); + free_netdev(efv->net_dev); +} + +int efx_ef100_vfrep_create(struct efx_nic *efx, unsigned int i) +{ + struct efx_rep *efv; + int rc; + + efv = efx_ef100_rep_create_netdev(efx, i); + if (IS_ERR(efv)) { + rc = PTR_ERR(efv); + pci_err(efx->pci_dev, + "Failed to create representor for VF %d, rc %d\n", i, + rc); + return rc; + } + rc = efx_ef100_configure_rep(efv); + if (rc) { + pci_err(efx->pci_dev, + "Failed to configure representor for VF %d, rc %d\n", + i, rc); + goto fail; + } + rc = register_netdev(efv->net_dev); + if (rc) { + pci_err(efx->pci_dev, + "Failed to register representor for VF %d, rc %d\n", + i, rc); + goto fail; + } + pci_dbg(efx->pci_dev, "Representor for VF %d is %s\n", i, + efv->net_dev->name); + return 0; +fail: + efx_ef100_rep_destroy_netdev(efv); + return rc; +} + +void efx_ef100_vfrep_destroy(struct efx_nic *efx, struct efx_rep *efv) +{ + struct net_device *rep_dev; + + rep_dev = efv->net_dev; + if (!rep_dev) + return; + netif_dbg(efx, drv, rep_dev, "Removing VF representor\n"); + unregister_netdev(rep_dev); + efx_ef100_rep_destroy_netdev(efv); +} + +void efx_ef100_fini_vfreps(struct efx_nic *efx) +{ + struct ef100_nic_data *nic_data = efx->nic_data; + struct efx_rep *efv, *next; + + if (!nic_data->grp_mae) + return; + + list_for_each_entry_safe(efv, next, &efx->vf_reps, list) + efx_ef100_vfrep_destroy(efx, efv); +} diff --git a/drivers/net/ethernet/sfc/ef100_rep.h b/drivers/net/ethernet/sfc/ef100_rep.h new file mode 100644 index 000000000000..d47fd8ff6220 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef100_rep.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * Copyright 2020-2022 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/* Handling for ef100 representor netdevs */ +#ifndef EF100_REP_H +#define EF100_REP_H + +#include "net_driver.h" + +struct efx_rep_sw_stats { + atomic64_t rx_packets, tx_packets; + atomic64_t rx_bytes, tx_bytes; + atomic64_t rx_dropped, tx_errors; +}; + +/** + * struct efx_rep - Private data for an Efx representor + * + * @parent: the efx PF which manages this representor + * @net_dev: representor netdevice + * @msg_enable: log message enable flags + * @mport: m-port ID of corresponding VF + * @idx: VF index + * @list: entry on efx->vf_reps + * @stats: software traffic counters for netdev stats + */ +struct efx_rep { + struct efx_nic *parent; + struct net_device *net_dev; + u32 msg_enable; + u32 mport; + unsigned int idx; + struct list_head list; + struct efx_rep_sw_stats stats; +}; + +int efx_ef100_vfrep_create(struct efx_nic *efx, unsigned int i); +void efx_ef100_vfrep_destroy(struct efx_nic *efx, struct efx_rep *efv); +void efx_ef100_fini_vfreps(struct efx_nic *efx); + +#endif /* EF100_REP_H */ diff --git a/drivers/net/ethernet/sfc/ef100_sriov.c b/drivers/net/ethernet/sfc/ef100_sriov.c index 664578176bfe..94bdbfcb47e8 100644 --- a/drivers/net/ethernet/sfc/ef100_sriov.c +++ b/drivers/net/ethernet/sfc/ef100_sriov.c @@ -11,46 +11,62 @@ #include "ef100_sriov.h" #include "ef100_nic.h" +#include "ef100_rep.h" static int efx_ef100_pci_sriov_enable(struct efx_nic *efx, int num_vfs) { + struct ef100_nic_data *nic_data = efx->nic_data; struct pci_dev *dev = efx->pci_dev; - int rc; + struct efx_rep *efv, *next; + int rc, i; efx->vf_count = num_vfs; rc = pci_enable_sriov(dev, num_vfs); if (rc) - goto fail; + goto fail1; + if (!nic_data->grp_mae) + return 0; + + for (i = 0; i < num_vfs; i++) { + rc = efx_ef100_vfrep_create(efx, i); + if (rc) + goto fail2; + } return 0; -fail: +fail2: + list_for_each_entry_safe(efv, next, &efx->vf_reps, list) + efx_ef100_vfrep_destroy(efx, efv); + pci_disable_sriov(dev); +fail1: netif_err(efx, probe, efx->net_dev, "Failed to enable SRIOV VFs\n"); efx->vf_count = 0; return rc; } -int efx_ef100_pci_sriov_disable(struct efx_nic *efx) +int efx_ef100_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; unsigned int vfs_assigned; vfs_assigned = pci_vfs_assigned(dev); - if (vfs_assigned) { + if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " "please detach them before disabling SR-IOV\n"); return -EBUSY; } - pci_disable_sriov(dev); - + efx_ef100_fini_vfreps(efx); + if (!vfs_assigned) + pci_disable_sriov(dev); return 0; } int efx_ef100_sriov_configure(struct efx_nic *efx, int num_vfs) { if (num_vfs == 0) - return efx_ef100_pci_sriov_disable(efx); + return efx_ef100_pci_sriov_disable(efx, false); else return efx_ef100_pci_sriov_enable(efx, num_vfs); } diff --git a/drivers/net/ethernet/sfc/ef100_sriov.h b/drivers/net/ethernet/sfc/ef100_sriov.h index c48fccd46c57..8ffdf464dd1d 100644 --- a/drivers/net/ethernet/sfc/ef100_sriov.h +++ b/drivers/net/ethernet/sfc/ef100_sriov.h @@ -11,4 +11,4 @@ #include "net_driver.h" int efx_ef100_sriov_configure(struct efx_nic *efx, int num_vfs); -int efx_ef100_pci_sriov_disable(struct efx_nic *efx); +int efx_ef100_pci_sriov_disable(struct efx_nic *efx, bool force); diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index 26ef51d6b542..102ddc7e206a 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -254,7 +254,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx, static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, - unsigned int segment_count) + unsigned int segment_count, + struct efx_rep *efv) { unsigned int old_write_count = tx_queue->write_count; unsigned int new_write_count = old_write_count; @@ -272,6 +273,20 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, else next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND; + if (unlikely(efv)) { + /* Create TX override descriptor */ + write_ptr = new_write_count & tx_queue->ptr_mask; + txd = ef100_tx_desc(tx_queue, write_ptr); + ++new_write_count; + + tx_queue->packet_write_count = new_write_count; + EFX_POPULATE_OWORD_3(*txd, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX, + ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport, + ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1); + nr_descs--; + } + /* if it's a raw write (such as XDP) then always SEND single frames */ if (!skb) nr_descs = 1; @@ -306,6 +321,9 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, /* if it's a raw write (such as XDP) then always SEND */ next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG : ESE_GZ_TX_DESC_TYPE_SEND; + /* mark as an EFV buffer if applicable */ + if (unlikely(efv)) + buffer->flags |= EFX_TX_BUF_EFV; } while (new_write_count != tx_queue->insert_count); @@ -324,7 +342,7 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue, void ef100_tx_write(struct efx_tx_queue *tx_queue) { - ef100_tx_make_descriptors(tx_queue, NULL, 0); + ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL); ef100_tx_push_buffers(tx_queue); } @@ -351,6 +369,12 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) */ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { + return __ef100_enqueue_skb(tx_queue, skb, NULL); +} + +int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + struct efx_rep *efv) +{ unsigned int old_insert_count = tx_queue->insert_count; struct efx_nic *efx = tx_queue->efx; bool xmit_more = netdev_xmit_more(); @@ -376,16 +400,64 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return 0; } + if (unlikely(efv)) { + struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + + /* Drop representor packets if the queue is stopped. + * We currently don't assert backoff to representors so this is + * to make sure representor traffic can't starve the main + * net device. + * And, of course, if there are no TX descriptors left. + */ + if (netif_tx_queue_stopped(tx_queue->core_txq) || + unlikely(efx_tx_buffer_in_use(buffer))) { + atomic64_inc(&efv->stats.tx_errors); + rc = -ENOSPC; + goto err; + } + + /* Also drop representor traffic if it could cause us to + * stop the queue. If we assert backoff and we haven't + * received traffic on the main net device recently then the + * TX watchdog can go off erroneously. + */ + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + fill_level += efx_tx_max_skb_descs(efx); + if (fill_level > efx->txq_stop_thresh) { + struct efx_tx_queue *txq2; + + /* Refresh cached fill level and re-check */ + efx_for_each_channel_tx_queue(txq2, tx_queue->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); + + fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); + fill_level += efx_tx_max_skb_descs(efx); + if (fill_level > efx->txq_stop_thresh) { + atomic64_inc(&efv->stats.tx_errors); + rc = -ENOSPC; + goto err; + } + } + + buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV; + tx_queue->insert_count++; + } + /* Map for DMA and create descriptors */ rc = efx_tx_map_data(tx_queue, skb, segments); if (rc) goto err; - ef100_tx_make_descriptors(tx_queue, skb, segments); + ef100_tx_make_descriptors(tx_queue, skb, segments, efv); fill_level = efx_channel_tx_old_fill_level(tx_queue->channel); if (fill_level > efx->txq_stop_thresh) { struct efx_tx_queue *txq2; + /* Because of checks above, representor traffic should + * not be able to stop the queue. + */ + WARN_ON(efv); + netif_tx_stop_queue(tx_queue->core_txq); /* Re-read after a memory barrier in case we've raced with * the completion path. Otherwise there's a danger we'll never @@ -404,8 +476,12 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* If xmit_more then we don't need to push the doorbell, unless there * are 256 descriptors already queued in which case we have to push to * ensure we never push more than 256 at once. + * + * Always push for representor traffic, and don't account it to parent + * PF netdevice's BQL. */ - if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || + if (unlikely(efv) || + __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) || tx_queue->write_count - tx_queue->notify_count > 255) ef100_tx_push_buffers(tx_queue); diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h index ddc4b98fa6db..e9e11540fcde 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.h +++ b/drivers/net/ethernet/sfc/ef100_tx.h @@ -13,6 +13,7 @@ #define EFX_EF100_TX_H #include "net_driver.h" +#include "ef100_rep.h" int ef100_tx_probe(struct efx_tx_queue *tx_queue); void ef100_tx_init(struct efx_tx_queue *tx_queue); @@ -22,4 +23,6 @@ unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx); void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); +int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + struct efx_rep *efv); #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index c05a83da9e44..4239c7ece123 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -12,6 +12,7 @@ #include "net_driver.h" #include "ef100_rx.h" #include "ef100_tx.h" +#include "efx_common.h" #include "filter.h" int efx_net_open(struct net_device *net_dev); @@ -206,6 +207,9 @@ static inline void efx_device_detach_sync(struct efx_nic *efx) { struct net_device *dev = efx->net_dev; + /* We must stop reps (which use our TX) before we stop ourselves. */ + efx_detach_reps(efx); + /* Lock/freeze all TX queues so that we can be sure the * TX scheduler is stopped when we're done and before * netif_device_present() becomes false. @@ -217,8 +221,11 @@ static inline void efx_device_detach_sync(struct efx_nic *efx) static inline void efx_device_attach_if_not_resetting(struct efx_nic *efx) { - if ((efx->state != STATE_DISABLED) && !efx->reset_pending) + if ((efx->state != STATE_DISABLED) && !efx->reset_pending) { netif_device_attach(efx->net_dev); + if (efx->state == STATE_NET_UP) + efx_attach_reps(efx); + } } static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem) diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 56eb717bb07a..a929a1aaba92 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -24,6 +24,7 @@ #include "mcdi_port_common.h" #include "io.h" #include "mcdi_pcol.h" +#include "ef100_rep.h" static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFDOWN | @@ -1021,6 +1022,8 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, sizeof(*efx->rps_hash_table), GFP_KERNEL); #endif + spin_lock_init(&efx->vf_reps_lock); + INIT_LIST_HEAD(&efx->vf_reps); INIT_WORK(&efx->mac_work, efx_mac_work); init_waitqueue_head(&efx->flush_wq); @@ -1389,3 +1392,38 @@ int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len) return -EINVAL; return 0; } + +void efx_detach_reps(struct efx_nic *efx) +{ + struct net_device *rep_dev; + struct efx_rep *efv; + + ASSERT_RTNL(); + netif_dbg(efx, drv, efx->net_dev, "Detaching VF representors\n"); + list_for_each_entry(efv, &efx->vf_reps, list) { + rep_dev = efv->net_dev; + if (!rep_dev) + continue; + netif_carrier_off(rep_dev); + /* See efx_device_detach_sync() */ + netif_tx_lock_bh(rep_dev); + netif_tx_stop_all_queues(rep_dev); + netif_tx_unlock_bh(rep_dev); + } +} + +void efx_attach_reps(struct efx_nic *efx) +{ + struct net_device *rep_dev; + struct efx_rep *efv; + + ASSERT_RTNL(); + netif_dbg(efx, drv, efx->net_dev, "Attaching VF representors\n"); + list_for_each_entry(efv, &efx->vf_reps, list) { + rep_dev = efv->net_dev; + if (!rep_dev) + continue; + netif_tx_wake_all_queues(rep_dev); + netif_carrier_on(rep_dev); + } +} diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h index 93babc1a2678..2c54dac3e662 100644 --- a/drivers/net/ethernet/sfc/efx_common.h +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -111,4 +111,7 @@ int efx_get_phys_port_id(struct net_device *net_dev, int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len); + +void efx_detach_reps(struct efx_nic *efx); +void efx_attach_reps(struct efx_nic *efx); #endif diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c new file mode 100644 index 000000000000..011ebd46ada5 --- /dev/null +++ b/drivers/net/ethernet/sfc/mae.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * Copyright 2020-2022 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "mae.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +void efx_mae_mport_vf(struct efx_nic *efx __always_unused, u32 vf_id, u32 *out) +{ + efx_dword_t mport; + + EFX_POPULATE_DWORD_3(mport, + MAE_MPORT_SELECTOR_TYPE, MAE_MPORT_SELECTOR_TYPE_FUNC, + MAE_MPORT_SELECTOR_FUNC_PF_ID, MAE_MPORT_SELECTOR_FUNC_PF_ID_CALLER, + MAE_MPORT_SELECTOR_FUNC_VF_ID, vf_id); + *out = EFX_DWORD_VAL(mport); +} + +/* id is really only 24 bits wide */ +int efx_mae_lookup_mport(struct efx_nic *efx, u32 selector, u32 *id) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MPORT_LOOKUP_OUT_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MPORT_LOOKUP_IN_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MAE_MPORT_LOOKUP_IN_MPORT_SELECTOR, selector); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MPORT_LOOKUP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + *id = MCDI_DWORD(outbuf, MAE_MPORT_LOOKUP_OUT_MPORT_ID); + return 0; +} diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h new file mode 100644 index 000000000000..27e69e8a54b6 --- /dev/null +++ b/drivers/net/ethernet/sfc/mae.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * Copyright 2020-2022 Xilinx Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EF100_MAE_H +#define EF100_MAE_H +/* MCDI interface for the ef100 Match-Action Engine */ + +#include "net_driver.h" + +void efx_mae_mport_vf(struct efx_nic *efx, u32 vf_id, u32 *out); + +int efx_mae_lookup_mport(struct efx_nic *efx, u32 selector, u32 *id); + +#endif /* EF100_MAE_H */ diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 3225fe64c397..af338208eae9 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -2129,6 +2129,52 @@ fail: return rc; } +/* Failure to read a privilege mask is never fatal, because we can always + * carry on as though we didn't have the privilege we were interested in. + * So use efx_mcdi_rpc_quiet(). + */ +int efx_mcdi_get_privilege_mask(struct efx_nic *efx, u32 *mask) +{ + MCDI_DECLARE_BUF(fi_outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + MCDI_DECLARE_BUF(pm_inbuf, MC_CMD_PRIVILEGE_MASK_IN_LEN); + MCDI_DECLARE_BUF(pm_outbuf, MC_CMD_PRIVILEGE_MASK_OUT_LEN); + size_t outlen; + u16 pf, vf; + int rc; + + if (!efx || !mask) + return -EINVAL; + + /* Get our function number */ + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, + fi_outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN, + &outlen); + if (rc != 0) + return rc; + if (outlen < MC_CMD_GET_FUNCTION_INFO_OUT_LEN) + return -EIO; + + pf = MCDI_DWORD(fi_outbuf, GET_FUNCTION_INFO_OUT_PF); + vf = MCDI_DWORD(fi_outbuf, GET_FUNCTION_INFO_OUT_VF); + + MCDI_POPULATE_DWORD_2(pm_inbuf, PRIVILEGE_MASK_IN_FUNCTION, + PRIVILEGE_MASK_IN_FUNCTION_PF, pf, + PRIVILEGE_MASK_IN_FUNCTION_VF, vf); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PRIVILEGE_MASK, + pm_inbuf, sizeof(pm_inbuf), + pm_outbuf, sizeof(pm_outbuf), &outlen); + + if (rc != 0) + return rc; + if (outlen < MC_CMD_PRIVILEGE_MASK_OUT_LEN) + return -EIO; + + *mask = MCDI_DWORD(pm_outbuf, PRIVILEGE_MASK_OUT_OLD_MASK); + + return 0; +} + #ifdef CONFIG_SFC_MTD #define EFX_MCDI_NVRAM_LEN_MAX 128 diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 69c2924a147c..f74f6ce8b27d 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -366,6 +366,7 @@ int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled, unsigned int *flags); int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, unsigned int *enabled_out); +int efx_mcdi_get_privilege_mask(struct efx_nic *efx, u32 *mask); #ifdef CONFIG_SFC_MCDI_MON int efx_mcdi_mon_probe(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 2228c88a7f31..4cde54cf77b9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -178,6 +178,7 @@ struct efx_tx_buffer { #define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ #define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */ #define EFX_TX_BUF_TSO_V3 0x40 /* empty buffer for a TSO_V3 descriptor */ +#define EFX_TX_BUF_EFV 0x100 /* buffer was sent from representor */ /** * struct efx_tx_queue - An Efx TX queue @@ -966,6 +967,8 @@ enum efx_xdp_tx_queues_mode { * @vf_count: Number of VFs intended to be enabled. * @vf_init_count: Number of VFs that have been fully initialised. * @vi_scale: log2 number of vnics per VF. + * @vf_reps_lock: Protects vf_reps list + * @vf_reps: local VF reps * @ptp_data: PTP state data * @ptp_warned: has this NIC seen and warned about unexpected PTP events? * @vpd_sn: Serial number read from VPD @@ -1145,6 +1148,8 @@ struct efx_nic { unsigned vf_init_count; unsigned vi_scale; #endif + spinlock_t vf_reps_lock; + struct list_head vf_reps; struct efx_ptp_data *ptp_data; bool ptp_warned; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 79cc0bb76321..d12474042c84 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -559,6 +559,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, void efx_xmit_done_single(struct efx_tx_queue *tx_queue) { unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int efv_pkts_compl = 0; unsigned int read_ptr; bool finished = false; @@ -580,7 +581,8 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue) /* Need to check the flag before dequeueing. */ if (buffer->flags & EFX_TX_BUF_SKB) finished = true; - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl, + &efv_pkts_compl); ++tx_queue->read_count; read_ptr = tx_queue->read_count & tx_queue->ptr_mask; @@ -589,7 +591,7 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue) tx_queue->pkts_compl += pkts_compl; tx_queue->bytes_compl += bytes_compl; - EFX_WARN_ON_PARANOID(pkts_compl != 1); + EFX_WARN_ON_PARANOID(pkts_compl + efv_pkts_compl != 1); efx_xmit_done_check_empty(tx_queue); } diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 658ea2d34070..67e789b96c43 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -109,9 +109,11 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) /* Free any buffers left in the ring */ while (tx_queue->read_count != tx_queue->write_count) { unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int efv_pkts_compl = 0; buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl, + &efv_pkts_compl); ++tx_queue->read_count; } @@ -146,7 +148,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, - unsigned int *bytes_compl) + unsigned int *bytes_compl, + unsigned int *efv_pkts_compl) { if (buffer->unmap_len) { struct device *dma_dev = &tx_queue->efx->pci_dev->dev; @@ -164,9 +167,15 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, if (buffer->flags & EFX_TX_BUF_SKB) { struct sk_buff *skb = (struct sk_buff *)buffer->skb; - EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); - (*pkts_compl)++; - (*bytes_compl) += skb->len; + if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) { + EFX_WARN_ON_PARANOID(!efv_pkts_compl); + (*efv_pkts_compl)++; + } else { + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); + (*pkts_compl)++; + (*bytes_compl) += skb->len; + } + if (tx_queue->timestamping && (tx_queue->completed_timestamp_major || tx_queue->completed_timestamp_minor)) { @@ -199,7 +208,8 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, unsigned int index, unsigned int *pkts_compl, - unsigned int *bytes_compl) + unsigned int *bytes_compl, + unsigned int *efv_pkts_compl) { struct efx_nic *efx = tx_queue->efx; unsigned int stop_index, read_ptr; @@ -218,7 +228,8 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, return; } - efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); + efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl, + efv_pkts_compl); ++tx_queue->read_count; read_ptr = tx_queue->read_count & tx_queue->ptr_mask; @@ -241,15 +252,17 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue) void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; + unsigned int efv_pkts_compl = 0; struct efx_nic *efx = tx_queue->efx; EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); - efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); + efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl, + &efv_pkts_compl); tx_queue->pkts_compl += pkts_compl; tx_queue->bytes_compl += bytes_compl; - if (pkts_compl > 1) + if (pkts_compl + efv_pkts_compl > 1) ++tx_queue->merge_events; /* See if we need to restart the netif queue. This memory @@ -274,6 +287,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, unsigned int insert_count) { + unsigned int efv_pkts_compl = 0; struct efx_tx_buffer *buffer; unsigned int bytes_compl = 0; unsigned int pkts_compl = 0; @@ -282,7 +296,8 @@ void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, while (tx_queue->insert_count != insert_count) { --tx_queue->insert_count; buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl, + &efv_pkts_compl); } } diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h index bbab7f248250..d87aecbc7bf1 100644 --- a/drivers/net/ethernet/sfc/tx_common.h +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -19,7 +19,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, - unsigned int *bytes_compl); + unsigned int *bytes_compl, + unsigned int *efv_pkts_compl); static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index d0e82cb5ae03..52f9ed8db9c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -297,6 +297,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr, *art_time = ns; } +static int stmmac_cross_ts_isr(struct stmmac_priv *priv) +{ + return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE); +} + static int intel_crosststamp(ktime_t *device, struct system_counterval_t *system, void *ctx) @@ -312,8 +317,6 @@ static int intel_crosststamp(ktime_t *device, u32 num_snapshot; u32 gpio_value; u32 acr_value; - int ret; - u32 v; int i; if (!boot_cpu_has(X86_FEATURE_ART)) @@ -327,6 +330,8 @@ static int intel_crosststamp(ktime_t *device, if (priv->plat->ext_snapshot_en) return -EBUSY; + priv->plat->int_snapshot_en = 1; + mutex_lock(&priv->aux_ts_lock); /* Enable Internal snapshot trigger */ acr_value = readl(ptpaddr + PTP_ACR); @@ -346,6 +351,7 @@ static int intel_crosststamp(ktime_t *device, break; default: mutex_unlock(&priv->aux_ts_lock); + priv->plat->int_snapshot_en = 0; return -EINVAL; } writel(acr_value, ptpaddr + PTP_ACR); @@ -367,13 +373,12 @@ static int intel_crosststamp(ktime_t *device, gpio_value |= GMAC_GPO1; writel(gpio_value, ioaddr + GMAC_GPIO_STATUS); - /* Poll for time sync operation done */ - ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v, - (v & GMAC_INT_TSIE), 100, 10000); - - if (ret == -ETIMEDOUT) { - pr_err("%s: Wait for time sync operation timeout\n", __func__); - return ret; + /* Time sync done Indication - Interrupt method */ + if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait, + stmmac_cross_ts_isr(priv), + HZ / 100)) { + priv->plat->int_snapshot_en = 0; + return -ETIMEDOUT; } num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) & @@ -391,6 +396,7 @@ static int intel_crosststamp(ktime_t *device, } system->cycles *= intel_priv->crossts_adj; + priv->plat->int_snapshot_en = 0; return 0; } @@ -606,6 +612,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_crossts = true; plat->crosststamp = intel_crosststamp; + plat->int_snapshot_en = 0; /* Setup MSI vector offset specific to Intel mGbE controller */ plat->msi_mac_vec = 29; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 6ff88df58767..ca8ab290013c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) } } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); - if (ret) { - dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); - return ret; - } - - ret = clk_prepare_enable(plat->rmii_internal_clk); - if (ret) { - dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret); - goto err_clk; - } - return 0; - -err_clk: - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); - return ret; -} - -static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct mediatek_dwmac_plat_data *plat = priv; - const struct mediatek_dwmac_variant *variant = plat->variant; - - clk_disable_unprepare(plat->rmii_internal_clk); - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); } static int mediatek_dwmac_clks_config(void *priv, bool enabled) @@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->addr64 = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; - plat->exit = mediatek_dwmac_exit; plat->clks_config = mediatek_dwmac_clks_config; if (priv_plat->variant->dwmac_fix_mac_speed) plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; @@ -712,13 +686,32 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) mediatek_dwmac_common_data(pdev, plat_dat, priv_plat); mediatek_dwmac_init(pdev, priv_plat); + ret = mediatek_dwmac_clks_config(priv_plat, true); + if (ret) + return ret; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) { stmmac_remove_config_dt(pdev, plat_dat); - return ret; + goto err_drv_probe; } return 0; + +err_drv_probe: + mediatek_dwmac_clks_config(priv_plat, false); + return ret; +} + +static int mediatek_dwmac_remove(struct platform_device *pdev) +{ + struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev); + int ret; + + ret = stmmac_pltfr_remove(pdev); + mediatek_dwmac_clks_config(priv_plat, false); + + return ret; } static const struct of_device_id mediatek_dwmac_match[] = { @@ -733,7 +726,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match); static struct platform_driver mediatek_dwmac_driver = { .probe = mediatek_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = mediatek_dwmac_remove, .driver = { .name = "dwmac-mediatek", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 462ca7ed095a..71dad409f78b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -150,7 +150,8 @@ #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ GMAC_INT_PCS_ANE) -#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN) +#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \ + GMAC_INT_TSIE) enum dwmac4_irq_status { time_stamp_irq = 0x00001000, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index fd41db65fe1d..d8f1fbc25bdd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -23,6 +23,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, struct net_device *dev) { + struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); @@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw, value |= GMAC_INT_FPE_EN; writel(value, ioaddr + GMAC_INT_EN); + + if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) + init_waitqueue_head(&priv->tstamp_busy_wait); } static void dwmac4_rx_queue_enable(struct mac_device_info *hw, @@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) if (queue == 0 || queue == 4) { value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; value |= MTL_RXQ_DMA_Q04MDMACH(chan); + } else if (queue > 4) { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 57970ae2178d..f9e83964aa7e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -266,6 +266,7 @@ struct stmmac_priv { rwlock_t ptp_lock; /* Protects auxiliary snapshot registers from concurrent access. */ struct mutex aux_ts_lock; + wait_queue_head_t tstamp_busy_wait; void __iomem *mmcaddr; void __iomem *ptpaddr; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index abfb3cd5958d..9c3055ee2608 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, netdev_warn(priv->dev, "Setting EEE tx-lpi is not supported\n"); - if (priv->hw->xpcs) { - ret = xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - edata->eee_enabled); - if (ret) - return ret; - } - if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 92d32940aff0..764832f4dae1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv) u64 ptp_time; int i; + if (priv->plat->int_snapshot_en) { + wake_up(&priv->tstamp_busy_wait); + return; + } + tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE; if (!tsync_int) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6f14b00c0b14..89eb3e51b22c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; - int ret; if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) { - netdev_warn(priv->dev, - "failed to enable PTP reference clock: %pe\n", - ERR_PTR(ret)); - return ret; - } - stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; @@ -3272,6 +3263,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_mmc_setup(priv); + if (ptp_register) { + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + } + ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) netdev_info(priv->dev, "PTP not supported by HW\n"); @@ -7215,8 +7214,6 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); pm_runtime_get_sync(dev); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); @@ -7243,6 +7240,9 @@ int stmmac_dvr_remove(struct device *dev) mutex_destroy(&priv->lock); bitmap_free(priv->af_xdp_zc_qps); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + return 0; } EXPORT_SYMBOL_GPL(stmmac_dvr_remove); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 11e1055e8260..9f5cac4000da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; - stmmac_init_tstamp_counter(priv, priv->systime_flags); + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } } return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e45fb191d8e6..4d11980dcd64 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, struct stmmac_priv *priv = container_of(ptp, struct stmmac_priv, ptp_clock_ops); void __iomem *ptpaddr = priv->ptpaddr; - void __iomem *ioaddr = priv->hw->pcsr; struct stmmac_pps_cfg *cfg; - u32 intr_value, acr_value; int ret = -EOPNOTSUPP; unsigned long flags; + u32 acr_value; switch (rq->type) { case PTP_CLK_REQ_PEROUT: @@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n", priv->plat->ext_snapshot_num >> PTP_ACR_ATSEN_SHIFT); - /* Enable Timestamp Interrupt */ - intr_value = readl(ioaddr + GMAC_INT_EN); - intr_value |= GMAC_INT_TSIE; - writel(intr_value, ioaddr + GMAC_INT_EN); - } else { netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n", priv->plat->ext_snapshot_num >> PTP_ACR_ATSEN_SHIFT); - /* Disable Timestamp Interrupt */ - intr_value = readl(ioaddr + GMAC_INT_EN); - intr_value &= ~GMAC_INT_TSIE; - writel(intr_value, ioaddr + GMAC_INT_EN); } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); diff --git a/drivers/net/ipa/data/ipa_data-v3.1.c b/drivers/net/ipa/data/ipa_data-v3.1.c index 00f4e506e6e5..1c1895aea811 100644 --- a/drivers/net/ipa/data/ipa_data-v3.1.c +++ b/drivers/net/ipa/data/ipa_data-v3.1.c @@ -6,10 +6,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v3.1 */ enum ipa_resource_type { diff --git a/drivers/net/ipa/data/ipa_data-v3.5.1.c b/drivers/net/ipa/data/ipa_data-v3.5.1.c index b7e32e87733e..58b708d2fc75 100644 --- a/drivers/net/ipa/data/ipa_data-v3.5.1.c +++ b/drivers/net/ipa/data/ipa_data-v3.5.1.c @@ -6,10 +6,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v3.5.1 */ enum ipa_resource_type { diff --git a/drivers/net/ipa/data/ipa_data-v4.11.c b/drivers/net/ipa/data/ipa_data-v4.11.c index 1be823e5c5c2..a204e439c23d 100644 --- a/drivers/net/ipa/data/ipa_data-v4.11.c +++ b/drivers/net/ipa/data/ipa_data-v4.11.c @@ -4,10 +4,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.11 */ enum ipa_resource_type { diff --git a/drivers/net/ipa/data/ipa_data-v4.2.c b/drivers/net/ipa/data/ipa_data-v4.2.c index 683f1f91042f..04f574fe006f 100644 --- a/drivers/net/ipa/data/ipa_data-v4.2.c +++ b/drivers/net/ipa/data/ipa_data-v4.2.c @@ -4,10 +4,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.2 */ enum ipa_resource_type { diff --git a/drivers/net/ipa/data/ipa_data-v4.5.c b/drivers/net/ipa/data/ipa_data-v4.5.c index 79398f286a9c..684239e71f46 100644 --- a/drivers/net/ipa/data/ipa_data-v4.5.c +++ b/drivers/net/ipa/data/ipa_data-v4.5.c @@ -4,10 +4,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.5 */ enum ipa_resource_type { diff --git a/drivers/net/ipa/data/ipa_data-v4.9.c b/drivers/net/ipa/data/ipa_data-v4.9.c index 4b96efd05cf2..2333e15f9533 100644 --- a/drivers/net/ipa/data/ipa_data-v4.9.c +++ b/drivers/net/ipa/data/ipa_data-v4.9.c @@ -4,10 +4,10 @@ #include <linux/log2.h> -#include "gsi.h" -#include "ipa_data.h" -#include "ipa_endpoint.h" -#include "ipa_mem.h" +#include "../gsi.h" +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" /** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.9 */ enum ipa_resource_type { diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index ac2d400d1d6c..3b0e8f93e3c8 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -164,11 +164,15 @@ #define GMII_PHY_PGSEL_PAGE3 0x0003 #define GMII_PHY_PGSEL_PAGE5 0x0005 +static int ax88179_reset(struct usbnet *dev); + struct ax88179_data { u8 eee_enabled; u8 eee_active; u16 rxctl; - u16 reserved; + u8 in_pm; + u32 wol_supported; + u32 wolopts; }; struct ax88179_int_data { @@ -185,15 +189,29 @@ static const struct { {7, 0xcc, 0x4c, 0x18, 8}, }; +static void ax88179_set_pm_mode(struct usbnet *dev, bool pm_mode) +{ + struct ax88179_data *ax179_data = dev->driver_priv; + + ax179_data->in_pm = pm_mode; +} + +static int ax88179_in_pm(struct usbnet *dev) +{ + struct ax88179_data *ax179_data = dev->driver_priv; + + return ax179_data->in_pm; +} + static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) + u16 size, void *data) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); - if (!in_pm) + if (!ax88179_in_pm(dev)) fn = usbnet_read_cmd; else fn = usbnet_read_cmd_nopm; @@ -209,14 +227,14 @@ static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, } static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, const void *data, int in_pm) + u16 size, const void *data) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); - if (!in_pm) + if (!ax88179_in_pm(dev)) fn = usbnet_write_cmd; else fn = usbnet_write_cmd_nopm; @@ -249,47 +267,6 @@ static void ax88179_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, } } -static int ax88179_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, - u16 index, u16 size, void *data) -{ - int ret; - - if (2 == size) { - u16 buf; - ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 1); - le16_to_cpus(&buf); - *((u16 *)data) = buf; - } else if (4 == size) { - u32 buf; - ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 1); - le32_to_cpus(&buf); - *((u32 *)data) = buf; - } else { - ret = __ax88179_read_cmd(dev, cmd, value, index, size, data, 1); - } - - return ret; -} - -static int ax88179_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, - u16 index, u16 size, const void *data) -{ - int ret; - - if (2 == size) { - u16 buf; - buf = *((u16 *)data); - cpu_to_le16s(&buf); - ret = __ax88179_write_cmd(dev, cmd, value, index, - size, &buf, 1); - } else { - ret = __ax88179_write_cmd(dev, cmd, value, index, - size, data, 1); - } - - return ret; -} - static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { @@ -297,16 +274,16 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, if (2 == size) { u16 buf = 0; - ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf); le16_to_cpus(&buf); *((u16 *)data) = buf; } else if (4 == size) { u32 buf = 0; - ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); + ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf); le32_to_cpus(&buf); *((u32 *)data) = buf; } else { - ret = __ax88179_read_cmd(dev, cmd, value, index, size, data, 0); + ret = __ax88179_read_cmd(dev, cmd, value, index, size, data); } return ret; @@ -322,10 +299,10 @@ static int ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, buf = *((u16 *)data); cpu_to_le16s(&buf); ret = __ax88179_write_cmd(dev, cmd, value, index, - size, &buf, 0); + size, &buf); } else { ret = __ax88179_write_cmd(dev, cmd, value, index, - size, data, 0); + size, data); } return ret; @@ -425,55 +402,63 @@ ax88179_phy_write_mmd_indirect(struct usbnet *dev, u16 prtad, u16 devad, static int ax88179_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); + struct ax88179_data *priv = dev->driver_priv; u16 tmp16; u8 tmp8; + ax88179_set_pm_mode(dev, true); + usbnet_suspend(intf, message); + /* Enable WoL */ + if (priv->wolopts) { + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, + 1, 1, &tmp8); + if (priv->wolopts & WAKE_PHY) + tmp8 |= AX_MONITOR_MODE_RWLC; + if (priv->wolopts & WAKE_MAGIC) + tmp8 |= AX_MONITOR_MODE_RWMP; + + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, + 1, 1, &tmp8); + } + /* Disable RX path */ - ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, - 2, 2, &tmp16); + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); tmp16 &= ~AX_MEDIUM_RECEIVE_EN; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, - 2, 2, &tmp16); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, + 2, 2, &tmp16); /* Force bulk-in zero length */ - ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, - 2, 2, &tmp16); + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); tmp16 |= AX_PHYPWR_RSTCTL_BZ | AX_PHYPWR_RSTCTL_IPRL; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, - 2, 2, &tmp16); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, + 2, 2, &tmp16); /* change clock */ tmp8 = 0; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); /* Configure RX control register => stop operation */ tmp16 = AX_RX_CTL_STOP; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + + ax88179_set_pm_mode(dev, false); return 0; } /* This function is used to enable the autodetach function. */ /* This function is determined by offset 0x43 of EEPROM */ -static int ax88179_auto_detach(struct usbnet *dev, int in_pm) +static int ax88179_auto_detach(struct usbnet *dev) { u16 tmp16; u8 tmp8; - int (*fnr)(struct usbnet *, u8, u16, u16, u16, void *); - int (*fnw)(struct usbnet *, u8, u16, u16, u16, const void *); - - if (!in_pm) { - fnr = ax88179_read_cmd; - fnw = ax88179_write_cmd; - } else { - fnr = ax88179_read_cmd_nopm; - fnw = ax88179_write_cmd_nopm; - } - if (fnr(dev, AX_ACCESS_EEPROM, 0x43, 1, 2, &tmp16) < 0) + if (ax88179_read_cmd(dev, AX_ACCESS_EEPROM, 0x43, 1, 2, &tmp16) < 0) return 0; if ((tmp16 == 0xFFFF) || (!(tmp16 & 0x0100))) @@ -481,13 +466,13 @@ static int ax88179_auto_detach(struct usbnet *dev, int in_pm) /* Enable Auto Detach bit */ tmp8 = 0; - fnr(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); tmp8 |= AX_CLK_SELECT_ULR; - fnw(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); - fnr(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); tmp16 |= AX_PHYPWR_RSTCTL_AT; - fnw(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); + ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); return 0; } @@ -495,35 +480,14 @@ static int ax88179_auto_detach(struct usbnet *dev, int in_pm) static int ax88179_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); - u16 tmp16; - u8 tmp8; - - usbnet_link_change(dev, 0, 0); - /* Power up ethernet PHY */ - tmp16 = 0; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, - 2, 2, &tmp16); - udelay(1000); - - tmp16 = AX_PHYPWR_RSTCTL_IPRL; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, - 2, 2, &tmp16); - msleep(200); + ax88179_set_pm_mode(dev, true); - /* Ethernet PHY Auto Detach*/ - ax88179_auto_detach(dev, 1); + usbnet_link_change(dev, 0, 0); - /* Enable clock */ - ax88179_read_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); - tmp8 |= AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, &tmp8); - msleep(100); + ax88179_reset(dev); - /* Configure RX control register => start operation */ - tmp16 = AX_RX_CTL_DROPCRCERR | AX_RX_CTL_IPE | AX_RX_CTL_START | - AX_RX_CTL_AP | AX_RX_CTL_AMALL | AX_RX_CTL_AB; - ax88179_write_cmd_nopm(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &tmp16); + ax88179_set_pm_mode(dev, false); return usbnet_resume(intf); } @@ -532,40 +496,22 @@ static void ax88179_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); - u8 opt; + struct ax88179_data *priv = dev->driver_priv; - if (ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, - 1, 1, &opt) < 0) { - wolinfo->supported = 0; - wolinfo->wolopts = 0; - return; - } - - wolinfo->supported = WAKE_PHY | WAKE_MAGIC; - wolinfo->wolopts = 0; - if (opt & AX_MONITOR_MODE_RWLC) - wolinfo->wolopts |= WAKE_PHY; - if (opt & AX_MONITOR_MODE_RWMP) - wolinfo->wolopts |= WAKE_MAGIC; + wolinfo->supported = priv->wol_supported; + wolinfo->wolopts = priv->wolopts; } static int ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); - u8 opt = 0; + struct ax88179_data *priv = dev->driver_priv; - if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + if (wolinfo->wolopts & ~(priv->wol_supported)) return -EINVAL; - if (wolinfo->wolopts & WAKE_PHY) - opt |= AX_MONITOR_MODE_RWLC; - if (wolinfo->wolopts & WAKE_MAGIC) - opt |= AX_MONITOR_MODE_RWMP; - - if (ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, - 1, 1, &opt) < 0) - return -EINVAL; + priv->wolopts = wolinfo->wolopts; return 0; } @@ -599,8 +545,7 @@ ax88179_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, /* ax88179/178A returns 2 bytes from eeprom on read */ for (i = first_word; i <= last_word; i++) { ret = __ax88179_read_cmd(dev, AX_ACCESS_EEPROM, i, 1, 2, - &eeprom_buff[i - first_word], - 0); + &eeprom_buff[i - first_word]); if (ret < 0) { kfree(eeprom_buff); return -EIO; @@ -745,7 +690,7 @@ ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_eee *data) static int ax88179_chk_eee(struct usbnet *dev) { struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; - struct ax88179_data *priv = (struct ax88179_data *)dev->data; + struct ax88179_data *priv = dev->driver_priv; mii_ethtool_gset(&dev->mii, &ecmd); @@ -848,7 +793,7 @@ static void ax88179_enable_eee(struct usbnet *dev) static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata) { struct usbnet *dev = netdev_priv(net); - struct ax88179_data *priv = (struct ax88179_data *)dev->data; + struct ax88179_data *priv = dev->driver_priv; edata->eee_enabled = priv->eee_enabled; edata->eee_active = priv->eee_active; @@ -859,7 +804,7 @@ static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata) static int ax88179_set_eee(struct net_device *net, struct ethtool_eee *edata) { struct usbnet *dev = netdev_priv(net); - struct ax88179_data *priv = (struct ax88179_data *)dev->data; + struct ax88179_data *priv = dev->driver_priv; int ret; priv->eee_enabled = edata->eee_enabled; @@ -910,8 +855,8 @@ static const struct ethtool_ops ax88179_ethtool_ops = { static void ax88179_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); - struct ax88179_data *data = (struct ax88179_data *)dev->data; - u8 *m_filter = ((u8 *)dev->data) + 12; + struct ax88179_data *data = dev->driver_priv; + u8 *m_filter = ((u8 *)dev->data); data->rxctl = (AX_RX_CTL_START | AX_RX_CTL_AB | AX_RX_CTL_IPE); @@ -923,7 +868,7 @@ static void ax88179_set_multicast(struct net_device *net) } else if (netdev_mc_empty(net)) { /* just broadcast and directed */ } else { - /* We use the 20 byte dev->data for our 8 byte filter buffer + /* We use dev->data for our 8 byte filter buffer * to avoid allocating memory that is tricky to free later */ u32 crc_bits; @@ -1069,7 +1014,7 @@ static int ax88179_check_eeprom(struct usbnet *dev) } while (buf & EEP_BUSY); __ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_SROM_DATA_LOW, - 2, 2, &eeprom[i * 2], 0); + 2, 2, &eeprom[i * 2]); if ((i == 0) && (eeprom[0] == 0xFF)) return -EINVAL; @@ -1322,46 +1267,15 @@ static void ax88179_get_mac_addr(struct usbnet *dev) static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) { - u8 buf[5]; - u16 *tmp16; - u8 *tmp; - struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; - struct ethtool_eee eee_data; + struct ax88179_data *ax179_data; usbnet_get_endpoints(dev, intf); - tmp16 = (u16 *)buf; - tmp = (u8 *)buf; - - memset(ax179_data, 0, sizeof(*ax179_data)); - - /* Power up ethernet PHY */ - *tmp16 = 0; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - *tmp16 = AX_PHYPWR_RSTCTL_IPRL; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - msleep(200); - - *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); - msleep(100); - - /* Read MAC address from DTB or asix chip */ - ax88179_get_mac_addr(dev); - memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); - - /* RX bulk configuration */ - memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp); - - dev->rx_urb_size = 1024 * 20; + ax179_data = kzalloc(sizeof(*ax179_data), GFP_KERNEL); + if (!ax179_data) + return -ENOMEM; - *tmp = 0x34; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_LOW, 1, 1, tmp); - - *tmp = 0x52; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, - 1, 1, tmp); + dev->driver_priv = ax179_data; dev->net->netdev_ops = &ax88179_netdev_ops; dev->net->ethtool_ops = &ax88179_ethtool_ops; @@ -1384,52 +1298,14 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); - /* Enable checksum offload */ - *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | - AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RXCOE_CTL, 1, 1, tmp); - - *tmp = AX_TXCOE_IP | AX_TXCOE_TCP | AX_TXCOE_UDP | - AX_TXCOE_TCPV6 | AX_TXCOE_UDPV6; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_TXCOE_CTL, 1, 1, tmp); - - /* Configure RX control register => start operation */ - *tmp16 = AX_RX_CTL_DROPCRCERR | AX_RX_CTL_IPE | AX_RX_CTL_START | - AX_RX_CTL_AP | AX_RX_CTL_AMALL | AX_RX_CTL_AB; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, tmp16); - - *tmp = AX_MONITOR_MODE_PMETYPE | AX_MONITOR_MODE_PMEPOL | - AX_MONITOR_MODE_RWMP; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, 1, 1, tmp); - - /* Configure default medium type => giga */ - *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX | - AX_MEDIUM_GIGAMODE; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, - 2, 2, tmp16); - - ax88179_led_setting(dev); - - ax179_data->eee_enabled = 0; - ax179_data->eee_active = 0; - - ax88179_disable_eee(dev); - - ax88179_ethtool_get_eee(dev, &eee_data); - eee_data.advertised = 0; - ax88179_ethtool_set_eee(dev, &eee_data); - - /* Restart autoneg */ - mii_nway_restart(&dev->mii); - - usbnet_link_change(dev, 0, 0); + ax88179_reset(dev); return 0; } static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf) { + struct ax88179_data *ax179_data = dev->driver_priv; u16 tmp16; /* Configure RX control register => stop operation */ @@ -1442,6 +1318,8 @@ static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf) /* Power down ethernet PHY */ tmp16 = 0; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, &tmp16); + + kfree(ax179_data); } static void @@ -1618,7 +1496,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) static int ax88179_link_reset(struct usbnet *dev) { - struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; + struct ax88179_data *ax179_data = dev->driver_priv; u8 tmp[5], link_sts; u16 mode, tmp16, delay = HZ / 10; u32 tmp32 = 0x40000000; @@ -1693,7 +1571,7 @@ static int ax88179_reset(struct usbnet *dev) u8 buf[5]; u16 *tmp16; u8 *tmp; - struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; + struct ax88179_data *ax179_data = dev->driver_priv; struct ethtool_eee eee_data; tmp16 = (u16 *)buf; @@ -1712,10 +1590,11 @@ static int ax88179_reset(struct usbnet *dev) msleep(100); /* Ethernet PHY Auto Detach*/ - ax88179_auto_detach(dev, 0); + ax88179_auto_detach(dev); /* Read MAC address from DTB or asix chip */ ax88179_get_mac_addr(dev); + memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); /* RX bulk configuration */ memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); @@ -1730,12 +1609,6 @@ static int ax88179_reset(struct usbnet *dev) ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, 1, 1, tmp); - dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM; - - dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_RXCSUM; - /* Enable checksum offload */ *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; @@ -1761,6 +1634,12 @@ static int ax88179_reset(struct usbnet *dev) ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, 2, 2, tmp16); + /* Check if WoL is supported */ + ax179_data->wol_supported = 0; + if (ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_MONITOR_MOD, + 1, 1, &tmp) > 0) + ax179_data->wol_supported = WAKE_MAGIC | WAKE_PHY; + ax88179_led_setting(dev); ax179_data->eee_enabled = 0; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b082819509e1..0f6efaabaa32 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "12" /* Information for net */ -#define NET_VERSION "12" +#define NET_VERSION "13" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>" @@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp) wait_oob_link_list_ready(tp); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu)); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); switch (tp->version) { case RTL_VER_03: @@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rxdy_gated_en(tp, false); ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); @@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp) rtl_disable(tp); rtl_reset_bmu(tp); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); + /* Clear teredo wake event. bit[15:8] is the teredo wakeup * type. Set it to zero. bits[7:0] are the W1C bits about * the events. Set them to all 1 to clear them. @@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp) ocp_data |= NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rtl_rx_vlan_en(tp, true); rxdy_gated_en(tp, false); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ec6ac298d8de..6a12a906a11e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3786,7 +3786,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3810,6 +3810,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->subsys = ctrl->subsys; head->ns_id = nsid; head->ids = *ids; + head->shared = is_shared; kref_init(&head->ref); if (head->ids.csi) { @@ -3891,12 +3892,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, nsid); goto out_unlock; } - head = nvme_alloc_ns_head(ctrl, nsid, ids); + head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } - head->shared = is_shared; } else { ret = -EINVAL; if (!is_shared || !head->shared) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 193b44755662..58c72d55769a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2690,8 +2690,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) struct pci_dev *pdev = to_pci_dev(dev->dev); mutex_lock(&dev->shutdown_lock); - if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) { - u32 csts = readl(dev->bar + NVME_REG_CSTS); + if (pci_is_enabled(pdev)) { + u32 csts; + + if (pci_device_is_present(pdev)) + csts = readl(dev->bar + NVME_REG_CSTS); + else + csts = ~0; if (dev->ctrl.state == NVME_CTRL_LIVE || dev->ctrl.state == NVME_CTRL_RESETTING) { diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index f11d18beac18..700eb19e8450 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -91,6 +91,8 @@ #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN #define AMD_CPU_ID_YC 0x14B5 +#define AMD_CPU_ID_CB 0x14D8 +#define AMD_CPU_ID_PS 0x14E8 #define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -318,6 +320,8 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev, val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_CZN); break; case AMD_CPU_ID_YC: + case AMD_CPU_ID_CB: + case AMD_CPU_ID_PS: val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC); break; default: @@ -491,7 +495,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) &amd_pmc_idlemask_fops); /* Enable STB only when the module_param is set */ if (enable_stb) { - if (dev->cpu_id == AMD_CPU_ID_YC) + if (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB || + dev->cpu_id == AMD_CPU_ID_PS) debugfs_create_file("stb_read", 0644, dev->dbgfs_dir, dev, &amd_pmc_stb_debugfs_fops_v2); else @@ -615,6 +620,8 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) return MSG_OS_HINT_PCO; case AMD_CPU_ID_RN: case AMD_CPU_ID_YC: + case AMD_CPU_ID_CB: + case AMD_CPU_ID_PS: return MSG_OS_HINT_RN; } return -EINVAL; @@ -735,6 +742,8 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = { #endif static const struct pci_device_id pmc_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CB) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) }, @@ -877,7 +886,7 @@ static int amd_pmc_probe(struct platform_device *pdev) mutex_init(&dev->lock); - if (enable_stb && dev->cpu_id == AMD_CPU_ID_YC) { + if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) { err = amd_pmc_s2d_init(dev); if (err) return err; @@ -915,6 +924,7 @@ static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, {"AMDI0006", 0}, {"AMDI0007", 0}, + {"AMDI0008", 0}, {"AMD0004", 0}, {"AMD0005", 0}, { } diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 57a07db659cb..478dd300b9c9 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -522,6 +522,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, { KE_KEY, 0x35, { KEY_SCREENLOCK } }, + { KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, { KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ @@ -574,6 +575,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */ { KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */ { KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */ + { KE_KEY, 0xB3, { KEY_PROG4 } }, /* AURA */ { KE_KEY, 0xB5, { KEY_CALC } }, { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 497ad2f64a51..5e7e6659a849 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -150,6 +150,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z490 AORUS ELITE AC"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), diff --git a/drivers/platform/x86/intel/atomisp2/led.c b/drivers/platform/x86/intel/atomisp2/led.c index 5935dfca166f..10077a61d8c5 100644 --- a/drivers/platform/x86/intel/atomisp2/led.c +++ b/drivers/platform/x86/intel/atomisp2/led.c @@ -50,7 +50,8 @@ static const struct dmi_system_id atomisp2_led_systems[] __initconst = { { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), + /* Non exact match to also match T100TAF */ + DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"), }, .driver_data = &asus_t100ta_lookup, }, diff --git a/drivers/platform/x86/intel/ifs/Kconfig b/drivers/platform/x86/intel/ifs/Kconfig index 7ce896434b8f..c341a27cc1a3 100644 --- a/drivers/platform/x86/intel/ifs/Kconfig +++ b/drivers/platform/x86/intel/ifs/Kconfig @@ -1,6 +1,9 @@ config INTEL_IFS tristate "Intel In Field Scan" depends on X86 && CPU_SUP_INTEL && 64BIT && SMP + # Discussion on the list has shown that the sysfs API needs a bit + # more work, mark this as broken for now + depends on BROKEN select INTEL_IFS_DEVICE help Enable support for the In Field Scan capability in select diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index f446be72e539..480375977435 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -27,8 +27,8 @@ #include <linux/pinctrl/machine.h> #include <linux/platform_data/lp855x.h> #include <linux/platform_device.h> -#include <linux/pm.h> #include <linux/power/bq24190_charger.h> +#include <linux/reboot.h> #include <linux/rmi.h> #include <linux/serdev.h> #include <linux/spi/spi.h> @@ -889,6 +889,7 @@ static const struct pinctrl_map lenovo_yoga_tab2_830_1050_codec_pinctrl_map = "INT33FC:02", "pmu_clk2_grp", "pmu_clk"); static struct pinctrl *lenovo_yoga_tab2_830_1050_codec_pinctrl; +static struct sys_off_handler *lenovo_yoga_tab2_830_1050_sys_off_handler; static int __init lenovo_yoga_tab2_830_1050_init_codec(void) { @@ -933,9 +934,11 @@ err_put_device: * followed by a normal 3 second press to recover. Avoid this by doing an EFI * poweroff instead. */ -static void lenovo_yoga_tab2_830_1050_power_off(void) +static int lenovo_yoga_tab2_830_1050_power_off(struct sys_off_data *data) { efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); + + return NOTIFY_DONE; } static int __init lenovo_yoga_tab2_830_1050_init(void) @@ -950,13 +953,19 @@ static int __init lenovo_yoga_tab2_830_1050_init(void) if (ret) return ret; - pm_power_off = lenovo_yoga_tab2_830_1050_power_off; + /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */ + lenovo_yoga_tab2_830_1050_sys_off_handler = + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_FIRMWARE + 1, + lenovo_yoga_tab2_830_1050_power_off, NULL); + if (IS_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler)) + return PTR_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler); + return 0; } static void lenovo_yoga_tab2_830_1050_exit(void) { - pm_power_off = NULL; /* Just turn poweroff into halt on module unload */ + unregister_sys_off_handler(lenovo_yoga_tab2_830_1050_sys_off_handler); if (lenovo_yoga_tab2_830_1050_codec_pinctrl) { pinctrl_put(lenovo_yoga_tab2_830_1050_codec_pinctrl); diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c index 08d0a07b58ef..c7624d7611a7 100644 --- a/drivers/power/reset/arm-versatile-reboot.c +++ b/drivers/power/reset/arm-versatile-reboot.c @@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void) versatile_reboot_type = (enum versatile_reboot)reboot_id->data; syscon_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index ec8a404d71b4..4339fa9ff009 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -3148,6 +3148,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) ret = ab8500_fg_init_hw_registers(di); if (ret) { dev_err(dev, "failed to initialize registers\n"); + destroy_workqueue(di->fg_wq); return ret; } @@ -3159,6 +3160,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) di->fg_psy = devm_power_supply_register(dev, &ab8500_fg_desc, &psy_cfg); if (IS_ERR(di->fg_psy)) { dev_err(dev, "failed to register FG psy\n"); + destroy_workqueue(di->fg_wq); return PTR_ERR(di->fg_psy); } @@ -3174,8 +3176,10 @@ static int ab8500_fg_probe(struct platform_device *pdev) /* Register primary interrupt handlers */ for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq); i++) { irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name); - if (irq < 0) + if (irq < 0) { + destroy_workqueue(di->fg_wq); return irq; + } ret = devm_request_threaded_irq(dev, irq, NULL, ab8500_fg_irq[i].isr, @@ -3185,6 +3189,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) if (ret != 0) { dev_err(dev, "failed to request %s IRQ %d: %d\n", ab8500_fg_irq[i].name, irq, ret); + destroy_workqueue(di->fg_wq); return ret; } dev_dbg(dev, "Requested %s IRQ %d: %d\n", @@ -3200,6 +3205,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) ret = ab8500_fg_sysfs_init(di); if (ret) { dev_err(dev, "failed to create sysfs entry\n"); + destroy_workqueue(di->fg_wq); return ret; } @@ -3207,6 +3213,7 @@ static int ab8500_fg_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to create FG psy\n"); ab8500_fg_sysfs_exit(di); + destroy_workqueue(di->fg_wq); return ret; } diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index fad5890c899e..470253c337c7 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -846,17 +846,17 @@ int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *t { int i, high, low; - /* Break loop at table_len - 1 because that is the highest index */ - for (i = 0; i < table_len - 1; i++) + for (i = 0; i < table_len; i++) if (temp > table[i].temp) break; /* The library function will deal with high == low */ - if ((i == 0) || (i == (table_len - 1))) - high = i; + if (i == 0) + high = low = i; + else if (i == table_len) + high = low = i - 1; else - high = i - 1; - low = i; + high = (low = i) - 1; return fixp_linear_interpolate(table[low].temp, table[low].resistance, @@ -958,17 +958,17 @@ int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, { int i, high, low; - /* Break loop at table_len - 1 because that is the highest index */ - for (i = 0; i < table_len - 1; i++) + for (i = 0; i < table_len; i++) if (ocv > table[i].ocv) break; /* The library function will deal with high == low */ - if ((i == 0) || (i == (table_len - 1))) - high = i - 1; + if (i == 0) + high = low = i; + else if (i == table_len) + high = low = i - 1; else - high = i; /* i.e. i == 0 */ - low = i; + high = (low = i) - 1; return fixp_linear_interpolate(table[low].ocv, table[low].capacity, diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 5c13d2079d96..0a9045b49c50 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1435,7 +1435,7 @@ static int __verify_queue_reservations(struct device_driver *drv, void *data) if (ap_drv->in_use) { rc = ap_drv->in_use(ap_perms.apm, newaqm); if (rc) - return -EBUSY; + rc = -EBUSY; } /* release the driver's module */ diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index c95360a3c186..0917b05059b4 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3195,6 +3195,9 @@ static int megasas_map_queues(struct Scsi_Host *shost) qoff += map->nr_queues; offset += map->nr_queues; + /* we never use READ queue, so can't cheat blk-mq */ + shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0; + /* Setup Poll hctx */ map = &shost->tag_set.map[HCTX_TYPE_POLL]; map->nr_queues = instance->iopoll_q_count; diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index f7466a895d3b..991eb01bb1e0 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -3145,15 +3145,6 @@ void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i) if (!phy->phy_attached) return; - if (sas_phy->phy) { - struct sas_phy *sphy = sas_phy->phy; - sphy->negotiated_linkrate = sas_phy->linkrate; - sphy->minimum_linkrate = phy->minimum_linkrate; - sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; - sphy->maximum_linkrate = phy->maximum_linkrate; - sphy->maximum_linkrate_hw = phy->maximum_linkrate; - } - if (phy->phy_type & PORT_TYPE_SAS) { struct sas_identify_frame *id; id = (struct sas_identify_frame *)phy->frame_rcvd; @@ -3177,26 +3168,22 @@ void pm8001_get_lrate_mode(struct pm8001_phy *phy, u8 link_rate) switch (link_rate) { case PHY_SPEED_120: phy->sas_phy.linkrate = SAS_LINK_RATE_12_0_GBPS; - phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_12_0_GBPS; break; case PHY_SPEED_60: phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS; - phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; break; case PHY_SPEED_30: phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS; - phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; break; case PHY_SPEED_15: phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS; - phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; break; } sas_phy->negotiated_linkrate = phy->sas_phy.linkrate; - sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_6_0_GBPS; + sas_phy->maximum_linkrate_hw = phy->maximum_linkrate; sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; - sas_phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS; - sas_phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; + sas_phy->maximum_linkrate = phy->maximum_linkrate; + sas_phy->minimum_linkrate = phy->minimum_linkrate; } /** diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 9b04f1a6a67d..01f2f41928eb 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -143,6 +143,8 @@ static void pm8001_phy_init(struct pm8001_hba_info *pm8001_ha, int phy_id) struct asd_sas_phy *sas_phy = &phy->sas_phy; phy->phy_state = PHY_LINK_DISABLE; phy->pm8001_ha = pm8001_ha; + phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; + phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS; sas_phy->enabled = (phy_id < pm8001_ha->chip->n_phy) ? 1 : 0; sas_phy->class = SAS; sas_phy->iproto = SAS_PROTOCOL_ALL; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 01c5e8ff4cc5..303cd05fec50 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3723,8 +3723,12 @@ static int mpi_phy_stop_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, MSG, "phy:0x%x status:0x%x\n", phyid, status); if (status == PHY_STOP_SUCCESS || - status == PHY_STOP_ERR_DEVICE_ATTACHED) + status == PHY_STOP_ERR_DEVICE_ATTACHED) { phy->phy_state = PHY_LINK_DISABLE; + phy->sas_phy.phy->negotiated_linkrate = SAS_PHY_DISABLED; + phy->sas_phy.linkrate = SAS_PHY_DISABLED; + } + return 0; } diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index cba6a4486c24..efdcbe6c4c26 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -33,6 +33,7 @@ #define AMD_SPI_RX_COUNT_REG 0x4B #define AMD_SPI_STATUS_REG 0x4C +#define AMD_SPI_FIFO_SIZE 70 #define AMD_SPI_MEM_SIZE 200 /* M_CMD OP codes for SPI */ @@ -270,6 +271,11 @@ static int amd_spi_master_transfer(struct spi_master *master, return 0; } +static size_t amd_spi_max_transfer_size(struct spi_device *spi) +{ + return AMD_SPI_FIFO_SIZE; +} + static int amd_spi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -302,6 +308,8 @@ static int amd_spi_probe(struct platform_device *pdev) master->flags = SPI_MASTER_HALF_DUPLEX; master->setup = amd_spi_master_setup; master->transfer_one_message = amd_spi_master_transfer; + master->max_transfer_size = amd_spi_max_transfer_size; + master->max_message_size = amd_spi_max_transfer_size; /* Register the controller with SPI framework */ err = devm_spi_register_master(dev, master); diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c index 496f3e1e9079..3e891bf22470 100644 --- a/drivers/spi/spi-aspeed-smc.c +++ b/drivers/spi/spi-aspeed-smc.c @@ -558,6 +558,14 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc) u32 ctl_val; int ret = 0; + dev_dbg(aspi->dev, + "CE%d %s dirmap [ 0x%.8llx - 0x%.8llx ] OP %#x mode:%d.%d.%d.%d naddr:%#x ndummies:%#x\n", + chip->cs, op->data.dir == SPI_MEM_DATA_IN ? "read" : "write", + desc->info.offset, desc->info.offset + desc->info.length, + op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth, + op->dummy.buswidth, op->data.buswidth, + op->addr.nbytes, op->dummy.nbytes); + chip->clk_freq = desc->mem->spi->max_speed_hz; /* Only for reads */ @@ -574,9 +582,11 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc) ctl_val = readl(chip->ctl) & ~CTRL_IO_CMD_MASK; ctl_val |= aspeed_spi_get_io_mode(op) | op->cmd.opcode << CTRL_COMMAND_SHIFT | - CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth) | CTRL_IO_MODE_READ; + if (op->dummy.nbytes) + ctl_val |= CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth); + /* Tune 4BYTE address mode */ if (op->addr.nbytes) { u32 addr_mode = readl(aspi->regs + CE_CTRL_REG); diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 2b9fc8449a62..72b1a5a2298c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1578,8 +1578,7 @@ static int cqspi_probe(struct platform_device *pdev) ret = cqspi_of_get_pdata(cqspi); if (ret) { dev_err(dev, "Cannot get mandatory OF data.\n"); - ret = -ENODEV; - goto probe_master_put; + return -ENODEV; } /* Obtain QSPI clock. */ @@ -1587,7 +1586,7 @@ static int cqspi_probe(struct platform_device *pdev) if (IS_ERR(cqspi->clk)) { dev_err(dev, "Cannot claim QSPI clock.\n"); ret = PTR_ERR(cqspi->clk); - goto probe_master_put; + return ret; } /* Obtain and remap controller address. */ @@ -1596,7 +1595,7 @@ static int cqspi_probe(struct platform_device *pdev) if (IS_ERR(cqspi->iobase)) { dev_err(dev, "Cannot remap controller address.\n"); ret = PTR_ERR(cqspi->iobase); - goto probe_master_put; + return ret; } /* Obtain and remap AHB address. */ @@ -1605,7 +1604,7 @@ static int cqspi_probe(struct platform_device *pdev) if (IS_ERR(cqspi->ahb_base)) { dev_err(dev, "Cannot remap AHB address.\n"); ret = PTR_ERR(cqspi->ahb_base); - goto probe_master_put; + return ret; } cqspi->mmap_phys_base = (dma_addr_t)res_ahb->start; cqspi->ahb_size = resource_size(res_ahb); @@ -1614,15 +1613,13 @@ static int cqspi_probe(struct platform_device *pdev) /* Obtain IRQ line. */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = -ENXIO; - goto probe_master_put; - } + if (irq < 0) + return -ENXIO; pm_runtime_enable(dev); ret = pm_runtime_resume_and_get(dev); if (ret < 0) - goto probe_master_put; + return ret; ret = clk_prepare_enable(cqspi->clk); if (ret) { @@ -1716,8 +1713,6 @@ probe_reset_failed: probe_clk_failed: pm_runtime_put_sync(dev); pm_runtime_disable(dev); -probe_master_put: - spi_master_put(master); return ret; } diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index e68f1cc8ef98..6c8d8b051bfd 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -448,6 +448,9 @@ fd_execute_write_same(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } + if (!cmd->t_data_nents) + return TCM_INVALID_CDB_FIELD; + if (cmd->t_data_nents > 1 || cmd->t_data_sg[0].length != cmd->se_dev->dev_attrib.block_size) { pr_err("WRITE_SAME: Illegal SGL t_data_nents: %u length: %u" diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 378c80313a0f..1ed9381751e6 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -494,6 +494,10 @@ iblock_execute_write_same(struct se_cmd *cmd) " backends not supported\n"); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } + + if (!cmd->t_data_nents) + return TCM_INVALID_CDB_FIELD; + sg = &cmd->t_data_sg[0]; if (cmd->t_data_nents > 1 || diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index ca1b2312d6e7..f6132836eb38 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -312,6 +312,12 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *op pr_warn("WRITE SAME with ANCHOR not supported\n"); return TCM_INVALID_CDB_FIELD; } + + if (flags & 0x01) { + pr_warn("WRITE SAME with NDOB not supported\n"); + return TCM_INVALID_CDB_FIELD; + } + /* * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h index c60896cf71cb..73b5e7760d10 100644 --- a/drivers/tee/optee/optee_smc.h +++ b/drivers/tee/optee/optee_smc.h @@ -189,7 +189,7 @@ struct optee_smc_call_get_os_revision_result { * Have config return register usage: * a0 OPTEE_SMC_RETURN_OK * a1 Physical address of start of SHM - * a2 Size of of SHM + * a2 Size of SHM * a3 Cache settings of memory, as defined by the * OPTEE_SMC_SHM_* values above * a4-7 Preserved diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c index 385cb0aee610..a1c1fa1a9c28 100644 --- a/drivers/tee/optee/smc_abi.c +++ b/drivers/tee/optee/smc_abi.c @@ -884,8 +884,8 @@ static int optee_smc_do_call_with_arg(struct tee_context *ctx, rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params); rpc_arg = tee_shm_get_va(shm, offs + rpc_arg_offs); - if (IS_ERR(arg)) - return PTR_ERR(arg); + if (IS_ERR(rpc_arg)) + return PTR_ERR(rpc_arg); } if (rpc_arg && tee_shm_is_dynamic(shm)) { diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index af0f7c603fa4..98da206cd761 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -1073,7 +1073,7 @@ EXPORT_SYMBOL_GPL(tee_device_unregister); /** * tee_get_drvdata() - Return driver_data pointer * @teedev: Device containing the driver_data pointer - * @returns the driver_data pointer supplied to tee_register(). + * @returns the driver_data pointer supplied to tee_device_alloc(). */ void *tee_get_drvdata(struct tee_device *teedev) { diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 74bfabe5b453..752dab3356d7 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - unsigned long flags; - if (tty->flow.stopped) + if (tty->flow.stopped || !c) return 0; - if (c > 0) { - spin_lock_irqsave(&to->port->lock, flags); - /* Stuff the data into the input queue of the other end */ - c = tty_insert_flip_string(to->port, buf, c); - spin_unlock_irqrestore(&to->port->lock, flags); - /* And shovel */ - if (c) - tty_flip_buffer_push(to->port); - } - return c; + return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index cfbd2de0ca6e..3f56dbc9432b 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -23,6 +23,7 @@ #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/tty.h> #include <linux/ratelimit.h> #include <linux/tty_flip.h> @@ -559,6 +560,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) up->port.dev = dev; + if (uart_console_enabled(&up->port)) + pm_runtime_get_sync(up->port.dev); + serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 7133fceed35e..a8dba4a0a8fb 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -106,10 +106,10 @@ int serial8250_tx_dma(struct uart_8250_port *p) UART_XMIT_SIZE, DMA_TO_DEVICE); dma_async_issue_pending(dma->txchan); - if (dma->tx_err) { + serial8250_clear_THRI(p); + if (dma->tx_err) dma->tx_err = 0; - serial8250_clear_THRI(p); - } + return 0; err: dma->tx_err = 1; diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index f57bbd32ef11..bb6aca07ab56 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -47,7 +47,7 @@ #define RZN1_UART_xDMACR_DMA_EN BIT(0) #define RZN1_UART_xDMACR_1_WORD_BURST (0 << 1) #define RZN1_UART_xDMACR_4_WORD_BURST (1 << 1) -#define RZN1_UART_xDMACR_8_WORD_BURST (3 << 1) +#define RZN1_UART_xDMACR_8_WORD_BURST (2 << 1) #define RZN1_UART_xDMACR_BLK_SZ(x) ((x) << 3) /* Quirks */ @@ -773,18 +773,18 @@ static const struct of_device_id dw8250_of_match[] = { MODULE_DEVICE_TABLE(of, dw8250_of_match); static const struct acpi_device_id dw8250_acpi_match[] = { - { "INT33C4", 0 }, - { "INT33C5", 0 }, - { "INT3434", 0 }, - { "INT3435", 0 }, - { "80860F0A", 0 }, - { "8086228A", 0 }, - { "APMC0D08", 0}, - { "AMD0020", 0 }, - { "AMDI0020", 0 }, - { "AMDI0022", 0 }, - { "BRCM2032", 0 }, - { "HISI0031", 0 }, + { "80860F0A", (kernel_ulong_t)&dw8250_dw_apb }, + { "8086228A", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMD0020", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMDI0020", (kernel_ulong_t)&dw8250_dw_apb }, + { "AMDI0022", (kernel_ulong_t)&dw8250_dw_apb }, + { "APMC0D08", (kernel_ulong_t)&dw8250_dw_apb}, + { "BRCM2032", (kernel_ulong_t)&dw8250_dw_apb }, + { "HISI0031", (kernel_ulong_t)&dw8250_dw_apb }, + { "INT33C4", (kernel_ulong_t)&dw8250_dw_apb }, + { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb }, + { "INT3434", (kernel_ulong_t)&dw8250_dw_apb }, + { "INT3435", (kernel_ulong_t)&dw8250_dw_apb }, { }, }; MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8f32fe9e149e..3c36a06a20b0 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1949,7 +1949,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) if ((status & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) { if (!up->dma || up->dma->tx_err) serial8250_tx_chars(up); - else + else if (!up->dma->tx_running) __stop_tx(up); } @@ -2975,8 +2975,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) + if (!port->mapbase) { + ret = -EINVAL; break; + } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 97ef41cb2721..16a21422ddce 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1367,6 +1367,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } +static void pl011_throttle_rx(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + pl011_stop_rx(port); + spin_unlock_irqrestore(&port->lock, flags); +} + static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = @@ -1788,9 +1797,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { + unsigned long flags; unsigned int i; - spin_lock_irq(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); @@ -1812,7 +1822,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); - spin_unlock_irq(&uap->port.lock); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + + pl011_enable_interrupts(uap); } static int pl011_startup(struct uart_port *port) @@ -2225,6 +2242,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, + .throttle = pl011_throttle_rx, + .unthrottle = pl011_unthrottle_rx, .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 0429c2a54290..93489fe334d0 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -470,14 +470,14 @@ static void mvebu_uart_shutdown(struct uart_port *port) } } -static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) +static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) { unsigned int d_divisor, m_divisor; unsigned long flags; u32 brdv, osamp; if (!port->uartclk) - return -EOPNOTSUPP; + return 0; /* * The baudrate is derived from the UART clock thanks to divisors: @@ -548,7 +548,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) (m_divisor << 16) | (m_divisor << 24); writel(osamp, port->membase + UART_OSAMP); - return 0; + return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); } static void mvebu_uart_set_termios(struct uart_port *port, @@ -587,15 +587,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, max_baud = port->uartclk / 80; baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); - if (mvebu_uart_baud_rate_set(port, baud)) { - /* No clock available, baudrate cannot be changed */ - if (old) - baud = uart_get_baud_rate(port, old, NULL, - min_baud, max_baud); - } else { - tty_termios_encode_baud_rate(termios, baud, baud); - uart_update_timeout(port, termios->c_cflag, baud); - } + baud = mvebu_uart_baud_rate_set(port, baud); + + /* In case baudrate cannot be changed, report previous old value */ + if (baud == 0 && old) + baud = tty_termios_baud_rate(old); /* Only the following flag changes are supported */ if (old) { @@ -606,6 +602,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_cflag |= CS8; } + if (baud != 0) { + tty_termios_encode_baud_rate(termios, baud, baud); + uart_update_timeout(port, termios->c_cflag, baud); + } + spin_unlock_irqrestore(&port->lock, flags); } diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index d5ca904def34..1afe47b62ad5 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -377,8 +377,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport) /* Enable tx dma mode */ ucon = rd_regl(port, S3C2410_UCON); ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK); - ucon |= (dma_get_cache_alignment() >= 16) ? - S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1; + ucon |= S3C64XX_UCON_TXBURST_1; ucon |= S3C64XX_UCON_TXMODE_DMA; wr_regl(port, S3C2410_UCON, ucon); @@ -674,7 +673,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport) S3C64XX_UCON_DMASUS_EN | S3C64XX_UCON_TIMEOUT_EN | S3C64XX_UCON_RXMODE_MASK); - ucon |= S3C64XX_UCON_RXBURST_16 | + ucon |= S3C64XX_UCON_RXBURST_1 | 0xf << S3C64XX_UCON_TIMEOUT_SHIFT | S3C64XX_UCON_EMPTYINT_EN | S3C64XX_UCON_TIMEOUT_EN | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 338ebadfd44b..3dc926d6c00a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1941,11 +1941,6 @@ static int uart_proc_show(struct seq_file *m, void *v) } #endif -static inline bool uart_console_enabled(struct uart_port *port) -{ - return uart_console(port) && (port->cons->flags & CON_ENABLED); -} - static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index b7b44f4050d4..0973b03eeeaa 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -72,6 +72,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; + *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK); + if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h index b710c5ef89ab..f310a8274df1 100644 --- a/drivers/tty/tty.h +++ b/drivers/tty/tty.h @@ -111,4 +111,7 @@ static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index bfa431a8e690..595d8b49c745 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -532,6 +532,15 @@ static void flush_to_ldisc(struct work_struct *work) } +static inline void tty_flip_buffer_commit(struct tty_buffer *tail) +{ + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&tail->commit, tail->used); +} + /** * tty_flip_buffer_push - push terminal buffers * @port: tty port to push @@ -546,16 +555,43 @@ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; - /* - * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees - * buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); + tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); /** + * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and + * push + * @port: tty port + * @chars: characters + * @size: size + * + * The function combines tty_insert_flip_string() and tty_flip_buffer_push() + * with the exception of properly holding the @port->lock. + * + * To be used only internally (by pty currently). + * + * Returns: the number added. + */ +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t size) +{ + struct tty_bufhead *buf = &port->buf; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + size = tty_insert_flip_string(port, chars, size); + if (size) + tty_flip_buffer_commit(buf->tail); + spin_unlock_irqrestore(&port->lock, flags); + + queue_work(system_unbound_wq, &buf->work); + + return size; +} + +/** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise * diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index f8c87c4d7399..dfc1f4b445f3 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr) unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); - scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); + scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ce86d1b790c0..c7b337480e3e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5738,7 +5738,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) } hba->dev_info.wb_enabled = enable; - dev_info(hba->dev, "%s Write Booster %s\n", + dev_dbg(hba->dev, "%s Write Booster %s\n", __func__, enable ? "enabled" : "disabled"); return ret; @@ -7253,7 +7253,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) hba->silence_err_logs = false; /* scale up clocks to max frequency before full reinitialization */ - ufshcd_set_clk_freq(hba, true); + ufshcd_scale_clks(hba, true); err = ufshcd_hba_enable(hba); diff --git a/drivers/usb/dwc3/dwc3-am62.c b/drivers/usb/dwc3/dwc3-am62.c index fea7aca35dc8..173cf3579c55 100644 --- a/drivers/usb/dwc3/dwc3-am62.c +++ b/drivers/usb/dwc3/dwc3-am62.c @@ -195,8 +195,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) if (i == ARRAY_SIZE(dwc3_ti_rate_table)) { dev_err(dev, "unsupported usb2_refclk rate: %lu KHz\n", rate); - ret = -EINVAL; - goto err_clk_disable; + return -EINVAL; } data->rate_code = i; @@ -204,7 +203,7 @@ static int dwc3_ti_probe(struct platform_device *pdev) /* Read the syscon property and set the rate code */ ret = phy_syscon_pll_refclk(data); if (ret) - goto err_clk_disable; + return ret; /* VBUS divider select */ data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider"); @@ -245,8 +244,6 @@ err_pm_disable: clk_disable_unprepare(data->usb2_refclk); pm_runtime_disable(dev); pm_runtime_set_suspended(dev); -err_clk_disable: - clk_put(data->usb2_refclk); return ret; } @@ -276,7 +273,6 @@ static int dwc3_ti_remove(struct platform_device *pdev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - clk_put(data->usb2_refclk); platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8716bece1072..0d89dfa6eef5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4249,7 +4249,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } evt->count = 0; - evt->flags &= ~DWC3_EVENT_PENDING; ret = IRQ_HANDLED; /* Unmask interrupt */ @@ -4261,6 +4260,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } + /* Keep the clearing of DWC3_EVENT_PENDING at the end */ + evt->flags &= ~DWC3_EVENT_PENDING; + return ret; } diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index e5a6b6e36b3d..4303a3283ba0 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2371,6 +2371,7 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ const char *page, size_t len) \ { \ struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int size = min(sizeof(opts->aname), len + 1); \ int ret = 0; \ \ mutex_lock(&opts->lock); \ @@ -2379,8 +2380,9 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ goto end; \ } \ \ - ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \ - "%s", page); \ + ret = strscpy(opts->aname, page, size); \ + if (ret == -E2BIG) \ + ret = size - 1; \ \ end: \ mutex_unlock(&opts->lock); \ diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 385be30baad3..896c0d107f72 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -76,14 +76,9 @@ static int fsl_ehci_drv_probe(struct platform_device *pdev) return -ENODEV; } - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) { - dev_err(&pdev->dev, - "Found HC with no IRQ. Check %s setup!\n", - dev_name(&pdev->dev)); - return -ENODEV; - } - irq = res->start; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; hcd = __usb_create_hcd(&fsl_ehci_hc_driver, pdev->dev.parent, &pdev->dev, dev_name(&pdev->dev), NULL); diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 44a7e58a26e3..e5df17522892 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -112,6 +112,9 @@ static struct platform_device *fsl_usb2_device_register( goto error; } + pdev->dev.of_node = ofdev->dev.of_node; + pdev->dev.of_node_reused = true; + retval = platform_device_add(pdev); if (retval) goto error; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b440d338a895..d5a3986dfee7 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1023,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* Belimo Automation devices */ + { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, + { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index d1a9564697a4..4e92c165c86b 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1569,6 +1569,12 @@ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ /* + * Belimo Automation + */ +#define BELIMO_ZTH_PID 0x8050 +#define BELIMO_ZIP_PID 0xC811 + +/* * Unjo AB */ #define UNJO_VID 0x22B7 diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ee0e520707dd..c4724750c81a 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port, partner->usb_pd = 1; sysfs_notify(&partner_dev->kobj, NULL, "supports_usb_power_delivery"); + kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE); } put_device(partner_dev); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 4b56c39f766d..84b143eef395 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -396,13 +396,15 @@ static void __unmap_grant_pages_done(int result, unsigned int offset = data->unmap_ops - map->unmap_ops; for (i = 0; i < data->count; i++) { - WARN_ON(map->unmap_ops[offset+i].status); + WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay && + map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - WARN_ON(map->kunmap_ops[offset+i].status); + WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay && + map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); diff --git a/fs/afs/file.c b/fs/afs/file.c index 42118a4f3383..d1cfb235c4b9 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -375,7 +375,7 @@ static int afs_begin_cache_operation(struct netfs_io_request *rreq) } static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata) + struct folio **foliop, void **_fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 415bf1823fb3..9c21e214d29e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -675,9 +675,8 @@ struct btrfs_fs_info { rwlock_t global_root_lock; struct rb_root global_root_tree; - /* The xarray that holds all the FS roots */ - spinlock_t fs_roots_lock; - struct xarray fs_roots; + spinlock_t fs_roots_radix_lock; + struct radix_tree_root fs_roots_radix; /* block group cache stuff */ rwlock_t block_group_cache_lock; @@ -995,10 +994,10 @@ struct btrfs_fs_info { struct btrfs_delayed_root *delayed_root; - /* Extent buffer xarray */ + /* Extent buffer radix tree */ spinlock_t buffer_lock; /* Entries are eb->start / sectorsize */ - struct xarray extent_buffers; + struct radix_tree_root buffer_radix; /* next backup root to be overwritten */ int backup_root_index; @@ -1119,8 +1118,7 @@ enum { */ BTRFS_ROOT_SHAREABLE, BTRFS_ROOT_TRACK_DIRTY, - /* The root is tracked in fs_info::fs_roots */ - BTRFS_ROOT_REGISTERED, + BTRFS_ROOT_IN_RADIX, BTRFS_ROOT_ORPHAN_ITEM_INSERTED, BTRFS_ROOT_DEFRAG_RUNNING, BTRFS_ROOT_FORCE_COW, @@ -1224,10 +1222,10 @@ struct btrfs_root { struct rb_root inode_tree; /* - * Xarray that keeps track of delayed nodes of every inode, protected - * by inode_lock + * radix tree that keeps track of delayed nodes of every inode, + * protected by inode_lock */ - struct xarray delayed_nodes; + struct radix_tree_root delayed_nodes_tree; /* * right now this just gets used so that a root has its own devid * for stat. It may be used for more later diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 66779ab3ed4a..748bf6b0d860 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -78,7 +78,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( } spin_lock(&root->inode_lock); - node = xa_load(&root->delayed_nodes, ino); + node = radix_tree_lookup(&root->delayed_nodes_tree, ino); if (node) { if (btrfs_inode->delayed_node) { @@ -90,9 +90,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( /* * It's possible that we're racing into the middle of removing - * this node from the xarray. In this case, the refcount + * this node from the radix tree. In this case, the refcount * was zero and it should never go back to one. Just return - * NULL like it was never in the xarray at all; our release + * NULL like it was never in the radix at all; our release * function is in the process of removing it. * * Some implementations of refcount_inc refuse to bump the @@ -100,7 +100,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( * here, refcount_inc() may decide to just WARN_ONCE() instead * of actually bumping the refcount. * - * If this node is properly in the xarray, we want to bump the + * If this node is properly in the radix, we want to bump the * refcount twice, once for the inode and once for this get * operation. */ @@ -128,30 +128,36 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( u64 ino = btrfs_ino(btrfs_inode); int ret; - do { - node = btrfs_get_delayed_node(btrfs_inode); - if (node) - return node; +again: + node = btrfs_get_delayed_node(btrfs_inode); + if (node) + return node; - node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS); - if (!node) - return ERR_PTR(-ENOMEM); - btrfs_init_delayed_node(node, root, ino); + node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS); + if (!node) + return ERR_PTR(-ENOMEM); + btrfs_init_delayed_node(node, root, ino); - /* Cached in the inode and can be accessed */ - refcount_set(&node->refs, 2); + /* cached in the btrfs inode and can be accessed */ + refcount_set(&node->refs, 2); - spin_lock(&root->inode_lock); - ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS); - if (ret) { - spin_unlock(&root->inode_lock); - kmem_cache_free(delayed_node_cache, node); - if (ret != -EBUSY) - return ERR_PTR(ret); - } - } while (ret); + ret = radix_tree_preload(GFP_NOFS); + if (ret) { + kmem_cache_free(delayed_node_cache, node); + return ERR_PTR(ret); + } + + spin_lock(&root->inode_lock); + ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node); + if (ret == -EEXIST) { + spin_unlock(&root->inode_lock); + kmem_cache_free(delayed_node_cache, node); + radix_tree_preload_end(); + goto again; + } btrfs_inode->delayed_node = node; spin_unlock(&root->inode_lock); + radix_tree_preload_end(); return node; } @@ -270,7 +276,8 @@ static void __btrfs_release_delayed_node( * back up. We can delete it now. */ ASSERT(refcount_read(&delayed_node->refs) == 0); - xa_erase(&root->delayed_nodes, delayed_node->inode_id); + radix_tree_delete(&root->delayed_nodes_tree, + delayed_node->inode_id); spin_unlock(&root->inode_lock); kmem_cache_free(delayed_node_cache, delayed_node); } @@ -1863,35 +1870,34 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode) void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) { - unsigned long index = 0; - struct btrfs_delayed_node *delayed_node; + u64 inode_id = 0; struct btrfs_delayed_node *delayed_nodes[8]; + int i, n; while (1) { - int n = 0; - spin_lock(&root->inode_lock); - if (xa_empty(&root->delayed_nodes)) { + n = radix_tree_gang_lookup(&root->delayed_nodes_tree, + (void **)delayed_nodes, inode_id, + ARRAY_SIZE(delayed_nodes)); + if (!n) { spin_unlock(&root->inode_lock); - return; + break; } - xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) { + inode_id = delayed_nodes[n - 1]->inode_id + 1; + for (i = 0; i < n; i++) { /* * Don't increase refs in case the node is dead and * about to be removed from the tree in the loop below */ - if (refcount_inc_not_zero(&delayed_node->refs)) { - delayed_nodes[n] = delayed_node; - n++; - } - if (n >= ARRAY_SIZE(delayed_nodes)) - break; + if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) + delayed_nodes[i] = NULL; } - index++; spin_unlock(&root->inode_lock); - for (int i = 0; i < n; i++) { + for (i = 0; i < n; i++) { + if (!delayed_nodes[i]) + continue; __btrfs_kill_delayed_node(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i]); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ba005c41983..de440ebf5648 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,6 +5,7 @@ #include <linux/fs.h> #include <linux/blkdev.h> +#include <linux/radix-tree.h> #include <linux/writeback.h> #include <linux/workqueue.h> #include <linux/kthread.h> @@ -485,7 +486,7 @@ static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info, uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur, fs_info->nodesize); - /* A dirty eb shouldn't disappear from extent_buffers */ + /* A dirty eb shouldn't disappear from buffer_radix */ if (WARN_ON(!eb)) return -EUCLEAN; @@ -1158,7 +1159,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->nr_delalloc_inodes = 0; root->nr_ordered_extents = 0; root->inode_tree = RB_ROOT; - xa_init_flags(&root->delayed_nodes, GFP_ATOMIC); + INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); btrfs_init_root_block_rsv(root); @@ -1210,9 +1211,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&root->leak_list); - spin_lock(&fs_info->fs_roots_lock); + spin_lock(&fs_info->fs_roots_radix_lock); list_add_tail(&root->leak_list, &fs_info->allocated_roots); - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); #endif } @@ -1659,11 +1660,12 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, { struct btrfs_root *root; - spin_lock(&fs_info->fs_roots_lock); - root = xa_load(&fs_info->fs_roots, (unsigned long)root_id); + spin_lock(&fs_info->fs_roots_radix_lock); + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_id); if (root) root = btrfs_grab_root(root); - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); return root; } @@ -1705,14 +1707,20 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, { int ret; - spin_lock(&fs_info->fs_roots_lock); - ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid, - root, GFP_NOFS); + ret = radix_tree_preload(GFP_NOFS); + if (ret) + return ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + ret = radix_tree_insert(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + root); if (ret == 0) { btrfs_grab_root(root); - set_bit(BTRFS_ROOT_REGISTERED, &root->state); + set_bit(BTRFS_ROOT_IN_RADIX, &root->state); } - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); + radix_tree_preload_end(); return ret; } @@ -2342,9 +2350,9 @@ void btrfs_put_root(struct btrfs_root *root) btrfs_drew_lock_destroy(&root->snapshot_lock); free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG - spin_lock(&root->fs_info->fs_roots_lock); + spin_lock(&root->fs_info->fs_roots_radix_lock); list_del_init(&root->leak_list); - spin_unlock(&root->fs_info->fs_roots_lock); + spin_unlock(&root->fs_info->fs_roots_radix_lock); #endif kfree(root); } @@ -2352,21 +2360,28 @@ void btrfs_put_root(struct btrfs_root *root) void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) { - struct btrfs_root *root; - unsigned long index = 0; + int ret; + struct btrfs_root *gang[8]; + int i; while (!list_empty(&fs_info->dead_roots)) { - root = list_entry(fs_info->dead_roots.next, - struct btrfs_root, root_list); - list_del(&root->root_list); + gang[0] = list_entry(fs_info->dead_roots.next, + struct btrfs_root, root_list); + list_del(&gang[0]->root_list); - if (test_bit(BTRFS_ROOT_REGISTERED, &root->state)) - btrfs_drop_and_free_fs_root(fs_info, root); - btrfs_put_root(root); + if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) + btrfs_drop_and_free_fs_root(fs_info, gang[0]); + btrfs_put_root(gang[0]); } - xa_for_each(&fs_info->fs_roots, index, root) { - btrfs_drop_and_free_fs_root(fs_info, root); + while (1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) + btrfs_drop_and_free_fs_root(fs_info, gang[i]); } } @@ -3134,8 +3149,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) { - xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC); - xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); + INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); @@ -3143,7 +3158,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); - spin_lock_init(&fs_info->fs_roots_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->super_lock); @@ -3374,7 +3389,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) /* * btrfs_find_orphan_roots() is responsible for finding all the dead * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load - * them into the fs_info->fs_roots. This must be done before + * them into the fs_info->fs_roots_radix tree. This must be done before * calling btrfs_orphan_cleanup() on the tree root. If we don't do it * first, then btrfs_orphan_cleanup() will delete a dead root's orphan * item before the root's tree is deleted - this means that if we unmount @@ -4499,11 +4514,12 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, { bool drop_ref = false; - spin_lock(&fs_info->fs_roots_lock); - xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid); - if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state)) + spin_lock(&fs_info->fs_roots_radix_lock); + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid); + if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state)) drop_ref = true; - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); if (BTRFS_FS_ERROR(fs_info)) { ASSERT(root->log_root == NULL); @@ -4519,48 +4535,50 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { - struct btrfs_root *roots[8]; - unsigned long index = 0; - int i; + u64 root_objectid = 0; + struct btrfs_root *gang[8]; + int i = 0; int err = 0; - int grabbed; + unsigned int ret = 0; while (1) { - struct btrfs_root *root; - - spin_lock(&fs_info->fs_roots_lock); - if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) { - spin_unlock(&fs_info->fs_roots_lock); - return err; + spin_lock(&fs_info->fs_roots_radix_lock); + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang)); + if (!ret) { + spin_unlock(&fs_info->fs_roots_radix_lock); + break; } + root_objectid = gang[ret - 1]->root_key.objectid + 1; - grabbed = 0; - xa_for_each_start(&fs_info->fs_roots, index, root, index) { - /* Avoid grabbing roots in dead_roots */ - if (btrfs_root_refs(&root->root_item) > 0) - roots[grabbed++] = btrfs_grab_root(root); - if (grabbed >= ARRAY_SIZE(roots)) - break; + for (i = 0; i < ret; i++) { + /* Avoid to grab roots in dead_roots */ + if (btrfs_root_refs(&gang[i]->root_item) == 0) { + gang[i] = NULL; + continue; + } + /* grab all the search result for later use */ + gang[i] = btrfs_grab_root(gang[i]); } - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); - for (i = 0; i < grabbed; i++) { - if (!roots[i]) + for (i = 0; i < ret; i++) { + if (!gang[i]) continue; - index = roots[i]->root_key.objectid; - err = btrfs_orphan_cleanup(roots[i]); + root_objectid = gang[i]->root_key.objectid; + err = btrfs_orphan_cleanup(gang[i]); if (err) - goto out; - btrfs_put_root(roots[i]); + break; + btrfs_put_root(gang[i]); } - index++; + root_objectid++; } -out: - /* Release the roots that remain uncleaned due to error */ - for (; i < grabbed; i++) { - if (roots[i]) - btrfs_put_root(roots[i]); + /* release the uncleaned roots due to error */ + for (; i < ret; i++) { + if (gang[i]) + btrfs_put_root(gang[i]); } return err; } @@ -4879,28 +4897,31 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) { - unsigned long index = 0; - int grabbed = 0; - struct btrfs_root *roots[8]; + struct btrfs_root *gang[8]; + u64 root_objectid = 0; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang))) != 0) { + int i; - spin_lock(&fs_info->fs_roots_lock); - while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index, - ULONG_MAX, 8, XA_PRESENT))) { - for (int i = 0; i < grabbed; i++) - roots[i] = btrfs_grab_root(roots[i]); - spin_unlock(&fs_info->fs_roots_lock); + for (i = 0; i < ret; i++) + gang[i] = btrfs_grab_root(gang[i]); + spin_unlock(&fs_info->fs_roots_radix_lock); - for (int i = 0; i < grabbed; i++) { - if (!roots[i]) + for (i = 0; i < ret; i++) { + if (!gang[i]) continue; - index = roots[i]->root_key.objectid; - btrfs_free_log(NULL, roots[i]); - btrfs_put_root(roots[i]); + root_objectid = gang[i]->root_key.objectid; + btrfs_free_log(NULL, gang[i]); + btrfs_put_root(gang[i]); } - index++; - spin_lock(&fs_info->fs_roots_lock); + root_objectid++; + spin_lock(&fs_info->fs_roots_radix_lock); } - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log_root_tree(NULL, fs_info); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4157ecc27d4b..a3afc15430ce 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5829,7 +5829,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) btrfs_qgroup_convert_reserved_meta(root, INT_MAX); btrfs_qgroup_free_meta_all_pertrans(root); - if (test_bit(BTRFS_ROOT_REGISTERED, &root->state)) + if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) btrfs_add_dropped_root(trans, root); else btrfs_put_root(root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 04e36343da3a..f03ab5dbda7a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2966,7 +2966,7 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page) } /* - * Find extent buffer for a given bytenr. + * Find extent buffer for a givne bytenr. * * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking * in endio context. @@ -2985,9 +2985,11 @@ static struct extent_buffer *find_extent_buffer_readpage( return (struct extent_buffer *)page->private; } - /* For subpage case, we need to lookup extent buffer xarray */ - eb = xa_load(&fs_info->extent_buffers, - bytenr >> fs_info->sectorsize_bits); + /* For subpage case, we need to lookup buffer radix tree */ + rcu_read_lock(); + eb = radix_tree_lookup(&fs_info->buffer_radix, + bytenr >> fs_info->sectorsize_bits); + rcu_read_unlock(); ASSERT(eb); return eb; } @@ -4435,8 +4437,8 @@ static struct extent_buffer *find_extent_buffer_nolock( struct extent_buffer *eb; rcu_read_lock(); - eb = xa_load(&fs_info->extent_buffers, - start >> fs_info->sectorsize_bits); + eb = radix_tree_lookup(&fs_info->buffer_radix, + start >> fs_info->sectorsize_bits); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); return eb; @@ -6129,22 +6131,24 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, if (!eb) return ERR_PTR(-ENOMEM); eb->fs_info = fs_info; - - do { - ret = xa_insert(&fs_info->extent_buffers, - start >> fs_info->sectorsize_bits, - eb, GFP_NOFS); - if (ret == -ENOMEM) { - exists = ERR_PTR(ret); +again: + ret = radix_tree_preload(GFP_NOFS); + if (ret) { + exists = ERR_PTR(ret); + goto free_eb; + } + spin_lock(&fs_info->buffer_lock); + ret = radix_tree_insert(&fs_info->buffer_radix, + start >> fs_info->sectorsize_bits, eb); + spin_unlock(&fs_info->buffer_lock); + radix_tree_preload_end(); + if (ret == -EEXIST) { + exists = find_extent_buffer(fs_info, start); + if (exists) goto free_eb; - } - if (ret == -EBUSY) { - exists = find_extent_buffer(fs_info, start); - if (exists) - goto free_eb; - } - } while (ret); - + else + goto again; + } check_buffer_tree_ref(eb); set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); @@ -6319,22 +6323,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, } if (uptodate) set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - - do { - ret = xa_insert(&fs_info->extent_buffers, - start >> fs_info->sectorsize_bits, - eb, GFP_NOFS); - if (ret == -ENOMEM) { - exists = ERR_PTR(ret); +again: + ret = radix_tree_preload(GFP_NOFS); + if (ret) { + exists = ERR_PTR(ret); + goto free_eb; + } + + spin_lock(&fs_info->buffer_lock); + ret = radix_tree_insert(&fs_info->buffer_radix, + start >> fs_info->sectorsize_bits, eb); + spin_unlock(&fs_info->buffer_lock); + radix_tree_preload_end(); + if (ret == -EEXIST) { + exists = find_extent_buffer(fs_info, start); + if (exists) goto free_eb; - } - if (ret == -EBUSY) { - exists = find_extent_buffer(fs_info, start); - if (exists) - goto free_eb; - } - } while (ret); - + else + goto again; + } /* add one reference for the tree */ check_buffer_tree_ref(eb); set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); @@ -6379,8 +6386,10 @@ static int release_extent_buffer(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); - xa_erase(&fs_info->extent_buffers, - eb->start >> fs_info->sectorsize_bits); + spin_lock(&fs_info->buffer_lock); + radix_tree_delete(&fs_info->buffer_radix, + eb->start >> fs_info->sectorsize_bits); + spin_unlock(&fs_info->buffer_lock); } else { spin_unlock(&eb->refs_lock); } @@ -7325,25 +7334,42 @@ void memmove_extent_buffer(const struct extent_buffer *dst, } } +#define GANG_LOOKUP_SIZE 16 static struct extent_buffer *get_next_extent_buffer( struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) { - struct extent_buffer *eb; - unsigned long index; + struct extent_buffer *gang[GANG_LOOKUP_SIZE]; + struct extent_buffer *found = NULL; u64 page_start = page_offset(page); + u64 cur = page_start; ASSERT(in_range(bytenr, page_start, PAGE_SIZE)); lockdep_assert_held(&fs_info->buffer_lock); - xa_for_each_start(&fs_info->extent_buffers, index, eb, - page_start >> fs_info->sectorsize_bits) { - if (in_range(eb->start, page_start, PAGE_SIZE)) - return eb; - else if (eb->start >= page_start + PAGE_SIZE) - /* Already beyond page end */ - return NULL; + while (cur < page_start + PAGE_SIZE) { + int ret; + int i; + + ret = radix_tree_gang_lookup(&fs_info->buffer_radix, + (void **)gang, cur >> fs_info->sectorsize_bits, + min_t(unsigned int, GANG_LOOKUP_SIZE, + PAGE_SIZE / fs_info->nodesize)); + if (ret == 0) + goto out; + for (i = 0; i < ret; i++) { + /* Already beyond page end */ + if (gang[i]->start >= page_start + PAGE_SIZE) + goto out; + /* Found one */ + if (gang[i]->start >= bytenr) { + found = gang[i]; + goto out; + } + } + cur = gang[ret - 1]->start + gang[ret - 1]->len; } - return NULL; +out: + return found; } static int try_release_subpage_extent_buffer(struct page *page) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0172f75e051a..d50448bf8eed 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3578,7 +3578,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) u64 last_objectid = 0; int ret = 0, nr_unlink = 0; - /* Bail out if the cleanup is already running. */ if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state)) return 0; @@ -3661,17 +3660,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * * btrfs_find_orphan_roots() ran before us, which has * found all deleted roots and loaded them into - * fs_info->fs_roots. So here we can find if an + * fs_info->fs_roots_radix. So here we can find if an * orphan item corresponds to a deleted root by looking - * up the root from that xarray. + * up the root from that radix tree. */ - spin_lock(&fs_info->fs_roots_lock); - dead_root = xa_load(&fs_info->fs_roots, - (unsigned long)found_key.objectid); + spin_lock(&fs_info->fs_roots_radix_lock); + dead_root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)found_key.objectid); if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0) is_dead_root = 1; - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); if (is_dead_root) { /* prevent this orphan from being found again */ @@ -3911,7 +3910,7 @@ cache_index: * cache. * * This is required for both inode re-read from disk and delayed inode - * in the delayed_nodes xarray. + * in delayed_nodes_tree. */ if (BTRFS_I(inode)->last_trans == fs_info->generation) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fa56890ff81f..c7dea639a56f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -10,6 +10,7 @@ #include <linux/mount.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> +#include <linux/radix-tree.h> #include <linux/vmalloc.h> #include <linux/string.h> #include <linux/compat.h> @@ -127,7 +128,7 @@ struct send_ctx { struct list_head new_refs; struct list_head deleted_refs; - struct xarray name_cache; + struct radix_tree_root name_cache; struct list_head name_cache_list; int name_cache_size; @@ -268,13 +269,14 @@ struct orphan_dir_info { struct name_cache_entry { struct list_head list; /* - * On 32bit kernels, xarray has only 32bit indices, but we need to - * handle 64bit inums. We use the lower 32bit of the 64bit inum to store - * it in the tree. If more than one inum would fall into the same entry, - * we use inum_aliases to store the additional entries. inum_aliases is - * also used to store entries with the same inum but different generations. + * radix_tree has only 32bit entries but we need to handle 64bit inums. + * We use the lower 32bit of the 64bit inum to store it in the tree. If + * more then one inum would fall into the same entry, we use radix_list + * to store the additional entries. radix_list is also used to store + * entries where two entries have the same inum but different + * generations. */ - struct list_head inum_aliases; + struct list_head radix_list; u64 ino; u64 gen; u64 parent_ino; @@ -2024,9 +2026,9 @@ out: } /* - * Insert a name cache entry. On 32bit kernels the xarray index is 32bit, + * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, * so we need to do some special handling in case we have clashes. This function - * takes care of this with the help of name_cache_entry::inum_aliases. + * takes care of this with the help of name_cache_entry::radix_list. * In case of error, nce is kfreed. */ static int name_cache_insert(struct send_ctx *sctx, @@ -2035,7 +2037,8 @@ static int name_cache_insert(struct send_ctx *sctx, int ret = 0; struct list_head *nce_head; - nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino); + nce_head = radix_tree_lookup(&sctx->name_cache, + (unsigned long)nce->ino); if (!nce_head) { nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL); if (!nce_head) { @@ -2044,14 +2047,14 @@ static int name_cache_insert(struct send_ctx *sctx, } INIT_LIST_HEAD(nce_head); - ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL); + ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); if (ret < 0) { kfree(nce_head); kfree(nce); return ret; } } - list_add_tail(&nce->inum_aliases, nce_head); + list_add_tail(&nce->radix_list, nce_head); list_add_tail(&nce->list, &sctx->name_cache_list); sctx->name_cache_size++; @@ -2063,14 +2066,15 @@ static void name_cache_delete(struct send_ctx *sctx, { struct list_head *nce_head; - nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino); + nce_head = radix_tree_lookup(&sctx->name_cache, + (unsigned long)nce->ino); if (!nce_head) { btrfs_err(sctx->send_root->fs_info, "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", nce->ino, sctx->name_cache_size); } - list_del(&nce->inum_aliases); + list_del(&nce->radix_list); list_del(&nce->list); sctx->name_cache_size--; @@ -2078,7 +2082,7 @@ static void name_cache_delete(struct send_ctx *sctx, * We may not get to the final release of nce_head if the lookup fails */ if (nce_head && list_empty(nce_head)) { - xa_erase(&sctx->name_cache, (unsigned long)nce->ino); + radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); kfree(nce_head); } } @@ -2089,11 +2093,11 @@ static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, struct list_head *nce_head; struct name_cache_entry *cur; - nce_head = xa_load(&sctx->name_cache, (unsigned long)ino); + nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); if (!nce_head) return NULL; - list_for_each_entry(cur, nce_head, inum_aliases) { + list_for_each_entry(cur, nce_head, radix_list) { if (cur->ino == ino && cur->gen == gen) return cur; } @@ -7518,7 +7522,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) INIT_LIST_HEAD(&sctx->new_refs); INIT_LIST_HEAD(&sctx->deleted_refs); - xa_init_flags(&sctx->name_cache, GFP_KERNEL); + INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); INIT_LIST_HEAD(&sctx->name_cache_list); sctx->flags = arg->flags; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 1591bfa55bcc..d8e56edd6991 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -150,8 +150,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) { - unsigned long index; - struct extent_buffer *eb; + struct radix_tree_iter iter; + void **slot; struct btrfs_device *dev, *tmp; if (!fs_info) @@ -163,9 +163,25 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) test_mnt->mnt_sb->s_fs_info = NULL; - xa_for_each(&fs_info->extent_buffers, index, eb) { + spin_lock(&fs_info->buffer_lock); + radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { + struct extent_buffer *eb; + + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); + if (!eb) + continue; + /* Shouldn't happen but that kind of thinking creates CVE's */ + if (radix_tree_exception(eb)) { + if (radix_tree_deref_retry(eb)) + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + spin_unlock(&fs_info->buffer_lock); free_extent_buffer_stale(eb); + spin_lock(&fs_info->buffer_lock); } + spin_unlock(&fs_info->buffer_lock); btrfs_mapping_tree_free(&fs_info->mapping_tree); list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices, @@ -186,7 +202,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root) if (!root) return; /* Will be freed by btrfs_free_fs_roots */ - if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state))) + if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) return; btrfs_global_root_delete(root); btrfs_put_root(root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 06c0a958d114..875b801ab3d7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -23,7 +23,7 @@ #include "space-info.h" #include "zoned.h" -#define BTRFS_ROOT_TRANS_TAG XA_MARK_0 +#define BTRFS_ROOT_TRANS_TAG 0 /* * Transaction states and transitions @@ -437,15 +437,15 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, */ smp_wmb(); - spin_lock(&fs_info->fs_roots_lock); + spin_lock(&fs_info->fs_roots_radix_lock); if (root->last_trans == trans->transid && !force) { - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); return 0; } - xa_set_mark(&fs_info->fs_roots, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - spin_unlock(&fs_info->fs_roots_lock); + radix_tree_tag_set(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&fs_info->fs_roots_radix_lock); root->last_trans = trans->transid; /* this is pretty tricky. We don't want to @@ -487,9 +487,11 @@ void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, spin_unlock(&cur_trans->dropped_roots_lock); /* Make sure we don't try to update the root at commit time */ - xa_clear_mark(&fs_info->fs_roots, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); + spin_lock(&fs_info->fs_roots_radix_lock); + radix_tree_tag_clear(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&fs_info->fs_roots_radix_lock); } int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, @@ -1402,8 +1404,9 @@ void btrfs_add_dead_root(struct btrfs_root *root) static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_root *root; - unsigned long index; + struct btrfs_root *gang[8]; + int i; + int ret; /* * At this point no one can be using this transaction to modify any tree @@ -1411,46 +1414,57 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) */ ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); - spin_lock(&fs_info->fs_roots_lock); - xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) { - int ret; - - /* - * At this point we can neither have tasks logging inodes - * from a root nor trying to commit a log tree. - */ - ASSERT(atomic_read(&root->log_writers) == 0); - ASSERT(atomic_read(&root->log_commit[0]) == 0); - ASSERT(atomic_read(&root->log_commit[1]) == 0); - - xa_clear_mark(&fs_info->fs_roots, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_TRANS_TAG); - spin_unlock(&fs_info->fs_roots_lock); - - btrfs_free_log(trans, root); - ret = btrfs_update_reloc_root(trans, root); - if (ret) - return ret; - - /* See comments in should_cow_block() */ - clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); - smp_mb__after_atomic(); + spin_lock(&fs_info->fs_roots_radix_lock); + while (1) { + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + struct btrfs_root *root = gang[i]; + int ret2; + + /* + * At this point we can neither have tasks logging inodes + * from a root nor trying to commit a log tree. + */ + ASSERT(atomic_read(&root->log_writers) == 0); + ASSERT(atomic_read(&root->log_commit[0]) == 0); + ASSERT(atomic_read(&root->log_commit[1]) == 0); + + radix_tree_tag_clear(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&fs_info->fs_roots_radix_lock); + + btrfs_free_log(trans, root); + ret2 = btrfs_update_reloc_root(trans, root); + if (ret2) + return ret2; + + /* see comments in should_cow_block() */ + clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); + smp_mb__after_atomic(); + + if (root->commit_root != root->node) { + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); + btrfs_set_root_node(&root->root_item, + root->node); + } - if (root->commit_root != root->node) { - list_add_tail(&root->dirty_list, - &trans->transaction->switch_commits); - btrfs_set_root_node(&root->root_item, root->node); + ret2 = btrfs_update_root(trans, fs_info->tree_root, + &root->root_key, + &root->root_item); + if (ret2) + return ret2; + spin_lock(&fs_info->fs_roots_radix_lock); + btrfs_qgroup_free_meta_all_pertrans(root); } - - ret = btrfs_update_root(trans, fs_info->tree_root, - &root->root_key, &root->root_item); - if (ret) - return ret; - spin_lock(&fs_info->fs_roots_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } - spin_unlock(&fs_info->fs_roots_lock); + spin_unlock(&fs_info->fs_roots_radix_lock); return 0; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6dee88815491..d6e5916138e4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -63,7 +63,7 @@ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); static inline struct ceph_snap_context *page_snap_context(struct page *page) { @@ -1288,18 +1288,19 @@ ceph_find_incompatible(struct page *page) } static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len, - struct folio *folio, void **_fsdata) + struct folio **foliop, void **_fsdata) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - snapc = ceph_find_incompatible(folio_page(folio, 0)); + snapc = ceph_find_incompatible(folio_page(*foliop, 0)); if (snapc) { int r; - folio_unlock(folio); - folio_put(folio); + folio_unlock(*foliop); + folio_put(*foliop); + *foliop = NULL; if (IS_ERR(snapc)) return PTR_ERR(snapc); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 42f892c5712e..0ce535852151 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -319,8 +319,9 @@ zero_out: * conflicting writes once the folio is grabbed and locked. It is passed a * pointer to the fsdata cookie that gets returned to the VM to be passed to * write_end. It is permitted to sleep. It should return 0 if the request - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to - * be regot; or return an error. + * should go ahead or it may return an error. It may also unlock and put the + * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0 + * will cause the folio to be re-got and the process to be retried. * * The calling netfs must initialise a netfs context contiguous to the vfs * inode before calling this. @@ -348,13 +349,13 @@ retry: if (ctx->ops->check_write_begin) { /* Allow the netfs (eg. ceph) to flush conflicts. */ - ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); + ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata); if (ret < 0) { trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); - if (ret == -EAGAIN) - goto retry; goto error; } + if (!folio) + goto retry; } if (folio_test_uptodate(folio)) @@ -416,8 +417,10 @@ have_folio_no_wait: error_put: netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); error: - folio_unlock(folio); - folio_put(folio); + if (folio) { + folio_unlock(folio); + folio_put(folio); + } _leave(" = %d", ret); return ret; } diff --git a/fs/remap_range.c b/fs/remap_range.c index 5e0d97e02f96..881a306ee247 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -547,7 +547,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) else if (deduped < 0) info->status = deduped; else - info->bytes_deduped = deduped; + info->bytes_deduped = len; next_fdput: fdput(dst_fd); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ff3e82553a76..cb2167c89eee 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -158,9 +158,24 @@ * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * + * MMU_GATHER_NO_FLUSH_CACHE + * + * Indicates the architecture has flush_cache_range() but it needs *NOT* be called + * before unmapping a VMA. + * + * NOTE: strictly speaking we shouldn't have this knob and instead rely on + * flush_cache_range() being a NOP, except Sparc64 seems to be + * different here. + * + * MMU_GATHER_MERGE_VMAS + * + * Indicates the architecture wants to merge ranges over VMAs; typical when + * multiple range invalidates are more expensive than a full invalidate. + * * MMU_GATHER_NO_RANGE * - * Use this if your architecture lacks an efficient flush_tlb_range(). + * Use this if your architecture lacks an efficient flush_tlb_range(). This + * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * @@ -288,6 +303,7 @@ struct mmu_gather { */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; + unsigned int vma_pfn : 1; unsigned int batch_count; @@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) #ifdef CONFIG_MMU_GATHER_NO_RANGE -#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) -#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#if defined(tlb_flush) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* @@ -355,17 +371,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } -#define tlb_end_vma tlb_end_vma -static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush - -#if defined(tlb_start_vma) || defined(tlb_end_vma) -#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() -#endif - /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation @@ -385,6 +393,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_range(&vma, tlb->start, tlb->end); } } +#endif + +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) @@ -402,17 +413,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); + tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } -#else - -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - -#endif - -#endif /* CONFIG_MMU_GATHER_NO_RANGE */ - static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* @@ -486,32 +489,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ -#ifndef tlb_start_vma static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); +#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); -} #endif +} -#ifndef tlb_end_vma static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* - * Do a TLB flush and reset the range at VMA boundaries; this avoids - * the ranges growing with the unused space between consecutive VMAs, - * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on - * this. + * VM_PFNMAP is more fragile because the core mm will not track the + * page mapcount -- there might not be page-frames for these PFNs after + * all. Force flush TLBs for such ranges to avoid munmap() vs + * unmap_mapping_range() races. */ - tlb_flush_mmu_tlbonly(tlb); + if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs. + */ + tlb_flush_mmu_tlbonly(tlb); + } } -#endif /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b21f2a3452f..20c26aed7896 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -47,6 +47,7 @@ struct kobject; struct mem_cgroup; struct module; struct bpf_func_state; +struct ftrace_ops; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -221,7 +222,7 @@ struct bpf_map { u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg; + struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; struct bpf_map_off_arr *off_arr; @@ -751,6 +752,16 @@ struct btf_func_model { /* Return the return value of fentry prog. Only used by bpf_struct_ops. */ #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) +/* Get original function from stack instead of from provided direct address. + * Makes sense for trampolines with fexit or fmod_ret programs. + */ +#define BPF_TRAMP_F_ORIG_STACK BIT(5) + +/* This trampoline is on a function with another ftrace_ops with IPMODIFY, + * e.g., a live patch. This flag is set and cleared by ftrace call backs, + */ +#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -833,9 +844,11 @@ struct bpf_tramp_image { struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; + struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; + u32 flags; u64 key; struct { struct btf_func_model model; @@ -1044,7 +1057,6 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; - bool use_bpf_prog_pack; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1255,9 +1267,6 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif -int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype); -void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1281,6 +1290,13 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +#endif + +#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype); +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); +#else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype) { @@ -1921,7 +1937,8 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs); + struct bpf_reg_state *regs, + u32 kfunc_flags); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 81b19669efba..2e3bad8640dc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -345,10 +345,10 @@ struct bpf_verifier_state_list { }; struct bpf_loop_inline_state { - int initialized:1; /* set to true upon first entry */ - int fit_for_inline:1; /* true if callback function is the same - * at each call and flags are always zero - */ + unsigned int initialized:1; /* set to true upon first entry */ + unsigned int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ u32 callback_subprogno; /* valid when fit_for_inline is true */ }; diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bfed7fa0428..cdb376d53238 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,14 +12,43 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) -enum btf_kfunc_type { - BTF_KFUNC_TYPE_CHECK, - BTF_KFUNC_TYPE_ACQUIRE, - BTF_KFUNC_TYPE_RELEASE, - BTF_KFUNC_TYPE_RET_NULL, - BTF_KFUNC_TYPE_KPTR_ACQUIRE, - BTF_KFUNC_TYPE_MAX, -}; +/* These need to be macros, as the expressions are used in assembler input */ +#define KF_ACQUIRE (1 << 0) /* kfunc is an acquire function */ +#define KF_RELEASE (1 << 1) /* kfunc is a release function */ +#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ +#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ +/* Trusted arguments are those which are meant to be referenced arguments with + * unchanged offset. It is used to enforce that pointers obtained from acquire + * kfuncs remain unmodified when being passed to helpers taking trusted args. + * + * Consider + * struct foo { + * int data; + * struct foo *next; + * }; + * + * struct bar { + * int data; + * struct foo f; + * }; + * + * struct foo *f = alloc_foo(); // Acquire kfunc + * struct bar *b = alloc_bar(); // Acquire kfunc + * + * If a kfunc set_foo_data() wants to operate only on the allocated object, it + * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * + * set_foo_data(f, 42); // Allowed + * set_foo_data(f->next, 42); // Rejected, non-referenced pointer + * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type + * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * + * In the final case, usually for the purposes of type matching, it is deduced + * by looking at the type of the member at the offset, but due to the + * requirement of trusted argument, this deduction will be strict and not done + * for this case. + */ +#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ struct btf; struct btf_member; @@ -30,16 +59,7 @@ struct btf_id_set; struct btf_kfunc_id_set { struct module *owner; - union { - struct { - struct btf_id_set *check_set; - struct btf_id_set *acquire_set; - struct btf_id_set *release_set; - struct btf_id_set *ret_null_set; - struct btf_id_set *kptr_acquire_set; - }; - struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; - }; + struct btf_id_set8 *set; }; struct btf_id_dtor_kfunc { @@ -378,9 +398,9 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id); + u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); @@ -397,12 +417,11 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -static inline bool btf_kfunc_id_set_contains(const struct btf *btf, +static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - return false; + return NULL; } static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 252a4befeab1..2aea877d644f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include <linux/compiler.h> /* for __PASTE */ @@ -25,7 +34,7 @@ struct btf_id_set { #define BTF_IDS_SECTION ".BTF_ids" -#define ____BTF_ID(symbol) \ +#define ____BTF_ID(symbol, word) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ ".local " #symbol " ; \n" \ @@ -33,10 +42,11 @@ asm( \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ ".zero 4 \n" \ +word \ ".popsection; \n"); -#define __BTF_ID(symbol) \ - ____BTF_ID(symbol) +#define __BTF_ID(symbol, word) \ + ____BTF_ID(symbol, word) #define __ID(prefix) \ __PASTE(prefix, __COUNTER__) @@ -46,7 +56,14 @@ asm( \ * to 4 zero bytes. */ #define BTF_ID(prefix, name) \ - __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__)) + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), "") + +#define ____BTF_ID_FLAGS(prefix, name, flags) \ + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), ".long " #flags "\n") +#define __BTF_ID_FLAGS(prefix, name, flags, ...) \ + ____BTF_ID_FLAGS(prefix, name, flags) +#define BTF_ID_FLAGS(prefix, name, ...) \ + __BTF_ID_FLAGS(prefix, name, ##__VA_ARGS__, 0) /* * The BTF_ID_LIST macro defines pure (unsorted) list @@ -145,10 +162,51 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set name; +/* + * The BTF_SET8_START/END macros pair defines sorted list of + * BTF IDs and their flags plus its members count, with the + * following layout: + * + * BTF_SET8_START(list) + * BTF_ID_FLAGS(type1, name1, flags) + * BTF_ID_FLAGS(type2, name2, flags) + * BTF_SET8_END(list) + * + * __BTF_ID__set8__list: + * .zero 8 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * .word (1 << 0) | (1 << 2) + * __BTF_ID__type2__name2__5: + * .zero 4 + * .word (1 << 3) | (1 << 1) | (1 << 2) + * + */ +#define __BTF_SET8_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set8__" #name "; \n" \ +"__BTF_ID__set8__" #name ":; \n" \ +".zero 8 \n" \ +".popsection; \n"); + +#define BTF_SET8_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET8_START(name, local) + +#define BTF_SET8_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set8__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set8 name; + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) +#define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; @@ -156,6 +214,8 @@ extern struct btf_id_set name; #define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) +#define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; +#define BTF_SET8_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4c1a8b247545..a5f21dc3c432 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1027,6 +1027,14 @@ u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); + +static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym.lnode) || + fp->aux->ksym.lnode.prev == LIST_POISON2; +} struct bpf_binary_header * bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 979f6bfa2c25..0b61371e287b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -208,6 +208,43 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), }; +/* + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes + * to a ftrace_ops. Note, the requests may fail. + * + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the DIRECT ops is being registered. + * This is called with both direct_mutex and + * ftrace_lock are locked. + * + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being registered. + * This is called with direct_mutex locked. + * + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being unregistered. + * This is called with direct_mutex locked. + */ +enum ftrace_ops_cmd { + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER, + FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER, +}; + +/* + * For most ftrace_ops_cmd, + * Returns: + * 0 - Success. + * Negative on failure. The return value is dependent on the + * callback. + */ +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { @@ -250,6 +287,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + ftrace_ops_func_t ops_func; #endif }; @@ -340,6 +378,7 @@ unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); #else struct ftrace_ops; @@ -384,6 +423,10 @@ static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned lo { return -ENODEV; } +static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 83cf7fd842e0..90a45ef7203b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1822,6 +1822,15 @@ struct _kvm_stats_desc { STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) +/* Instantaneous boolean value, read only */ +#define STATS_DESC_IBOOLEAN(SCOPE, name) \ + STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) +/* Peak (sticky) boolean value, read/write */ +#define STATS_DESC_PBOOLEAN(SCOPE, name) \ + STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) + /* Cumulative time in nanosecond */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ @@ -1853,7 +1862,7 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ HALT_POLL_HIST_COUNT), \ - STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) + STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) extern struct dentry *kvm_debugfs_dir; diff --git a/include/linux/lapb.h b/include/linux/lapb.h index eb56472f23b2..b5333f9413dc 100644 --- a/include/linux/lapb.h +++ b/include/linux/lapb.h @@ -6,6 +6,11 @@ #ifndef LAPB_KERNEL_H #define LAPB_KERNEL_H +#include <linux/skbuff.h> +#include <linux/timer.h> + +struct net_device; + #define LAPB_OK 0 #define LAPB_BADTOKEN 1 #define LAPB_INVALUE 2 diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1773e5df8e65..1b18dfa52e48 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -214,7 +214,7 @@ struct netfs_request_ops { void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); }; diff --git a/include/linux/reset.h b/include/linux/reset.h index 8a21b5756c3e..514ddf003efc 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -731,7 +731,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); } /** diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 657a0fc68a3f..fde258b3decd 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -390,6 +390,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5d7ff8850eb2..ca8afa382bf2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2487,6 +2487,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} + /* * Add data to an sk_buff */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 29917850f079..8df475db88c0 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -260,6 +260,7 @@ struct plat_stmmacenet_data { bool has_crossts; int int_snapshot_num; int ext_snapshot_num; + bool int_snapshot_en; bool ext_snapshot_en; bool multi_msi_en; int msi_mac_vec; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f742e50207fb..1c53c4c4d88f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/workqueue.h> +#include <net/sock.h> #include <uapi/linux/vm_sockets.h> #include "vsock_addr.h" diff --git a/include/net/amt.h b/include/net/amt.h index 0e40c3d64fcf..c881bc8b673b 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -7,6 +7,9 @@ #include <linux/siphash.h> #include <linux/jhash.h> +#include <linux/netdevice.h> +#include <net/gro_cells.h> +#include <net/rtnetlink.h> enum amt_msg_type { AMT_MSG_DISCOVERY = 1, @@ -78,6 +81,15 @@ enum amt_status { #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1) +/* Gateway events only */ +enum amt_event { + AMT_EVENT_NONE, + AMT_EVENT_RECEIVE, + AMT_EVENT_SEND_DISCOVERY, + AMT_EVENT_SEND_REQUEST, + __AMT_EVENT_MAX, +}; + struct amt_header { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 type:4, @@ -292,6 +304,12 @@ struct amt_group_node { struct hlist_head sources[]; }; +#define AMT_MAX_EVENTS 16 +struct amt_events { + enum amt_event event; + struct sk_buff *skb; +}; + struct amt_dev { struct net_device *dev; struct net_device *stream_dev; @@ -308,6 +326,7 @@ struct amt_dev { struct delayed_work req_wq; /* Protected by RTNL */ struct delayed_work secret_wq; + struct work_struct event_wq; /* AMT status */ enum amt_status status; /* Generated key */ @@ -345,6 +364,10 @@ struct amt_dev { /* Used only in gateway mode */ u64 mac:48, reserved:16; + /* AMT gateway side message handler queue */ + struct amt_events events[AMT_MAX_EVENTS]; + u8 event_idx; + u8 nr_events; }; #define AMT_TOS 0xc0 diff --git a/include/net/ax88796.h b/include/net/ax88796.h index 2ed23a368602..b658471f97f0 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -8,6 +8,8 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +#include <linux/types.h> + struct sk_buff; struct net_device; struct platform_device; diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 6b48d9e2aab9..e72f3b247b5e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -55,6 +55,8 @@ #define BTPROTO_CMTP 5 #define BTPROTO_HIDP 6 #define BTPROTO_AVDTP 7 +#define BTPROTO_ISO 8 +#define BTPROTO_LAST BTPROTO_ISO #define SOL_HCI 0 #define SOL_L2CAP 6 @@ -149,10 +151,51 @@ struct bt_voice { #define BT_MODE_LE_FLOWCTL 0x03 #define BT_MODE_EXT_FLOWCTL 0x04 -#define BT_PKT_STATUS 16 +#define BT_PKT_STATUS 16 #define BT_SCM_PKT_STATUS 0x03 +#define BT_ISO_QOS 17 + +#define BT_ISO_QOS_CIG_UNSET 0xff +#define BT_ISO_QOS_CIS_UNSET 0xff + +#define BT_ISO_QOS_BIG_UNSET 0xff +#define BT_ISO_QOS_BIS_UNSET 0xff + +struct bt_iso_io_qos { + __u32 interval; + __u16 latency; + __u16 sdu; + __u8 phy; + __u8 rtn; +}; + +struct bt_iso_qos { + union { + __u8 cig; + __u8 big; + }; + union { + __u8 cis; + __u8 bis; + }; + union { + __u8 sca; + __u8 sync_interval; + }; + __u8 packing; + __u8 framing; + struct bt_iso_io_qos in; + struct bt_iso_io_qos out; +}; + +#define BT_ISO_PHY_1M 0x01 +#define BT_ISO_PHY_2M 0x02 +#define BT_ISO_PHY_CODED 0x04 +#define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \ + BT_ISO_PHY_CODED) + #define BT_CODEC 19 struct bt_codec_caps { @@ -177,6 +220,8 @@ struct bt_codecs { #define BT_CODEC_TRANSPARENT 0x03 #define BT_CODEC_MSBC 0x05 +#define BT_ISO_BASE 20 + __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) @@ -494,7 +539,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, struct sk_buff *skb, **frag; skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) return skb; len -= skb->len; @@ -521,6 +566,7 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, } int bt_to_errno(u16 code); +__u8 bt_status(int err); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); @@ -558,6 +604,27 @@ static inline void sco_exit(void) } #endif +#if IS_ENABLED(CONFIG_BT_LE) +int iso_init(void); +int iso_exit(void); +bool iso_enabled(void); +#else +static inline int iso_init(void) +{ + return 0; +} + +static inline int iso_exit(void) +{ + return 0; +} + +static inline bool iso_enabled(void) +{ + return false; +} +#endif + int mgmt_init(void); void mgmt_exit(void); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe7935be7dc4..cf29511b25a8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -228,17 +228,6 @@ enum { */ HCI_QUIRK_VALID_LE_STATES, - /* When this quirk is set, then erroneous data reporting - * is ignored. This is mainly due to the fact that the HCI - * Read Default Erroneous Data Reporting command is advertised, - * but not supported; these controllers often reply with unknown - * command and tend to lock up randomly. Needing a hard reset. - * - * This quirk can be set before hci_register_dev is called or - * during the hdev->setup vendor callback. - */ - HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, - /* * When this quirk is set, then the hci_suspend_notifier is not * registered. This is intended for devices which drop completely @@ -327,6 +316,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, HCI_SC_ENABLED, @@ -349,6 +339,7 @@ enum { HCI_LE_SCAN_INTERRUPTED, HCI_WIDEBAND_SPEECH_ENABLED, HCI_EVENT_FILTER_CONFIGURED, + HCI_PA_SYNC, HCI_DUT_MODE, HCI_VENDOR_DIAG, @@ -361,6 +352,7 @@ enum { HCI_QUALITY_REPORT, HCI_OFFLOAD_CODECS_ENABLED, HCI_LE_SIMULTANEOUS_ROLES, + HCI_CMD_DRAIN_WORKQUEUE, __HCI_NUM_FLAGS, }; @@ -496,6 +488,7 @@ enum { #define LMP_EXT_INQ 0x01 #define LMP_SIMUL_LE_BR 0x02 #define LMP_SIMPLE_PAIR 0x08 +#define LMP_ERR_DATA_REPORTING 0x20 #define LMP_NO_FLUSH 0x40 #define LMP_LSTO 0x01 @@ -528,9 +521,11 @@ enum { #define HCI_LE_PHY_2M 0x01 #define HCI_LE_PHY_CODED 0x08 #define HCI_LE_EXT_ADV 0x10 +#define HCI_LE_PERIODIC_ADV 0x20 #define HCI_LE_CHAN_SEL_ALG2 0x40 #define HCI_LE_CIS_CENTRAL 0x10 #define HCI_LE_CIS_PERIPHERAL 0x20 +#define HCI_LE_ISO_BROADCASTER 0x40 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1874,6 +1869,22 @@ struct hci_cp_le_ext_conn_param { __le16 max_ce_len; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC 0x2044 +struct hci_cp_le_pa_create_sync { + __u8 options; + __u8 sid; + __u8 addr_type; + bdaddr_t addr; + __le16 skip; + __le16 sync_timeout; + __u8 sync_cte_type; +} __packed; + +#define HCI_OP_LE_PA_TERM_SYNC 0x2046 +struct hci_cp_le_pa_term_sync { + __le16 handle; +} __packed; + #define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS 0x203b struct hci_rp_le_read_num_supported_adv_sets { __u8 status; @@ -1908,13 +1919,6 @@ struct hci_rp_le_set_ext_adv_params { __u8 tx_power; } __packed; -#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039 -struct hci_cp_le_set_ext_adv_enable { - __u8 enable; - __u8 num_of_sets; - __u8 data[]; -} __packed; - struct hci_cp_ext_adv_set { __u8 handle; __le16 duration; @@ -1941,6 +1945,37 @@ struct hci_cp_le_set_ext_scan_rsp_data { __u8 data[]; } __packed; +#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039 +struct hci_cp_le_set_ext_adv_enable { + __u8 enable; + __u8 num_of_sets; + __u8 data[]; +} __packed; + +#define HCI_OP_LE_SET_PER_ADV_PARAMS 0x203e +struct hci_cp_le_set_per_adv_params { + __u8 handle; + __le16 min_interval; + __le16 max_interval; + __le16 periodic_properties; +} __packed; + +#define HCI_MAX_PER_AD_LENGTH 252 + +#define HCI_OP_LE_SET_PER_ADV_DATA 0x203f +struct hci_cp_le_set_per_adv_data { + __u8 handle; + __u8 operation; + __u8 length; + __u8 data[]; +} __packed; + +#define HCI_OP_LE_SET_PER_ADV_ENABLE 0x2040 +struct hci_cp_le_set_per_adv_enable { + __u8 enable; + __u8 handle; +} __packed; + #define LE_SET_ADV_DATA_OP_COMPLETE 0x03 #define LE_SET_ADV_DATA_NO_FRAG 0x01 @@ -1998,7 +2033,7 @@ struct hci_rp_le_read_iso_tx_sync { struct hci_cis_params { __u8 cis_id; __le16 c_sdu; - __le16 p_pdu; + __le16 p_sdu; __u8 c_phy; __u8 p_phy; __u8 c_rtn; @@ -2009,7 +2044,7 @@ struct hci_cp_le_set_cig_params { __u8 cig_id; __u8 c_interval[3]; __u8 p_interval[3]; - __u8 wc_sca; + __u8 sca; __u8 packing; __u8 framing; __le16 c_latency; @@ -2052,6 +2087,73 @@ struct hci_cp_le_reject_cis { __u8 reason; } __packed; +#define HCI_OP_LE_CREATE_BIG 0x2068 +struct hci_bis { + __u8 sdu_interval[3]; + __le16 sdu; + __le16 latency; + __u8 rtn; + __u8 phy; + __u8 packing; + __u8 framing; + __u8 encryption; + __u8 bcode[16]; +} __packed; + +struct hci_cp_le_create_big { + __u8 handle; + __u8 adv_handle; + __u8 num_bis; + struct hci_bis bis; +} __packed; + +#define HCI_OP_LE_TERM_BIG 0x206a +struct hci_cp_le_term_big { + __u8 handle; + __u8 reason; +} __packed; + +#define HCI_OP_LE_BIG_CREATE_SYNC 0x206b +struct hci_cp_le_big_create_sync { + __u8 handle; + __le16 sync_handle; + __u8 encryption; + __u8 bcode[16]; + __u8 mse; + __le16 timeout; + __u8 num_bis; + __u8 bis[0]; +} __packed; + +#define HCI_OP_LE_BIG_TERM_SYNC 0x206c +struct hci_cp_le_big_term_sync { + __u8 handle; +} __packed; + +#define HCI_OP_LE_SETUP_ISO_PATH 0x206e +struct hci_cp_le_setup_iso_path { + __le16 handle; + __u8 direction; + __u8 path; + __u8 codec; + __le16 codec_cid; + __le16 codec_vid; + __u8 delay[3]; + __u8 codec_cfg_len; + __u8 codec_cfg[0]; +} __packed; + +struct hci_rp_le_setup_iso_path { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_SET_HOST_FEATURE 0x2074 +struct hci_cp_le_set_host_feature { + __u8 bit_number; + __u8 bit_value; +} __packed; + /* ---- HCI Events ---- */ struct hci_ev_status { __u8 status; @@ -2580,6 +2682,18 @@ struct hci_ev_le_ext_adv_report { struct hci_ev_le_ext_adv_info info[]; } __packed; +#define HCI_EV_LE_PA_SYNC_ESTABLISHED 0x0e +struct hci_ev_le_pa_sync_established { + __u8 status; + __le16 handle; + __u8 sid; + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 phy; + __le16 interval; + __u8 clock_accuracy; +} __packed; + #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a struct hci_ev_le_enh_conn_complete { __u8 status; @@ -2631,6 +2745,55 @@ struct hci_evt_le_cis_req { __u8 cis_id; } __packed; +#define HCI_EVT_LE_CREATE_BIG_COMPLETE 0x1b +struct hci_evt_le_create_big_complete { + __u8 status; + __u8 handle; + __u8 sync_delay[3]; + __u8 transport_delay[3]; + __u8 phy; + __u8 nse; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __le16 interval; + __u8 num_bis; + __le16 bis_handle[]; +} __packed; + +#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +struct hci_evt_le_big_sync_estabilished { + __u8 status; + __u8 handle; + __u8 latency[3]; + __u8 nse; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __le16 interval; + __u8 num_bis; + __le16 bis[]; +} __packed; + +#define HCI_EVT_LE_BIG_INFO_ADV_REPORT 0x22 +struct hci_evt_le_big_info_adv_report { + __le16 sync_handle; + __u8 num_bis; + __u8 nse; + __le16 iso_interval; + __u8 bn; + __u8 pto; + __u8 irc; + __le16 max_pdu; + __u8 sdu_interval[3]; + __le16 max_sdu; + __u8 phy; + __u8 framing; + __u8 encryption; +} __packed; + #define HCI_EV_VENDOR 0xff /* Internal events generated by Bluetooth stack */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c0ea2a4892b1..e7862903187d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -126,6 +126,7 @@ struct hci_conn_hash { unsigned int acl_num; unsigned int amp_num; unsigned int sco_num; + unsigned int iso_num; unsigned int le_num; unsigned int le_num_peripheral; }; @@ -234,8 +235,9 @@ struct oob_data { struct adv_info { struct list_head list; - bool enabled; - bool pending; + bool enabled; + bool pending; + bool periodic; __u8 instance; __u32 flags; __u16 timeout; @@ -243,8 +245,12 @@ struct adv_info { __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; + bool adv_data_changed; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; + bool scan_rsp_changed; + __u16 per_adv_data_len; + __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; __s8 tx_power; __u32 min_interval; __u32 max_interval; @@ -258,6 +264,15 @@ struct adv_info { #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F +#define DATA_CMP(_d1, _l1, _d2, _l2) \ + (_l1 == _l2 ? memcmp(_d1, _d2, _l1) : _l1 - _l2) + +#define ADV_DATA_CMP(_adv, _data, _len) \ + DATA_CMP((_adv)->adv_data, (_adv)->adv_data_len, _data, _len) + +#define SCAN_RSP_CMP(_adv, _data, _len) \ + DATA_CMP((_adv)->scan_rsp_data, (_adv)->scan_rsp_len, _data, _len) + struct monitored_device { struct list_head list; @@ -463,13 +478,16 @@ struct hci_dev { unsigned int acl_cnt; unsigned int sco_cnt; unsigned int le_cnt; + unsigned int iso_cnt; unsigned int acl_mtu; unsigned int sco_mtu; unsigned int le_mtu; + unsigned int iso_mtu; unsigned int acl_pkts; unsigned int sco_pkts; unsigned int le_pkts; + unsigned int iso_pkts; __u16 block_len; __u16 block_mtu; @@ -506,8 +524,6 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; - struct work_struct discov_update; - struct work_struct scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -516,6 +532,7 @@ struct hci_dev { struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; + struct sk_buff *recv_event; struct mutex req_lock; wait_queue_head_t req_wait_q; @@ -580,6 +597,8 @@ struct hci_dev { __u8 adv_data_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; __u8 scan_rsp_data_len; + __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; + __u8 per_adv_data_len; struct list_head adv_instances; unsigned int adv_instance_cnt; @@ -647,6 +666,7 @@ enum conn_reasons { CONN_REASON_PAIR_DEVICE, CONN_REASON_L2CAP_CHAN, CONN_REASON_SCO_CONNECT, + CONN_REASON_ISO_CONNECT, }; struct hci_conn { @@ -664,6 +684,7 @@ struct hci_conn { __u8 resp_addr_type; __u8 adv_instance; __u16 handle; + __u16 sync_handle; __u16 state; __u8 mode; __u8 type; @@ -694,11 +715,14 @@ struct hci_conn { __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_AD_LENGTH]; __u8 le_adv_data_len; + __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; + __u8 le_per_adv_data_len; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; __s8 tx_power; __s8 max_tx_power; + struct bt_iso_qos iso_qos; unsigned long flags; enum conn_reasons conn_reason; @@ -729,6 +753,7 @@ struct hci_conn { struct hci_dev *hdev; void *l2cap_data; void *sco_data; + void *iso_data; struct amp_mgr *amp_mgr; struct hci_conn *link; @@ -737,6 +762,8 @@ struct hci_conn { void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason); + + void (*cleanup)(struct hci_conn *conn); }; struct hci_chan { @@ -824,6 +851,21 @@ static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_BT_LE) +int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); +void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +#else +static inline int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, + __u8 *flags) +{ + return 0; +} +static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, + u16 flags) +{ +} +#endif + /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ #define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ @@ -908,6 +950,7 @@ enum { HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, HCI_CONN_AUTH_FAILURE, + HCI_CONN_PER_ADV, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) @@ -944,6 +987,9 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num++; break; + case ISO_LINK: + h->iso_num++; + break; } } @@ -970,6 +1016,9 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num--; break; + case ISO_LINK: + h->iso_num--; + break; } } @@ -986,6 +1035,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case SCO_LINK: case ESCO_LINK: return h->sco_num; + case ISO_LINK: + return h->iso_num; default: return 0; } @@ -995,7 +1046,7 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->amp_num + c->sco_num + c->le_num; + return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; } static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) @@ -1018,6 +1069,29 @@ static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) return type; } +static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, + bdaddr_t *ba, + __u8 big, __u8 bis) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (bacmp(&c->dst, ba) || c->type != ISO_LINK) + continue; + + if (c->iso_qos.big == big && c->iso_qos.bis == bis) { + rcu_read_unlock(); + return c; + } + } + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev, __u16 handle) { @@ -1081,6 +1155,76 @@ static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, + bdaddr_t *ba, + __u8 ba_type) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK) + continue; + + if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, + __u8 handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type != ISO_LINK) + continue; + + if (handle == c->iso_qos.cig) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, + __u8 handle) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK) + continue; + + if (handle == c->iso_qos.big) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { @@ -1101,6 +1245,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, return NULL; } +typedef void (*hci_conn_func_t)(struct hci_conn *conn, void *data); +static inline void hci_conn_hash_list_state(struct hci_dev *hdev, + hci_conn_func_t func, __u8 type, + __u16 state, void *data) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + if (!func) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && c->state == state) + func(c, data); + } + + rcu_read_unlock(); +} + static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1124,6 +1289,8 @@ static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); +bool hci_iso_setup_path(struct hci_conn *conn); +int hci_le_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); @@ -1148,6 +1315,17 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting, struct bt_codec *codec); +struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos); +struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos); +struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos, + __u8 data_len, __u8 *data); +int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, + __u8 sid); +int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos, + __u16 sync_handle, __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1286,6 +1464,8 @@ void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); void hci_release_dev(struct hci_dev *hdev); +int hci_register_suspend_notifier(struct hci_dev *hdev); +int hci_unregister_suspend_notifier(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); @@ -1392,11 +1572,14 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); -int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, - u16 adv_data_len, u8 *adv_data, - u16 scan_rsp_len, u8 *scan_rsp_data, - u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval); +struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration, s8 tx_power, + u32 min_interval, u32 max_interval); +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u8 data_len, u8 *data, + u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data); @@ -1407,12 +1590,9 @@ bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); -int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); -int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); -bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - int *err); -bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err); -bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err); +int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); +int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle); +int hci_remove_all_adv_monitor(struct hci_dev *hdev); bool hci_is_adv_monitoring(struct hci_dev *hdev); int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev); @@ -1516,6 +1696,19 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ ext_adv_capable(dev)) +/* Periodic advertising support */ +#define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) + +/* CIS Master/Slave and BIS support */ +#define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) +#define cis_capable(dev) \ + (cis_central_capable(dev) || cis_peripheral_capable(dev)) +#define cis_central_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) +#define cis_peripheral_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) +#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) + /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 @@ -1530,6 +1723,9 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); + case ISO_LINK: + return iso_connect_ind(hdev, bdaddr, flags); + default: BT_ERR("unknown link type %d", type); return -EINVAL; @@ -1737,8 +1933,10 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); +void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); +void *hci_recv_event_data(struct hci_dev *hdev, __u8 event); u32 hci_conn_get_phy(struct hci_conn *conn); @@ -1797,6 +1995,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ +#define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */ +#define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */ #define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ @@ -1872,8 +2072,6 @@ void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); -int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); -int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9949870f7d78..0520e21ab698 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -124,6 +124,8 @@ struct hci_dev_info { __u16 acl_pkts; __u16 sco_mtu; __u16 sco_pkts; + __u16 iso_mtu; + __u16 iso_pkts; struct hci_dev_stats stat; }; diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 2492e3b46a8f..3843f5060c73 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -65,6 +65,10 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_enable_advertising(struct hci_dev *hdev); +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, + u8 *data, u32 flags, u16 min_interval, + u16 max_interval, u16 sync_interval); + int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); @@ -78,10 +82,12 @@ int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); int hci_update_scan_sync(struct hci_dev *hdev); +int hci_update_scan(struct hci_dev *hdev); int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, struct sock *sk); +int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance); struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, struct sock *sk); @@ -105,4 +111,14 @@ int hci_resume_sync(struct hci_dev *hdev); struct hci_conn; +int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); + int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle); + +int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason); + +int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); + +int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); diff --git a/include/net/bluetooth/iso.h b/include/net/bluetooth/iso.h new file mode 100644 index 000000000000..3f4fe8b78e1b --- /dev/null +++ b/include/net/bluetooth/iso.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2022 Intel Corporation + */ + +#ifndef __ISO_H +#define __ISO_H + +/* ISO defaults */ +#define ISO_DEFAULT_MTU 251 +#define ISO_MAX_NUM_BIS 0x1f + +/* ISO socket broadcast address */ +struct sockaddr_iso_bc { + bdaddr_t bc_bdaddr; + __u8 bc_bdaddr_type; + __u8 bc_sid; + __u8 bc_num_bis; + __u8 bc_bis[ISO_MAX_NUM_BIS]; +}; + +/* ISO socket address */ +struct sockaddr_iso { + sa_family_t iso_family; + bdaddr_t iso_bdaddr; + __u8 iso_bdaddr_type; + struct sockaddr_iso_bc iso_bc[]; +}; + +#endif /* __ISO_H */ diff --git a/include/net/bond_options.h b/include/net/bond_options.h index d2aea5cf1e41..69292ecc0325 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -7,6 +7,14 @@ #ifndef _NET_BOND_OPTIONS_H #define _NET_BOND_OPTIONS_H +#include <linux/bits.h> +#include <linux/limits.h> +#include <linux/types.h> +#include <linux/string.h> + +struct netlink_ext_ack; +struct nlattr; + #define BOND_OPT_MAX_NAMELEN 32 #define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST) #define BOND_MODE_ALL_EX(x) (~(x)) diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 58b6d0ebea10..7d3d9219f4fe 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,7 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/codel.h> #include <net/pkt_sched.h> /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ diff --git a/include/net/datalink.h b/include/net/datalink.h index d9b7faaa539f..c837ffc7ebf8 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -2,6 +2,13 @@ #ifndef _NET_INET_DATALINK_H_ #define _NET_INET_DATALINK_H_ +#include <linux/list.h> + +struct llc_sap; +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto { unsigned char type[8]; diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h index 43e34131a53f..02700262f71a 100644 --- a/include/net/dcbevent.h +++ b/include/net/dcbevent.h @@ -8,6 +8,8 @@ #ifndef _DCB_EVENT_H #define _DCB_EVENT_H +struct notifier_block; + enum dcbevent_notif_type { DCB_APP_EVENT = 1, }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index e4ad58c4062c..2b2d86fb3131 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -10,6 +10,8 @@ #include <linux/dcbnl.h> +struct net_device; + struct dcb_app_type { int ifindex; struct dcb_app app; diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 595b4f6c1eb1..bec303ea8367 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -2,6 +2,7 @@ #ifndef _NET_DN_DEV_H #define _NET_DN_DEV_H +#include <linux/netdevice.h> struct dn_dev; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index ddd6565957b3..1929a3cd5ebe 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -4,6 +4,8 @@ #include <linux/netlink.h> #include <linux/refcount.h> +#include <linux/rtnetlink.h> +#include <net/fib_rules.h> extern const struct nla_policy rtm_dn_policy[]; diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index 2e3e7793973a..1f7df98bfc33 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -2,6 +2,8 @@ #ifndef _NET_DN_NEIGH_H #define _NET_DN_NEIGH_H +#include <net/neighbour.h> + /* * The position of the first two fields of * this structure are critical - SJW diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index f83932b864a9..a4a18fee0b7c 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -6,6 +6,12 @@ *******************************************************************************/ /* dn_nsp.c functions prototyping */ +#include <linux/atomic.h> +#include <linux/types.h> +#include <net/sock.h> + +struct sk_buff; +struct sk_buff_head; void dn_nsp_send_data_ack(struct sock *sk); void dn_nsp_send_oth_ack(struct sock *sk); diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 6f1e94ac0bdf..88c0300236cc 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -7,6 +7,9 @@ *******************************************************************************/ +#include <linux/types.h> +#include <net/dst.h> + struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *, struct sock *sk, int flags); diff --git a/include/net/erspan.h b/include/net/erspan.h index 0d9e86bd9893..6cb4cbd6a48f 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -58,6 +58,9 @@ * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB */ +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> #include <uapi/linux/erspan.h> #define ERSPAN_VERSION 0x1 /* ERSPAN type II */ diff --git a/include/net/esp.h b/include/net/esp.h index 9c5637d41d95..322950727dd0 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -5,6 +5,7 @@ #include <linux/skbuff.h> struct ip_esp_hdr; +struct xfrm_state; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) { diff --git a/include/net/ethoc.h b/include/net/ethoc.h index 78519ed42ab4..73810f3ca492 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -10,6 +10,9 @@ #ifndef LINUX_NET_ETHOC_H #define LINUX_NET_ETHOC_H 1 +#include <linux/if.h> +#include <linux/types.h> + struct ethoc_platform_data { u8 hwaddr[IFHWADDRLEN]; s8 phy_id; diff --git a/include/net/firewire.h b/include/net/firewire.h index 299e5df38552..2442d645e412 100644 --- a/include/net/firewire.h +++ b/include/net/firewire.h @@ -2,6 +2,8 @@ #ifndef _NET_FIREWIRE_H #define _NET_FIREWIRE_H +#include <linux/types.h> + /* Pseudo L2 address */ #define FWNET_ALEN 16 union fwnet_hwaddr { diff --git a/include/net/fq.h b/include/net/fq.h index 2eccbbd2b559..07b5aff6ec58 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -7,6 +7,10 @@ #ifndef __NET_SCHED_FQ_H #define __NET_SCHED_FQ_H +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/types.h> + struct fq_tin; /** diff --git a/include/net/garp.h b/include/net/garp.h index 4d9a0c6a2e5f..59a07b171def 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -2,6 +2,8 @@ #ifndef _NET_GARP_H #define _NET_GARP_H +#include <linux/if_ether.h> +#include <linux/types.h> #include <net/stp.h> #define GARP_PROTOCOL_ID 0x1 diff --git a/include/net/gtp.h b/include/net/gtp.h index c1d6169df331..2a503f035d18 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -2,6 +2,10 @@ #ifndef _GTP_H_ #define _GTP_H_ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <net/rtnetlink.h> + /* General GTP protocol related definitions. */ #define GTP0_PORT 3386 diff --git a/include/net/gue.h b/include/net/gue.h index e42402f180b7..dfca298bec9c 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -30,6 +30,9 @@ * may refer to options placed after this field. */ +#include <asm/byteorder.h> +#include <linux/types.h> + struct guehdr { union { struct { diff --git a/include/net/hwbm.h b/include/net/hwbm.h index c81444611a22..aa495decec35 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -2,6 +2,8 @@ #ifndef _HWBM_H #define _HWBM_H +#include <linux/mutex.h> + struct hwbm_pool { /* Capacity of the pool */ int size; diff --git a/include/net/ila.h b/include/net/ila.h index f98dcd5791b0..73ebe5eab272 100644 --- a/include/net/ila.h +++ b/include/net/ila.h @@ -8,6 +8,8 @@ #ifndef _NET_ILA_H #define _NET_ILA_H +struct sk_buff; + int ila_xlat_outgoing(struct sk_buff *skb); int ila_xlat_incoming(struct sk_buff *skb); diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 7392f959a405..025bd8d3c769 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -11,6 +11,8 @@ #include <linux/types.h> +struct flowi; +struct flowi6; struct request_sock; struct sk_buff; struct sock; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index cad2a611efde..cec453c18f1d 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -3,6 +3,10 @@ #define _INET_COMMON_H #include <linux/indirect_call_wrapper.h> +#include <linux/net.h> +#include <linux/netdev_features.h> +#include <linux/types.h> +#include <net/sock.h> extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -12,6 +16,8 @@ extern const struct proto_ops inet_dgram_ops; */ struct msghdr; +struct net; +struct page; struct sock; struct sockaddr; struct socket; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 911ad930867d..0b0876610553 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,6 +4,9 @@ #include <linux/rhashtable-types.h> #include <linux/completion.h> +#include <linux/in6.h> +#include <linux/rbtree_types.h> +#include <linux/refcount.h> /* Per netns frag queues directory */ struct fqdir { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ebfa3df6f8dc..fd6b510d114b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index daead5fb389a..6395f6b9a5d2 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif @@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!net->ipv4.sysctl_tcp_l3mdev_accept) + if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif @@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/include/net/ip.h b/include/net/ip.h index 26fffda78cca..1c979fd1904c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else @@ -384,7 +384,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ca2d6b60e1ec..035d61d50a98 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -2,6 +2,16 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H +#include <net/addrconf.h> +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/nexthop.h> + struct route_info { __u8 type; __u8 length; @@ -19,16 +29,6 @@ struct route_info { __u8 prefix[]; /* 0,8 or 16 */ }; -#include <net/addrconf.h> -#include <net/flow.h> -#include <net/ip6_fib.h> -#include <net/sock.h> -#include <net/lwtunnel.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/route.h> -#include <net/nexthop.h> - #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 70cbc4a72669..20db95055db3 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -387,9 +387,11 @@ static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP)) + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IP)) return iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (payload_protocol == htons(ETH_P_IPV6)) return ipv6_get_dsfield((const struct ipv6hdr *)iph); else return 0; @@ -398,9 +400,11 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP)) + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IP)) return iph->ttl; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (payload_protocol == htons(ETH_P_IPV6)) return ((const struct ipv6hdr *)iph)->hop_limit; else return 0; diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index fee6fc451597..c31108295079 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -2,11 +2,13 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include <linux/skbuff.h> #include <linux/types.h> #define IPCOMP_SCRATCH_SIZE 65400 struct crypto_comp; +struct ip_comp_hdr; struct ipcomp_data { u16 threshold; diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h index e3534299bd2a..8276897d0c2e 100644 --- a/include/net/ipconfig.h +++ b/include/net/ipconfig.h @@ -7,6 +7,8 @@ /* The following are initdata: */ +#include <linux/types.h> + extern int ic_proto_enabled; /* Protocols enabled (see IC_xxx) */ extern int ic_set_manually; /* IPconfig parameters set manually */ diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index e766300b3e99..3e1f76786d7b 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -16,6 +16,13 @@ * Connection state transition actions * (Fb = F bit; Pb = P bit; Xb = X bit) */ + +#include <linux/types.h> + +struct sk_buff; +struct sock; +struct timer_list; + #define LLC_CONN_AC_CLR_REMOTE_BUSY 1 #define LLC_CONN_AC_CONN_IND 2 #define LLC_CONN_AC_CONN_CONFIRM 3 diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 48f3f891b2f9..53823d61d8b6 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. */ + +#include <net/llc_c_ac.h> +#include <net/llc_c_ev.h> + /* Connection component state management */ /* connection states */ #define LLC_CONN_OUT_OF_SVC 0 /* prior to allocation */ diff --git a/include/net/llc_s_ac.h b/include/net/llc_s_ac.h index a61b98c108ee..f71790305bc9 100644 --- a/include/net/llc_s_ac.h +++ b/include/net/llc_s_ac.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. */ + +struct llc_sap; +struct sk_buff; + /* SAP component actions */ #define SAP_ACT_UNITDATA_IND 1 #define SAP_ACT_SEND_UI 2 diff --git a/include/net/llc_s_ev.h b/include/net/llc_s_ev.h index 84db3a59ed28..fb7df1d70af3 100644 --- a/include/net/llc_s_ev.h +++ b/include/net/llc_s_ev.h @@ -13,6 +13,7 @@ */ #include <linux/skbuff.h> +#include <net/llc.h> /* Defines SAP component events */ /* Types of events (possible values in 'ev->type') */ diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 9deb3a3735da..0c71c27979fb 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -6,6 +6,9 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 +#include <linux/types.h> +#include <net/lwtunnel.h> + struct mpls_iptunnel_encap { u8 labels; u8 ttl_propagate; diff --git a/include/net/mrp.h b/include/net/mrp.h index 1c308c034e1a..92cd3fb6cf9d 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -2,6 +2,10 @@ #ifndef _NET_MRP_H #define _NET_MRP_H +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/types.h> + #define MRP_END_MARK 0x0 struct mrp_pdu_hdr { diff --git a/include/net/ncsi.h b/include/net/ncsi.h index fbefe80361ee..08a50d9acb0a 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -2,6 +2,8 @@ #ifndef __NET_NCSI_H #define __NET_NCSI_H +#include <linux/types.h> + /* * The NCSI device states seen from external. More NCSI device states are * only visible internally (in net/ncsi/internal.h). When the NCSI device diff --git a/include/net/netevent.h b/include/net/netevent.h index 4107016c3bb4..1be3757a8b7f 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -14,6 +14,7 @@ struct dst_entry; struct neighbour; +struct notifier_block ; struct netevent_redirect { struct dst_entry *old; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 37866c8386e2..3cd3a6e631aa 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -84,4 +84,23 @@ void nf_conntrack_lock(spinlock_t *lock); extern spinlock_t nf_conntrack_expect_lock; +/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \ + IS_ENABLED(CONFIG_NF_CT_NETLINK)) + +static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) +{ + if (timeout > INT_MAX) + timeout = INT_MAX; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); +} + +int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); +void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); +int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); + +#endif + #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netns/can.h b/include/net/netns/can.h index 52fbd8291a96..48b79f7e6236 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -7,6 +7,7 @@ #define __NETNS_CAN_H__ #include <linux/spinlock.h> +#include <linux/timer.h> struct can_dev_rcv_lists; struct can_pkg_stats; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 388244e315e7..8249060cf5d0 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -2,6 +2,8 @@ #ifndef __NETNS_CORE_H__ #define __NETNS_CORE_H__ +#include <linux/types.h> + struct ctl_table_header; struct prot_inuse; diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 8a1ab47c3fb3..7ce68183f6e1 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -8,6 +8,7 @@ #include <linux/bug.h> #include <linux/rcupdate.h> +#include <net/net_namespace.h> /* * Generic net pointers are to be used by modules to put some private diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ce0cc4e8d8c7..c7320ef356d9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/seqlock.h> #include <linux/siphash.h> struct ctl_table_header; diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index acedef12a35e..1db8f9aaddb4 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -6,6 +6,7 @@ #ifndef __NETNS_MCTP_H__ #define __NETNS_MCTP_H__ +#include <linux/mutex.h> #include <linux/types.h> struct netns_mctp { diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index a7bdcfbb0b28..19ad2574b267 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -6,6 +6,8 @@ #ifndef __NETNS_MPLS_H__ #define __NETNS_MPLS_H__ +#include <linux/types.h> + struct mpls_route; struct ctl_table_header; diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h index 1849e77eb68a..434239b37014 100644 --- a/include/net/netns/nexthop.h +++ b/include/net/netns/nexthop.h @@ -6,6 +6,7 @@ #ifndef __NETNS_NEXTHOP_H__ #define __NETNS_NEXTHOP_H__ +#include <linux/notifier.h> #include <linux/rbtree.h> struct netns_nexthop { diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 40240722cdca..a681147aecd8 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -2,6 +2,9 @@ #ifndef __NETNS_SCTP_H__ #define __NETNS_SCTP_H__ +#include <linux/timer.h> +#include <net/snmp.h> + struct sock; struct proc_dir_entry; struct sctp_mib; diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 6f1a33df061d..9859d134d5a8 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,6 +5,8 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +#include <linux/spinlock.h> + struct unix_table { spinlock_t *locks; struct hlist_head *buckets; diff --git a/include/net/netrom.h b/include/net/netrom.h index 80f15b1c1a48..f0565a5987d1 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -14,6 +14,7 @@ #include <net/sock.h> #include <linux/refcount.h> #include <linux/seq_file.h> +#include <net/ax25.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 diff --git a/include/net/p8022.h b/include/net/p8022.h index c2bacc66bfbc..b690ffcad66b 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_P8022_H #define _NET_P8022_H + +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 27b1ab5e4e6d..645dddf5ce77 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -10,6 +10,9 @@ #ifndef NET_PHONET_PEP_H #define NET_PHONET_PEP_H +#include <linux/skbuff.h> +#include <net/phonet/phonet.h> + struct pep_sock { struct pn_sock pn_sk; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index a27bdc6cfeab..862f1719b523 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -10,6 +10,10 @@ #ifndef AF_PHONET_H #define AF_PHONET_H +#include <linux/phonet.h> +#include <linux/skbuff.h> +#include <net/sock.h> + /* * The lower layers may not require more space, ever. Make sure it's * enough. diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 05b49d4d2b11..e9dc8dca5817 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -10,6 +10,11 @@ #ifndef PN_DEV_H #define PN_DEV_H +#include <linux/list.h> +#include <linux/mutex.h> + +struct net; + struct phonet_device_list { struct list_head list; struct mutex lock; diff --git a/include/net/pptp.h b/include/net/pptp.h index 383e25ca53a7..e63176bdd4c8 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -2,6 +2,9 @@ #ifndef _NET_PPTP_H #define _NET_PPTP_H +#include <linux/types.h> +#include <net/gre.h> + #define PPP_LCP_ECHOREQ 0x09 #define PPP_LCP_ECHOREP 0x0A #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) diff --git a/include/net/protocol.h b/include/net/protocol.h index f51c06ae365f..6aef8cb11cc8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,8 +35,6 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); - int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ @@ -52,8 +50,6 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ diff --git a/include/net/psnap.h b/include/net/psnap.h index 7cb0c8ab4171..88802b0754ad 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -2,6 +2,11 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H +struct datalink_proto; +struct sk_buff; +struct packet_type; +struct net_device; + struct datalink_proto * register_snap_client(const unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67..896191f420d5 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -1,3 +1,4 @@ + #ifndef __NET_REGULATORY_H #define __NET_REGULATORY_H /* @@ -19,6 +20,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include <linux/ieee80211.h> +#include <linux/nl80211.h> #include <linux/rcupdate.h> /** diff --git a/include/net/rose.h b/include/net/rose.h index 0f0a4ce0fee7..f192a64ddef2 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -9,6 +9,7 @@ #define _ROSE_H #include <linux/rose.h> +#include <net/ax25.h> #include <net/sock.h> #define ROSE_ADDR_LEN 5 diff --git a/include/net/route.h b/include/net/route.h index 651424ef09c9..f3257000d4e1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -369,7 +369,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = net->ipv4.sysctl_ip_default_ttl; + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index dac91aa38c5a..21e7fa2a1813 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,6 +4,8 @@ #include <linux/types.h> +struct net; + u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); diff --git a/include/net/smc.h b/include/net/smc.h index e441aa97ad61..37f829d9c6e5 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -11,6 +11,13 @@ #ifndef _SMC_H #define _SMC_H +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/wait.h> + +struct sock; + #define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ struct smc_hashinfo { diff --git a/include/net/stp.h b/include/net/stp.h index 2914e6d53490..528103fce2c0 100644 --- a/include/net/stp.h +++ b/include/net/stp.h @@ -2,6 +2,8 @@ #ifndef _NET_STP_H #define _NET_STP_H +#include <linux/if_ether.h> + struct stp_proto { unsigned char group_address[ETH_ALEN]; void (*rcv)(const struct stp_proto *, struct sk_buff *, diff --git a/include/net/tcp.h b/include/net/tcp.h index 8e48dc56837b..f9e7c85ea829 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -936,7 +936,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); +void tcp_v6_early_demux(struct sk_buff *skb); #endif @@ -1407,8 +1407,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1497,21 +1497,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -1524,7 +1527,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -2027,7 +2031,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index da06613c9603..b830463e3dff 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -3,6 +3,7 @@ #define _TRANSP_V6_H #include <net/checksum.h> +#include <net/sock.h> /* IPv6 transport protocols */ extern struct proto rawv6_prot; @@ -12,6 +13,7 @@ extern struct proto tcpv6_prot; extern struct proto pingv6_prot; struct flowi6; +struct ipcm6_cookie; /* extension headers */ int ipv6_exthdrs_init(void); diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h index 2ea3deba4c99..7b0de7852908 100644 --- a/include/net/tun_proto.h +++ b/include/net/tun_proto.h @@ -1,7 +1,8 @@ #ifndef __NET_TUN_PROTO_H #define __NET_TUN_PROTO_H -#include <linux/kernel.h> +#include <linux/if_ether.h> +#include <linux/types.h> /* One byte protocol values as defined by VXLAN-GPE and NSH. These will * hopefully get a shared IANA registry. diff --git a/include/net/udp.h b/include/net/udp.h index 987f7fc7c0aa..5ee88ddf79c3 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -168,7 +168,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, @@ -239,7 +239,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/udplite.h b/include/net/udplite.h index a3c53110d30b..0143b373602e 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -6,6 +6,7 @@ #define _UDPLITE_H #include <net/ip6_checksum.h> +#include <net/udp.h> /* UDP-Lite socket options */ #define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a2d58b1a12e1..c9df68d5f258 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -3,6 +3,7 @@ #define __LINUX_NET_XDP_PRIV_H__ #include <linux/rhashtable.h> +#include <net/xdp.h> /* Private to net/core/xdp.c, but used by trace/events/xdp.h */ struct xdp_mem_allocator { diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4aa031849668..4277b0dcee05 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -44,6 +44,15 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, xp_set_rxq_info(pool, rxq); } +static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + return pool->heads[0].xdp.rxq->napi_id; +#else + return 0; +#endif +} + static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { @@ -198,6 +207,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, { } +static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3dd13fe738b9..59a217ca2dfd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2361,7 +2361,8 @@ union bpf_attr { * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ef4257ab3026..2557eb7b0561 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -78,10 +78,13 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) - * is reported in units per millimeter (units/mm), resolution - * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported - * in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z, + * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units + * per millimeter (units/mm), resolution for rotational axes + * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The resolution for the size axes (ABS_MT_TOUCH_MAJOR, + * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR) + * is reported in units per millimeter (units/mm). * When INPUT_PROP_ACCELEROMETER is set the resolution changes. * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in * units per g (units/g) and in units per degree per second diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..811897dadcae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2083,6 +2083,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_BASE_SHIFT 8 diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index fe40d3b9458f..d3e734bf8056 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -70,10 +70,8 @@ int array_map_alloc_check(union bpf_attr *attr) attr->map_flags & BPF_F_PRESERVE_ELEMS) return -EINVAL; - if (attr->value_size > KMALLOC_MAX_SIZE) - /* if value_size is bigger, the user space won't be able to - * access the elements. - */ + /* avoid overflow on round_up(map->value_size) */ + if (attr->value_size > INT_MAX) return -E2BIG; return 0; @@ -156,6 +154,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) return &array->map; } +static void *array_map_elem_ptr(struct bpf_array* array, u32 index) +{ + return array->value + (u64)array->elem_size * index; +} + /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -165,7 +168,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * (index & array->index_mask); + return array->value + (u64)array->elem_size * (index & array->index_mask); } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, @@ -203,7 +206,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; - u32 elem_size = round_up(map->value_size, 8); + u32 elem_size = array->elem_size; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; @@ -272,7 +275,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) * access 'value_size' of them, so copying rounded areas * will not leak any kernel data */ - size = round_up(map->value_size, 8); + size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { @@ -339,7 +342,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, value, map->value_size); } else { val = array->value + - array->elem_size * (index & array->index_mask); + (u64)array->elem_size * (index & array->index_mask); if (map_flags & BPF_F_LOCK) copy_map_value_locked(map, val, value, false); else @@ -376,7 +379,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, * returned or zeros which were zero-filled by percpu_alloc, * so no kernel data leaks possible */ - size = round_up(map->value_size, 8); + size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { @@ -408,8 +411,7 @@ static void array_map_free_timers(struct bpf_map *map) return; for (i = 0; i < array->map.max_entries; i++) - bpf_timer_cancel_and_free(array->value + array->elem_size * i + - map->timer_off); + bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off); } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -420,7 +422,7 @@ static void array_map_free(struct bpf_map *map) if (map_value_has_kptrs(map)) { for (i = 0; i < array->map.max_entries; i++) - bpf_map_free_kptrs(map, array->value + array->elem_size * i); + bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); bpf_map_free_kptr_off_tab(map); } @@ -556,7 +558,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -575,7 +577,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) @@ -583,6 +585,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_iter__bpf_map_elem ctx = {}; struct bpf_map *map = info->map; + struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_iter_meta meta; struct bpf_prog *prog; int off = 0, cpu = 0; @@ -603,7 +606,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) ctx.value = v; } else { pptr = v; - size = round_up(map->value_size, 8); + size = array->elem_size; for_each_possible_cpu(cpu) { bpf_long_memcpy(info->percpu_value_buf + off, per_cpu_ptr(pptr, cpu), @@ -633,11 +636,12 @@ static int bpf_iter_init_array_map(void *priv_data, { struct bpf_iter_seq_array_map_info *seq_info = priv_data; struct bpf_map *map = aux->map; + struct bpf_array *array = container_of(map, struct bpf_array, map); void *value_buf; u32 buf_size; if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { - buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + buf_size = array->elem_size * num_possible_cpus(); value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); if (!value_buf) return -ENOMEM; @@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); else - val = array->value + array->elem_size * i; + val = array_map_elem_ptr(array, i); num_elems++; key = i; ret = callback_fn((u64)(long)map, (u64)(long)&key, @@ -1322,7 +1326,7 @@ static int array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); - u32 elem_size = round_up(map->value_size, 8); + u32 elem_size = array->elem_size; struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index d469b7f3deef..fa71d58b7ded 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -63,10 +63,11 @@ BTF_ID(func, bpf_lsm_socket_post_create) BTF_ID(func, bpf_lsm_socket_socketpair) BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks) +#ifdef CONFIG_CGROUP_BPF void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) { - const struct btf_param *args; + const struct btf_param *args __maybe_unused; if (btf_type_vlen(prog->aux->attach_func_proto) < 1 || btf_id_set_contains(&bpf_lsm_current_hooks, @@ -75,9 +76,9 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, return; } +#ifdef CONFIG_NET args = btf_params(prog->aux->attach_func_proto); -#ifdef CONFIG_NET if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET]) *bpf_func = __cgroup_bpf_run_lsm_socket; else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK]) @@ -86,6 +87,7 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, #endif *bpf_func = __cgroup_bpf_run_lsm_current; } +#endif int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) @@ -219,6 +221,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_retval: return prog->expected_attach_type == BPF_LSM_CGROUP ? &bpf_get_retval_proto : NULL; +#ifdef CONFIG_NET case BPF_FUNC_setsockopt: if (prog->expected_attach_type != BPF_LSM_CGROUP) return NULL; @@ -239,6 +242,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) prog->aux->attach_btf_id)) return &bpf_unlocked_sk_getsockopt_proto; return NULL; +#endif default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 7e0068c3399c..84b2d9dba79a 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -341,6 +341,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, tlinks[BPF_TRAMP_FENTRY].links[0] = link; tlinks[BPF_TRAMP_FENTRY].nr_links = 1; + /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, + * and it must be used alone. + */ flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0; return arch_prepare_bpf_trampoline(NULL, image, image_end, model, flags, tlinks, NULL); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4423045b8ff3..7ac971ea98d1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -213,7 +213,7 @@ enum { }; struct btf_kfunc_set_tab { - struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; + struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; }; struct btf_id_dtor_kfunc_tab { @@ -1116,7 +1116,8 @@ __printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...) */ #define btf_show_type_value(show, fmt, value) \ do { \ - if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) || \ + if ((value) != (__typeof__(value))0 || \ + (show->flags & BTF_SHOW_ZERO) || \ show->state.depth == 0) { \ btf_show(show, "%s%s" fmt "%s%s", \ btf_show_indent(show), \ @@ -1615,7 +1616,7 @@ static void btf_free_id(struct btf *btf) static void btf_free_kfunc_set_tab(struct btf *btf) { struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; - int hook, type; + int hook; if (!tab) return; @@ -1624,10 +1625,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf) */ if (btf_is_module(btf)) goto free_tab; - for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { - for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) - kfree(tab->sets[hook][type]); - } + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) + kfree(tab->sets[hook]); free_tab: kfree(tab); btf->kfunc_set_tab = NULL; @@ -6171,13 +6170,14 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - bool ptr_to_mem_ok) + bool ptr_to_mem_ok, + u32 kfunc_flags) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + bool rel = false, kptr_get = false, trusted_arg = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); - bool rel = false, kptr_get = false; const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; @@ -6209,10 +6209,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (is_kfunc) { /* Only kfunc can be release func */ - rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RELEASE, func_id); - kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); + rel = kfunc_flags & KF_RELEASE; + kptr_get = kfunc_flags & KF_KPTR_GET; + trusted_arg = kfunc_flags & KF_TRUSTED_ARGS; } /* check that BTF function arguments match actual types that the @@ -6237,10 +6236,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + /* Check if argument must be a referenced pointer, args + i has + * been verified to be a pointer (after skipping modifiers). + */ + if (is_kfunc && trusted_arg && !reg->ref_obj_id) { + bpf_log(log, "R%d must be referenced\n", regno); + return -EINVAL; + } + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - if (rel && reg->ref_obj_id) + /* Trusted args have the same offset checks as release arguments */ + if (trusted_arg || (rel && reg->ref_obj_id)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) @@ -6338,7 +6346,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, - reg->off, btf, ref_id, rel && reg->ref_obj_id)) { + reg->off, btf, ref_id, + trusted_arg || (rel && reg->ref_obj_id))) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, @@ -6441,7 +6450,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. @@ -6454,9 +6463,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs, + u32 kfunc_flags) { - return btf_check_func_arg_match(env, btf, func_id, regs, true); + return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags); } /* Convert BTF of a function into bpf_reg_state if possible @@ -6853,6 +6863,11 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + enum { BTF_MODULE_F_LIVE = (1 << 0), }; @@ -7101,16 +7116,16 @@ BTF_TRACING_TYPE_xxx /* Kernel Function (kfunc) BTF ID set registration API */ -static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, - struct btf_id_set *add_set, bool vmlinux_set) +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + struct btf_id_set8 *add_set) { + bool vmlinux_set = !btf_is_module(btf); struct btf_kfunc_set_tab *tab; - struct btf_id_set *set; + struct btf_id_set8 *set; u32 set_cnt; int ret; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + if (hook >= BTF_KFUNC_HOOK_MAX) { ret = -EINVAL; goto end; } @@ -7126,7 +7141,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, btf->kfunc_set_tab = tab; } - set = tab->sets[hook][type]; + set = tab->sets[hook]; /* Warn when register_btf_kfunc_id_set is called twice for the same hook * for module sets. */ @@ -7140,7 +7155,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * pointer and return. */ if (!vmlinux_set) { - tab->sets[hook][type] = add_set; + tab->sets[hook] = add_set; return 0; } @@ -7149,7 +7164,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary - * searching the set using btf_id_set_contains function work. + * searching the set using btf_id_set8_contains function work. */ set_cnt = set ? set->cnt : 0; @@ -7164,8 +7179,8 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* Grow set */ - set = krealloc(tab->sets[hook][type], - offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + set = krealloc(tab->sets[hook], + offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), GFP_KERNEL | __GFP_NOWARN); if (!set) { ret = -ENOMEM; @@ -7173,15 +7188,15 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* For newly allocated set, initialize set->cnt to 0 */ - if (!tab->sets[hook][type]) + if (!tab->sets[hook]) set->cnt = 0; - tab->sets[hook][type] = set; + tab->sets[hook] = set; /* Concatenate the two sets */ - memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); set->cnt += add_set->cnt; - sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); return 0; end: @@ -7189,38 +7204,25 @@ end: return ret; } -static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - const struct btf_kfunc_id_set *kset) -{ - bool vmlinux_set = !btf_is_module(btf); - int type, ret = 0; - - for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { - if (!kset->sets[type]) - continue; - - ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); - if (ret) - break; - } - return ret; -} - -static bool __btf_kfunc_id_set_contains(const struct btf *btf, +static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - struct btf_id_set *set; + struct btf_id_set8 *set; + u32 *id; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) - return false; + if (hook >= BTF_KFUNC_HOOK_MAX) + return NULL; if (!btf->kfunc_set_tab) - return false; - set = btf->kfunc_set_tab->sets[hook][type]; + return NULL; + set = btf->kfunc_set_tab->sets[hook]; if (!set) - return false; - return btf_id_set_contains(set, kfunc_btf_id); + return NULL; + id = btf_id_set8_contains(set, kfunc_btf_id); + if (!id) + return NULL; + /* The flags for BTF ID are located next to it */ + return id + 1; } static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) @@ -7248,14 +7250,14 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) * keeping the reference for the duration of the call provides the necessary * protection for looking up a well-formed btf->kfunc_set_tab. */ -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) + u32 kfunc_btf_id) { enum btf_kfunc_hook hook; hook = bpf_prog_type_to_kfunc_hook(prog_type); - return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); } /* This function must be invoked only from initcalls/module init functions */ @@ -7282,7 +7284,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, return PTR_ERR(btf); hook = bpf_prog_type_to_kfunc_hook(prog_type); - ret = btf_populate_kfunc_set(btf, hook, kset); + ret = btf_populate_kfunc_set(btf, hook, kset->set); btf_put(btf); return ret; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bfeb9b937315..c1e10d088dbb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -652,12 +652,6 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) return fp->jited && !bpf_prog_was_classic(fp); } -static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) -{ - return list_empty(&fp->aux->ksym.lnode) || - fp->aux->ksym.lnode.prev == LIST_POISON2; -} - void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || @@ -833,15 +827,6 @@ struct bpf_prog_pack { #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) -static size_t bpf_prog_pack_size = -1; -static size_t bpf_prog_pack_mask = -1; - -static int bpf_prog_chunk_count(void) -{ - WARN_ON_ONCE(bpf_prog_pack_size == -1); - return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE; -} - static DEFINE_MUTEX(pack_mutex); static LIST_HEAD(pack_list); @@ -849,55 +834,33 @@ static LIST_HEAD(pack_list); * CONFIG_MMU=n. Use PAGE_SIZE in these cases. */ #ifdef PMD_SIZE -#define BPF_HPAGE_SIZE PMD_SIZE -#define BPF_HPAGE_MASK PMD_MASK +#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes()) #else -#define BPF_HPAGE_SIZE PAGE_SIZE -#define BPF_HPAGE_MASK PAGE_MASK +#define BPF_PROG_PACK_SIZE PAGE_SIZE #endif -static size_t select_bpf_prog_pack_size(void) -{ - size_t size; - void *ptr; - - size = BPF_HPAGE_SIZE * num_online_nodes(); - ptr = module_alloc(size); - - /* Test whether we can get huge pages. If not just use PAGE_SIZE - * packs. - */ - if (!ptr || !is_vm_area_hugepages(ptr)) { - size = PAGE_SIZE; - bpf_prog_pack_mask = PAGE_MASK; - } else { - bpf_prog_pack_mask = BPF_HPAGE_MASK; - } - - vfree(ptr); - return size; -} +#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE) static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; - pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())), + pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)), GFP_KERNEL); if (!pack) return NULL; - pack->ptr = module_alloc(bpf_prog_pack_size); + pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); if (!pack->ptr) { kfree(pack); return NULL; } - bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size); - bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE); + bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); + bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); list_add_tail(&pack->list, &pack_list); set_vm_flush_reset_perms(pack->ptr); - set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE); - set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE); + set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); return pack; } @@ -909,10 +872,7 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn void *ptr = NULL; mutex_lock(&pack_mutex); - if (bpf_prog_pack_size == -1) - bpf_prog_pack_size = select_bpf_prog_pack_size(); - - if (size > bpf_prog_pack_size) { + if (size > BPF_PROG_PACK_SIZE) { size = round_up(size, PAGE_SIZE); ptr = module_alloc(size); if (ptr) { @@ -924,9 +884,9 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn goto out; } list_for_each_entry(pack, &pack_list, list) { - pos = bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, + pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, nbits, 0); - if (pos < bpf_prog_chunk_count()) + if (pos < BPF_PROG_CHUNK_COUNT) goto found_free_area; } @@ -950,18 +910,15 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr) struct bpf_prog_pack *pack = NULL, *tmp; unsigned int nbits; unsigned long pos; - void *pack_ptr; mutex_lock(&pack_mutex); - if (hdr->size > bpf_prog_pack_size) { + if (hdr->size > BPF_PROG_PACK_SIZE) { module_memfree(hdr); goto out; } - pack_ptr = (void *)((unsigned long)hdr & bpf_prog_pack_mask); - list_for_each_entry(tmp, &pack_list, list) { - if (tmp->ptr == pack_ptr) { + if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) { pack = tmp; break; } @@ -971,14 +928,14 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr) goto out; nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size); - pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT; + pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT; WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size), "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); bitmap_clear(pack->bitmap, pos, nbits); - if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, - bpf_prog_chunk_count(), 0) == 0) { + if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, + BPF_PROG_CHUNK_COUNT, 0) == 0) { list_del(&pack->list); module_memfree(pack->ptr); kfree(pack); @@ -1155,7 +1112,6 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, bpf_prog_pack_free(ro_header); return PTR_ERR(ptr); } - prog->aux->use_bpf_prog_pack = true; return 0; } @@ -1179,17 +1135,23 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, bpf_jit_uncharge_modmem(size); } +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr; + + addr = real_start & BPF_PROG_CHUNK_MASK; + return (void *)addr; +} + static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) { unsigned long real_start = (unsigned long)fp->bpf_func; unsigned long addr; - if (fp->aux->use_bpf_prog_pack) - addr = real_start & BPF_PROG_CHUNK_MASK; - else - addr = real_start & PAGE_MASK; - + addr = real_start & PAGE_MASK; return (void *)addr; } @@ -1202,11 +1164,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - if (fp->aux->use_bpf_prog_pack) - bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */); - else - bpf_jit_binary_free(hdr); - + bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index c2867068e5bd..1400561efb15 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -845,7 +845,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, struct bpf_dtab_netdev *dev; dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev), - GFP_ATOMIC | __GFP_NOWARN, + GFP_NOWAIT | __GFP_NOWARN, dtab->map.numa_node); if (!dev) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 17fb69c0e0dc..da7578426a46 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -61,7 +61,7 @@ * * As regular device interrupt handlers and soft interrupts are forced into * thread context, the existing code which does - * spin_lock*(); alloc(GPF_ATOMIC); spin_unlock*(); + * spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*(); * just works. * * In theory the BPF locks could be converted to regular spinlocks as well, @@ -978,7 +978,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, goto dec_count; } l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size, - GFP_ATOMIC | __GFP_NOWARN, + GFP_NOWAIT | __GFP_NOWARN, htab->map.numa_node); if (!l_new) { l_new = ERR_PTR(-ENOMEM); @@ -996,7 +996,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { /* alloc_percpu zero-fills */ pptr = bpf_map_alloc_percpu(&htab->map, size, 8, - GFP_ATOMIC | __GFP_NOWARN); + GFP_NOWAIT | __GFP_NOWARN); if (!pptr) { kfree(l_new); l_new = ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 8654fc97f5fe..49ef0ce040c7 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -165,7 +165,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key, } new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size), - __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, + __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN, map->numa_node); if (!new) return -ENOMEM; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index f0d05a3cc4b9..d789e3b831ad 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -285,7 +285,7 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, if (value) size += trie->map.value_size; - node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN, + node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN, trie->map.numa_node); if (!node) return NULL; diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index bfe24f8c5a20..6762b1260f2f 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -9,7 +9,7 @@ LLVM_STRIP ?= llvm-strip TOOLS_PATH := $(abspath ../../../../tools) BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool BPFTOOL_OUTPUT := $(abs_out)/bpftool -DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf @@ -61,9 +61,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU OUTPUT=$(abspath $(dir $@))/ prefix= \ DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ - OUTPUT=$(BPFTOOL_OUTPUT)/ \ - LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ - LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ - prefix= DESTDIR=$(abs_out)/ install-bin +$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ab688d85b2c6..83c7136c5788 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -419,35 +419,53 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) #ifdef CONFIG_MEMCG_KMEM static void bpf_map_save_memcg(struct bpf_map *map) { - map->memcg = get_mem_cgroup_from_mm(current->mm); + /* Currently if a map is created by a process belonging to the root + * memory cgroup, get_obj_cgroup_from_current() will return NULL. + * So we have to check map->objcg for being NULL each time it's + * being used. + */ + map->objcg = get_obj_cgroup_from_current(); } static void bpf_map_release_memcg(struct bpf_map *map) { - mem_cgroup_put(map->memcg); + if (map->objcg) + obj_cgroup_put(map->objcg); +} + +static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) +{ + if (map->objcg) + return get_mem_cgroup_from_objcg(map->objcg); + + return root_mem_cgroup; } void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = kzalloc(size, flags | __GFP_ACCOUNT); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } @@ -455,12 +473,14 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void __percpu *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 6cd226584c33..42e387a12694 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -13,6 +13,7 @@ #include <linux/static_call.h> #include <linux/bpf_verifier.h> #include <linux/bpf_lsm.h> +#include <linux/delay.h> /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; /* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex); + +static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) +{ + struct bpf_trampoline *tr = ops->private; + int ret = 0; + + if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) { + /* This is called inside register_ftrace_direct_multi(), so + * tr->mutex is already locked. + */ + lockdep_assert_held_once(&tr->mutex); + + /* Instead of updating the trampoline here, we propagate + * -EAGAIN to register_ftrace_direct_multi(). Then we can + * retry register_ftrace_direct_multi() after updating the + * trampoline. + */ + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) { + if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY)) + return -EBUSY; + + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + return -EAGAIN; + } + + return 0; + } + + /* The normal locking order is + * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c) + * + * The following two commands are called from + * + * prepare_direct_functions_for_ipmodify + * cleanup_direct_functions_after_ipmodify + * + * In both cases, direct_mutex is already locked. Use + * mutex_trylock(&tr->mutex) to avoid deadlock in race condition + * (something else is making changes to this same trampoline). + */ + if (!mutex_trylock(&tr->mutex)) { + /* sleep 1 ms to make sure whatever holding tr->mutex makes + * some progress. + */ + msleep(1); + return -EAGAIN; + } + + switch (cmd) { + case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER: + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER: + tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY; + + if (tr->flags & BPF_TRAMP_F_ORIG_STACK) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + default: + ret = -EINVAL; + break; + }; + + mutex_unlock(&tr->mutex); + return ret; +} +#endif + bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { enum bpf_attach_type eatype = prog->expected_attach_type; @@ -89,6 +165,16 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) tr = kzalloc(sizeof(*tr), GFP_KERNEL); if (!tr) goto out; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL); + if (!tr->fops) { + kfree(tr); + tr = NULL; + goto out; + } + tr->fops->private = tr; + tr->fops->ops_func = bpf_tramp_ftrace_ops_func; +#endif tr->key = key; INIT_HLIST_NODE(&tr->hlist); @@ -128,7 +214,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) int ret; if (tr->func.ftrace_managed) - ret = unregister_ftrace_direct((long)ip, (long)old_addr); + ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr); else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); @@ -137,15 +223,20 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) return ret; } -static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr) +static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr, + bool lock_direct_mutex) { void *ip = tr->func.addr; int ret; - if (tr->func.ftrace_managed) - ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr); - else + if (tr->func.ftrace_managed) { + if (lock_direct_mutex) + ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr); + else + ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); + } return ret; } @@ -163,10 +254,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) if (bpf_trampoline_module_get(tr)) return -ENOENT; - if (tr->func.ftrace_managed) - ret = register_ftrace_direct((long)ip, (long)new_addr); - else + if (tr->func.ftrace_managed) { + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); + } if (ret) bpf_trampoline_module_put(tr); @@ -332,11 +425,11 @@ out: return ERR_PTR(err); } -static int bpf_trampoline_update(struct bpf_trampoline *tr) +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex) { struct bpf_tramp_image *im; struct bpf_tramp_links *tlinks; - u32 flags = BPF_TRAMP_F_RESTORE_REGS; + u32 orig_flags = tr->flags; bool ip_arg = false; int err, total; @@ -358,15 +451,31 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } + /* clear all bits except SHARE_IPMODIFY */ + tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY; + if (tlinks[BPF_TRAMP_FEXIT].nr_links || - tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) - flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) { + /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME + * should not be set together. + */ + tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + } else { + tr->flags |= BPF_TRAMP_F_RESTORE_REGS; + } if (ip_arg) - flags |= BPF_TRAMP_F_IP_ARG; + tr->flags |= BPF_TRAMP_F_IP_ARG; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +again: + if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) && + (tr->flags & BPF_TRAMP_F_CALL_ORIG)) + tr->flags |= BPF_TRAMP_F_ORIG_STACK; +#endif err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tlinks, + &tr->func.model, tr->flags, tlinks, tr->func.addr); if (err < 0) goto out; @@ -375,17 +484,34 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) WARN_ON(!tr->cur_image && tr->selector); if (tr->cur_image) /* progs already running at this address */ - err = modify_fentry(tr, tr->cur_image->image, im->image); + err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); else /* first time registering */ err = register_fentry(tr, im->image); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + if (err == -EAGAIN) { + /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now + * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the + * trampoline again, and retry register. + */ + /* reset fops->func and fops->trampoline for re-register */ + tr->fops->func = NULL; + tr->fops->trampoline = 0; + goto again; + } +#endif if (err) goto out; + if (tr->cur_image) bpf_tramp_image_put(tr->cur_image); tr->cur_image = im; tr->selector++; out: + /* If any error happens, restore previous flags */ + if (err) + tr->flags = orig_flags; kfree(tlinks); return err; } @@ -451,7 +577,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; - err = bpf_trampoline_update(tr); + err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); if (err) { hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; @@ -484,7 +610,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ } hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; - return bpf_trampoline_update(tr); + return bpf_trampoline_update(tr, true /* lock_direct_mutex */); } /* bpf_trampoline_unlink_prog() should never fail. */ @@ -498,7 +624,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin return err; } -#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) static void bpf_shim_tramp_link_release(struct bpf_link *link) { struct bpf_shim_tramp_link *shim_link = @@ -712,6 +838,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) * multiple rcu callbacks. */ hlist_del(&tr->hlist); + kfree(tr->fops); kfree(tr); out: mutex_unlock(&trampoline_mutex); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 328cfab3af60..096fdac70165 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5533,17 +5533,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } -static bool arg_type_is_alloc_size(enum bpf_arg_type type) -{ - return type == ARG_CONST_ALLOC_SIZE_OR_ZERO; -} - -static bool arg_type_is_int_ptr(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_INT || - type == ARG_PTR_TO_LONG; -} - static bool arg_type_is_release(enum bpf_arg_type type) { return type & OBJ_RELEASE; @@ -5929,7 +5918,8 @@ skip_type_check: meta->ref_obj_id = reg->ref_obj_id; } - if (arg_type == ARG_CONST_MAP_PTR) { + switch (base_type(arg_type)) { + case ARG_CONST_MAP_PTR: /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ if (meta->map_ptr) { /* Use map_uid (which is unique id of inner map) to reject: @@ -5954,7 +5944,8 @@ skip_type_check: } meta->map_ptr = reg->map_ptr; meta->map_uid = reg->map_uid; - } else if (arg_type == ARG_PTR_TO_MAP_KEY) { + break; + case ARG_PTR_TO_MAP_KEY: /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within * stack limits and initialized @@ -5971,7 +5962,8 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { + break; + case ARG_PTR_TO_MAP_VALUE: if (type_may_be_null(arg_type) && register_is_null(reg)) return 0; @@ -5987,14 +5979,16 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); - } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { + break; + case ARG_PTR_TO_PERCPU_BTF_ID: if (!reg->btf_id) { verbose(env, "Helper has invalid btf_id in R%d\n", regno); return -EACCES; } meta->ret_btf = reg->btf; meta->ret_btf_id = reg->btf_id; - } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + break; + case ARG_PTR_TO_SPIN_LOCK: if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) return -EACCES; @@ -6005,12 +5999,15 @@ skip_type_check: verbose(env, "verifier internal error\n"); return -EFAULT; } - } else if (arg_type == ARG_PTR_TO_TIMER) { + break; + case ARG_PTR_TO_TIMER: if (process_timer_func(env, regno, meta)) return -EACCES; - } else if (arg_type == ARG_PTR_TO_FUNC) { + break; + case ARG_PTR_TO_FUNC: meta->subprogno = reg->subprogno; - } else if (base_type(arg_type) == ARG_PTR_TO_MEM) { + break; + case ARG_PTR_TO_MEM: /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. */ @@ -6020,11 +6017,14 @@ skip_type_check: fn->arg_size[arg], false, meta); } - } else if (arg_type_is_mem_size(arg_type)) { - bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - - err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); - } else if (arg_type_is_dynptr(arg_type)) { + break; + case ARG_CONST_SIZE: + err = check_mem_size_reg(env, reg, regno, false, meta); + break; + case ARG_CONST_SIZE_OR_ZERO: + err = check_mem_size_reg(env, reg, regno, true, meta); + break; + case ARG_PTR_TO_DYNPTR: if (arg_type & MEM_UNINIT) { if (!is_dynptr_reg_valid_uninit(env, reg)) { verbose(env, "Dynptr has to be an uninitialized dynptr\n"); @@ -6058,21 +6058,28 @@ skip_type_check: err_extra, arg + 1); return -EINVAL; } - } else if (arg_type_is_alloc_size(arg_type)) { + break; + case ARG_CONST_ALLOC_SIZE_OR_ZERO: if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", regno); return -EACCES; } meta->mem_size = reg->var_off.value; - } else if (arg_type_is_int_ptr(arg_type)) { + break; + case ARG_PTR_TO_INT: + case ARG_PTR_TO_LONG: + { int size = int_ptr_type_to_size(arg_type); err = check_helper_mem_access(env, regno, size, false, meta); if (err) return err; err = check_ptr_alignment(env, reg, 0, size, true); - } else if (arg_type == ARG_PTR_TO_CONST_STR) { + break; + } + case ARG_PTR_TO_CONST_STR: + { struct bpf_map *map = reg->map_ptr; int map_off; u64 map_addr; @@ -6111,9 +6118,12 @@ skip_type_check: verbose(env, "string is not zero-terminated\n"); return -EINVAL; } - } else if (arg_type == ARG_PTR_TO_KPTR) { + break; + } + case ARG_PTR_TO_KPTR: if (process_kptr_func(env, regno, meta)) return -EACCES; + break; } return err; @@ -7160,6 +7170,7 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); const struct bpf_func_proto *fn = NULL; enum bpf_return_type ret_type; enum bpf_type_flag ret_flag; @@ -7321,7 +7332,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } break; case BPF_FUNC_set_retval: - if (env->prog->expected_attach_type == BPF_LSM_CGROUP) { + if (prog_type == BPF_PROG_TYPE_LSM && + env->prog->expected_attach_type == BPF_LSM_CGROUP) { if (!env->prog->aux->attach_func_proto->type) { /* Make sure programs that attach to void * hooks don't try to modify return value. @@ -7550,6 +7562,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; + u32 *kfunc_flags; bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ @@ -7565,18 +7578,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_CHECK, func_id)) { + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + if (!kfunc_flags) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } - - acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_ACQUIRE, func_id); + acq = *kfunc_flags & KF_ACQUIRE; /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags); if (err < 0) return err; /* In case of release function, we get register number of refcounted @@ -7620,8 +7631,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; - if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RET_NULL, func_id)) { + if (*kfunc_flags & KF_RET_NULL) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; @@ -12562,6 +12572,7 @@ static bool is_tracing_prog_type(enum bpf_prog_type type) case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: return true; default: return false; @@ -13620,6 +13631,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; func[i]->aux->poke_tab = prog->aux->poke_tab; func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; @@ -13632,9 +13644,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) poke->aux = func[i]->aux; } - /* Use bpf_prog_F_tag to indicate functions in stack traces. - * Long term would need debug info to populate names - */ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; diff --git a/kernel/events/core.c b/kernel/events/core.c index 48bae58d240e..07d51f95d6a6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6253,10 +6253,10 @@ again: if (!atomic_inc_not_zero(&event->rb->mmap_count)) { /* - * Raced against perf_mmap_close() through - * perf_event_set_output(). Try again, hope for better - * luck. + * Raced against perf_mmap_close(); remove the + * event and try again. */ + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); goto again; } @@ -11829,14 +11829,25 @@ err_size: goto out; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { struct perf_buffer *rb = NULL; int ret = -EINVAL; - if (!output_event) + if (!output_event) { + mutex_lock(&event->mmap_mutex); goto set; + } /* don't allow circular references */ if (event == output_event) @@ -11874,8 +11885,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) event->pmu != output_event->pmu) goto out; + /* + * Hold both mmap_mutex to serialize against perf_mmap_close(). Since + * output_event is already on rb->event_list, and the list iteration + * restarts after every removal, it is guaranteed this new event is + * observed *OR* if output_event is already removed, it's guaranteed we + * observe !rb->mmap_count. + */ + mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: - mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ if (atomic_read(&event->mmap_count)) goto unlock; @@ -11885,6 +11903,12 @@ set: rb = ring_buffer_get(output_event); if (!rb) goto unlock; + + /* did we race against perf_mmap_close() */ + if (!atomic_read(&rb->mmap_count)) { + ring_buffer_put(rb); + goto unlock; + } } ring_buffer_attach(event, rb); @@ -11892,20 +11916,13 @@ set: ret = 0; unlock: mutex_unlock(&event->mmap_mutex); + if (output_event) + mutex_unlock(&output_event->mmap_mutex); out: return ret; } -static void mutex_lock_double(struct mutex *a, struct mutex *b) -{ - if (b < a) - swap(a, b); - - mutex_lock(a); - mutex_lock_nested(b, SINGLE_DEPTH_NESTING); -} - static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) { bool nmi_safe = false; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fbdf8d3279ac..79a85834ce9d 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/bsearch.h> +#include <linux/btf_ids.h> /* * These will be re-linked against their real values @@ -799,6 +800,96 @@ static const struct seq_operations kallsyms_op = { .show = s_show }; +#ifdef CONFIG_BPF_SYSCALL + +struct bpf_iter__ksym { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct kallsym_iter *, ksym); +}; + +static int ksym_prog_seq_show(struct seq_file *m, bool in_stop) +{ + struct bpf_iter__ksym ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + meta.seq = m; + prog = bpf_iter_get_info(&meta, in_stop); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.ksym = m ? m->private : NULL; + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p) +{ + return ksym_prog_seq_show(m, false); +} + +static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p) +{ + if (!p) + (void) ksym_prog_seq_show(m, true); + else + s_stop(m, p); +} + +static const struct seq_operations bpf_iter_ksym_ops = { + .start = s_start, + .next = s_next, + .stop = bpf_iter_ksym_seq_stop, + .show = bpf_iter_ksym_seq_show, +}; + +static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct kallsym_iter *iter = priv_data; + + reset_iter(iter, 0); + + /* cache here as in kallsyms_open() case; use current process + * credentials to tell BPF iterators if values should be shown. + */ + iter->show_value = kallsyms_show_value(current_cred()); + + return 0; +} + +DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym) + +static const struct bpf_iter_seq_info ksym_iter_seq_info = { + .seq_ops = &bpf_iter_ksym_ops, + .init_seq_private = bpf_iter_ksym_init, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct kallsym_iter), +}; + +static struct bpf_iter_reg ksym_iter_reg_info = { + .target = "ksym", + .feature = BPF_ITER_RESCHED, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__ksym, ksym), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &ksym_iter_seq_info, +}; + +BTF_ID_LIST(btf_ksym_iter_id) +BTF_ID(struct, kallsym_iter) + +static int __init bpf_ksym_iter_register(void) +{ + ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id; + return bpf_iter_reg_target(&ksym_iter_reg_info); +} + +late_initcall(bpf_ksym_iter_register); + +#endif /* CONFIG_BPF_SYSCALL */ + static inline int kallsyms_for_perf(void) { #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b49c6ff6dca0..a1a81fd9889b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3380,6 +3380,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre diff = 0; console_lock(); + for_each_console(c) { if (con && con != c) continue; @@ -3389,11 +3390,19 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre if (printk_seq < seq) diff += seq - printk_seq; } - console_unlock(); - if (diff != last_diff && reset_on_progress) + /* + * If consoles are suspended, it cannot be expected that they + * make forward progress, so timeout immediately. @diff is + * still used to return a valid flush status. + */ + if (console_suspended) + remaining = 0; + else if (diff != last_diff && reset_on_progress) remaining = timeout_ms; + console_unlock(); + if (diff == 0 || remaining == 0) break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 619b02443522..b233714a1c78 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2135,6 +2135,17 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO_HUNDRED, }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", @@ -2151,15 +2162,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, - { - .procname = "numa_stat", - .data = &sysctl_vm_numa_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "hugetlb_shm_group", diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 601ccf1b2f09..bc921a3f7ea8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip); + /* * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK * or no-needed to update, -EBUSY if it detects a conflict of the flag @@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) * - If the hash is EMPTY_HASH, it hits nothing * - Anything else hits the recs which match the hash entries. + * + * DIRECT ops does not have IPMODIFY flag, but we still need to check it + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate + * the return value to the caller and eventually to the owner of the DIRECT + * ops. */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, @@ -1877,17 +1886,26 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; int in_old, in_new; + bool is_ipmodify, is_direct; /* Only update if the ops has been registered */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY; + is_direct = ops->flags & FTRACE_OPS_FL_DIRECT; + + /* neither IPMODIFY nor DIRECT, skip */ + if (!is_ipmodify && !is_direct) + return 0; + + if (WARN_ON_ONCE(is_ipmodify && is_direct)) return 0; /* - * Since the IPMODIFY is a very address sensitive action, we do not - * allow ftrace_ops to set all functions to new hash. + * Since the IPMODIFY and DIRECT are very address sensitive + * actions, we do not allow ftrace_ops to set all functions to new + * hash. */ if (!new_hash || !old_hash) return -EINVAL; @@ -1905,12 +1923,32 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, continue; if (in_new) { - /* New entries must ensure no others are using it */ - if (rec->flags & FTRACE_FL_IPMODIFY) - goto rollback; - rec->flags |= FTRACE_FL_IPMODIFY; - } else /* Removed entry */ + if (rec->flags & FTRACE_FL_IPMODIFY) { + int ret; + + /* Cannot have two ipmodify on same rec */ + if (is_ipmodify) + goto rollback; + + FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT); + + /* + * Another ops with IPMODIFY is already + * attached. We are now attaching a direct + * ops. Run SHARE_IPMODIFY_SELF, to check + * whether sharing is supported. + */ + if (!ops->ops_func) + return -EBUSY; + ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); + if (ret) + return ret; + } else if (is_ipmodify) { + rec->flags |= FTRACE_FL_IPMODIFY; + } + } else if (is_ipmodify) { rec->flags &= ~FTRACE_FL_IPMODIFY; + } } while_for_each_ftrace_rec(); return 0; @@ -2454,8 +2492,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops direct_ops = { .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_IPMODIFY - | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS + .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, /* * By declaring the main trampoline as this trampoline @@ -3072,14 +3109,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) } /* - * Check if the current ops references the record. + * Check if the current ops references the given ip. * * If the ops traces all functions, then it was already accounted for. * If the ops does not trace the current record function, skip it. * If the ops ignores the function via notrace filter, skip it. */ -static inline bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) { /* If ops isn't enabled, ignore it */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) @@ -3091,16 +3128,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) return false; /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) return false; return true; } +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + return ops_references_ip(ops, rec->ip); +} + static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { bool init_nop = ftrace_need_init_nop(); @@ -5215,6 +5265,8 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) return direct; } +static int register_ftrace_function_nolock(struct ftrace_ops *ops); + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5286,7 +5338,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function(&direct_ops); + ret = register_ftrace_function_nolock(&direct_ops); if (ret) ftrace_set_filter_ip(&direct_ops, ip, 1, 0); } @@ -5545,8 +5597,7 @@ int modify_ftrace_direct(unsigned long ip, } EXPORT_SYMBOL_GPL(modify_ftrace_direct); -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \ - FTRACE_OPS_FL_SAVE_REGS) +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) static int check_direct_multi(struct ftrace_ops *ops) { @@ -5639,7 +5690,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; - err = register_ftrace_function(ops); + err = register_ftrace_function_nolock(ops); out_remove: if (err) @@ -5691,22 +5742,8 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); -/** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call - * to call something else - * @ops: The address of the struct ftrace_ops object - * @addr: The address of the new trampoline to call at @ops functions - * - * This is used to unregister currently registered direct caller and - * register new one @addr on functions registered in @ops object. - * - * Note there's window between ftrace_shutdown and ftrace_startup calls - * where there will be no callbacks called. - * - * Returns: zero on success. Non zero on error, which includes: - * -EINVAL - The @ops object was not properly registered. - */ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static int +__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash; struct ftrace_func_entry *entry, *iter; @@ -5717,20 +5754,15 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) int i, size; int err; - if (check_direct_multi(ops)) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return -EINVAL; - - mutex_lock(&direct_mutex); + lockdep_assert_held_once(&direct_mutex); /* Enable the tmp_ops to have the same functions as the direct ops */ ftrace_ops_init(&tmp_ops); tmp_ops.func_hash = ops->func_hash; - err = register_ftrace_function(&tmp_ops); + err = register_ftrace_function_nolock(&tmp_ops); if (err) - goto out_direct; + return err; /* * Now the ftrace_ops_list_func() is called to do the direct callers. @@ -5754,7 +5786,64 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); - out_direct: + return err; +} + +/** + * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Caller should already have direct_mutex locked, so we don't lock + * direct_mutex here. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + return __modify_ftrace_direct_multi(ops, addr); +} +EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); + +/** + * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + err = __modify_ftrace_direct_multi(ops, addr); mutex_unlock(&direct_mutex); return err; } @@ -7965,6 +8054,143 @@ int ftrace_is_dead(void) return ftrace_disabled; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure + * it doesn't conflict with any direct ftrace_ops. If there is existing + * direct ftrace_ops on a kernel function being patched, call + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing. + * + * @ops: ftrace_ops being registered. + * + * Returns: + * 0 on success; + * Negative on failure. + */ +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i, ret; + + lockdep_assert_held_once(&direct_mutex); + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return 0; + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + if (found_op) { + if (!op->ops_func) + return -EBUSY; + + ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); + if (ret) + return ret; + } + } + } + + return 0; +} + +/* + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT + * ops. + */ +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i; + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return; + + mutex_lock(&direct_mutex); + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + /* The cleanup is optional, ignore any errors */ + if (found_op && op->ops_func) + op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); + } + } + mutex_unlock(&direct_mutex); +} + +#define lock_direct_mutex() mutex_lock(&direct_mutex) +#define unlock_direct_mutex() mutex_unlock(&direct_mutex) + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + return 0; +} + +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ +} + +#define lock_direct_mutex() do { } while (0) +#define unlock_direct_mutex() do { } while (0) + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +/* + * Similar to register_ftrace_function, except we don't lock direct_mutex. + */ +static int register_ftrace_function_nolock(struct ftrace_ops *ops) +{ + int ret; + + ftrace_ops_init(ops); + + mutex_lock(&ftrace_lock); + + ret = ftrace_startup(ops, 0); + + mutex_unlock(&ftrace_lock); + + return ret; +} + /** * register_ftrace_function - register a function for profiling * @ops: ops structure that holds the function for profiling. @@ -7980,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret; - ftrace_ops_init(ops); - - mutex_lock(&ftrace_lock); - - ret = ftrace_startup(ops, 0); + lock_direct_mutex(); + ret = prepare_direct_functions_for_ipmodify(ops); + if (ret < 0) + goto out_unlock; - mutex_unlock(&ftrace_lock); + ret = register_ftrace_function_nolock(ops); +out_unlock: + unlock_direct_mutex(); return ret; } EXPORT_SYMBOL_GPL(register_ftrace_function); @@ -8006,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) ret = ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); + cleanup_direct_functions_after_ipmodify(ops); return ret; } EXPORT_SYMBOL_GPL(unregister_ftrace_function); diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 230038d4f908..bb9962b33f95 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -34,6 +34,27 @@ MODULE_LICENSE("GPL"); #define WATCH_QUEUE_NOTE_SIZE 128 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) +/* + * This must be called under the RCU read-lock, which makes + * sure that the wqueue still exists. It can then take the lock, + * and check that the wqueue hasn't been destroyed, which in + * turn makes sure that the notification pipe still exists. + */ +static inline bool lock_wqueue(struct watch_queue *wqueue) +{ + spin_lock_bh(&wqueue->lock); + if (unlikely(wqueue->defunct)) { + spin_unlock_bh(&wqueue->lock); + return false; + } + return true; +} + +static inline void unlock_wqueue(struct watch_queue *wqueue) +{ + spin_unlock_bh(&wqueue->lock); +} + static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -69,6 +90,10 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { /* * Post a notification to a watch queue. + * + * Must be called with the RCU lock for reading, and the + * watch_queue lock held, which guarantees that the pipe + * hasn't been released. */ static bool post_one_notification(struct watch_queue *wqueue, struct watch_notification *n) @@ -85,9 +110,6 @@ static bool post_one_notification(struct watch_queue *wqueue, spin_lock_irq(&pipe->rd_wait.lock); - if (wqueue->defunct) - goto out; - mask = pipe->ring_size - 1; head = pipe->head; tail = pipe->tail; @@ -203,7 +225,10 @@ void __post_watch_notification(struct watch_list *wlist, if (security_post_notification(watch->cred, cred, n) < 0) continue; - post_one_notification(wqueue, n); + if (lock_wqueue(wqueue)) { + post_one_notification(wqueue, n); + unlock_wqueue(wqueue); + } } rcu_read_unlock(); @@ -462,11 +487,12 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) return -EAGAIN; } - spin_lock_bh(&wqueue->lock); - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); - spin_unlock_bh(&wqueue->lock); + if (lock_wqueue(wqueue)) { + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + unlock_wqueue(wqueue); + } hlist_add_head(&watch->list_node, &wlist->watchers); return 0; @@ -520,20 +546,15 @@ found: wqueue = rcu_dereference(watch->queue); - /* We don't need the watch list lock for the next bit as RCU is - * protecting *wqueue from deallocation. - */ - if (wqueue) { + if (lock_wqueue(wqueue)) { post_one_notification(wqueue, &n.watch); - spin_lock_bh(&wqueue->lock); - if (!hlist_unhashed(&watch->queue_node)) { hlist_del_init_rcu(&watch->queue_node); put_watch(watch); } - spin_unlock_bh(&wqueue->lock); + unlock_wqueue(wqueue); } if (wlist->release_watch) { diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index a9f7eb047768..fd15230a703b 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -84,6 +84,9 @@ config UBSAN_SHIFT config UBSAN_DIV_ZERO bool "Perform checking for integer divide-by-zero" depends on $(cc-option,-fsanitize=integer-divide-by-zero) + # https://github.com/ClangBuiltLinux/linux/issues/1657 + # https://github.com/llvm/llvm-project/issues/56289 + depends on !CC_IS_CLANG help This option enables -fsanitize=integer-divide-by-zero which checks for integer division by zero. This is effectively redundant with the diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index e0ab4cd7afc3..ae3bdc6dfc92 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -29,6 +29,7 @@ menuconfig BT SCO audio links L2CAP (Logical Link Control and Adaptation Protocol) SMP (Security Manager Protocol) on LE (Low Energy) links + ISO isochronous links HCI Device drivers (Interface to the hardware) RFCOMM Module (RFCOMM Protocol) BNEP Module (Bluetooth Network Encapsulation Protocol) diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a52bba8500e1..0e7b7db42750 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -18,6 +18,7 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ eir.o hci_sync.o bluetooth-$(CONFIG_BT_BREDR) += sco.o +bluetooth-$(CONFIG_BT_LE) += iso.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o bluetooth-$(CONFIG_BT_LEDS) += leds.o bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b506409bb498..dc65974f5adb 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -38,7 +38,7 @@ #include "selftest.h" /* Bluetooth sockets */ -#define BT_MAX_PROTO 8 +#define BT_MAX_PROTO (BTPROTO_LAST + 1) static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); @@ -52,6 +52,7 @@ static const char *const bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_CMTP", "sk_lock-AF_BLUETOOTH-BTPROTO_HIDP", "sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP", + "sk_lock-AF_BLUETOOTH-BTPROTO_ISO", }; static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; @@ -64,6 +65,7 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_CMTP", "slock-AF_BLUETOOTH-BTPROTO_HIDP", "slock-AF_BLUETOOTH-BTPROTO_AVDTP", + "slock-AF_BLUETOOTH-BTPROTO_ISO", }; void bt_sock_reclassify_lock(struct sock *sk, int proto) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 7d77fb00c2bf..8a85f6cdfbc1 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,6 +13,20 @@ #define PNP_INFO_SVCLASS_ID 0x1200 +static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) +{ + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + /* If data is already NULL terminated just pass it directly */ + if (data[data_len - 1] == '\0') + return eir_append_data(eir, eir_len, type, data, data_len); + + memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(eir, eir_len, type, name, sizeof(name)); +} + u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; @@ -23,29 +37,26 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) return ad_len; /* use complete name if present and fits */ - complete_len = strlen(hdev->dev_name); + complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, hdev->dev_name, complete_len + 1); /* use short name if present */ - short_len = strlen(hdev->short_name); + short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->short_name, short_len + 1); + return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, + short_len == HCI_MAX_SHORT_NAME_LENGTH ? + short_len : short_len + 1); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ - if (complete_len) { - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, - sizeof(name)); - } + if (complete_len) + return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + hdev->dev_name, + HCI_MAX_SHORT_NAME_LENGTH); return ad_len; } @@ -181,7 +192,7 @@ void eir_create(struct hci_dev *hdev, u8 *data) u8 *ptr = data; size_t name_len; - name_len = strlen(hdev->dev_name); + name_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (name_len > 0) { /* EIR Data type */ @@ -225,6 +236,27 @@ void eir_create(struct hci_dev *hdev, u8 *data) ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); } +u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv = NULL; + u8 ad_len = 0; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv) + return 0; + } + + if (adv) { + memcpy(ptr, adv->per_adv_data, adv->per_adv_data_len); + ad_len += adv->per_adv_data_len; + ptr += adv->per_adv_data_len; + } + + return ad_len; +} + u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv = NULL; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 62f2374078f2..0df19f2f4af9 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -11,6 +11,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ac06c9724c7f..f54864e19866 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -30,10 +30,13 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> +#include <net/bluetooth/iso.h> +#include <net/bluetooth/mgmt.h> #include "hci_request.h" #include "smp.h" #include "a2mp.h" +#include "eir.h" struct sco_param { u16 pkt_type; @@ -118,10 +121,16 @@ static void hci_conn_cleanup(struct hci_conn *conn) if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); + if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) + hci_remove_link_key(hdev, &conn->dst); + hci_chan_list_flush(conn); hci_conn_hash_del(hdev, conn); + if (conn->cleanup) + conn->cleanup(conn); + if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_CVSD: @@ -678,6 +687,199 @@ static void le_conn_timeout(struct work_struct *work) hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } +struct iso_list_data { + union { + u8 cig; + u8 big; + }; + union { + u8 cis; + u8 bis; + u16 sync_handle; + }; + int count; + struct { + struct hci_cp_le_set_cig_params cp; + struct hci_cis_params cis[0x11]; + } pdu; +}; + +static void bis_list(struct hci_conn *conn, void *data) +{ + struct iso_list_data *d = data; + + /* Skip if not broadcast/ANY address */ + if (bacmp(&conn->dst, BDADDR_ANY)) + return; + + if (d->big != conn->iso_qos.big || d->bis == BT_ISO_QOS_BIS_UNSET || + d->bis != conn->iso_qos.bis) + return; + + d->count++; +} + +static void find_bis(struct hci_conn *conn, void *data) +{ + struct iso_list_data *d = data; + + /* Ignore unicast */ + if (bacmp(&conn->dst, BDADDR_ANY)) + return; + + d->count++; +} + +static int terminate_big_sync(struct hci_dev *hdev, void *data) +{ + struct iso_list_data *d = data; + + bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis); + + hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL); + + /* Check if ISO connection is a BIS and terminate BIG if there are + * no other connections using it. + */ + hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d); + if (d->count) + return 0; + + return hci_le_terminate_big_sync(hdev, d->big, + HCI_ERROR_LOCAL_HOST_TERM); +} + +static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err) +{ + kfree(data); +} + +static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis) +{ + struct iso_list_data *d; + + bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis); + + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + memset(d, 0, sizeof(*d)); + d->big = big; + d->bis = bis; + + return hci_cmd_sync_queue(hdev, terminate_big_sync, d, + terminate_big_destroy); +} + +static int big_terminate_sync(struct hci_dev *hdev, void *data) +{ + struct iso_list_data *d = data; + + bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big, + d->sync_handle); + + /* Check if ISO connection is a BIS and terminate BIG if there are + * no other connections using it. + */ + hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d); + if (d->count) + return 0; + + hci_le_big_terminate_sync(hdev, d->big); + + return hci_le_pa_terminate_sync(hdev, d->sync_handle); +} + +static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle) +{ + struct iso_list_data *d; + + bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle); + + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + memset(d, 0, sizeof(*d)); + d->big = big; + d->sync_handle = sync_handle; + + return hci_cmd_sync_queue(hdev, big_terminate_sync, d, + terminate_big_destroy); +} + +/* Cleanup BIS connection + * + * Detects if there any BIS left connected in a BIG + * broadcaster: Remove advertising instance and terminate BIG. + * broadcaster receiver: Teminate BIG sync and terminate PA sync. + */ +static void bis_cleanup(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + bt_dev_dbg(hdev, "conn %p", conn); + + if (conn->role == HCI_ROLE_MASTER) { + if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags)) + return; + + hci_le_terminate_big(hdev, conn->iso_qos.big, + conn->iso_qos.bis); + } else { + hci_le_big_terminate(hdev, conn->iso_qos.big, + conn->sync_handle); + } +} + +static int remove_cig_sync(struct hci_dev *hdev, void *data) +{ + u8 handle = PTR_ERR(data); + + return hci_le_remove_cig_sync(hdev, handle); +} + +static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle) +{ + bt_dev_dbg(hdev, "handle 0x%2.2x", handle); + + return hci_cmd_sync_queue(hdev, remove_cig_sync, ERR_PTR(handle), NULL); +} + +static void find_cis(struct hci_conn *conn, void *data) +{ + struct iso_list_data *d = data; + + /* Ignore broadcast */ + if (!bacmp(&conn->dst, BDADDR_ANY)) + return; + + d->count++; +} + +/* Cleanup CIS connection: + * + * Detects if there any CIS left connected in a CIG and remove it. + */ +static void cis_cleanup(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct iso_list_data d; + + memset(&d, 0, sizeof(d)); + d.cig = conn->iso_qos.cig; + + /* Check if ISO connection is a CIS and remove CIG if there are + * no other connections using it. + */ + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); + if (d.count) + return; + + hci_le_remove_cig(hdev, conn->iso_qos.cig); +} + struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role) { @@ -722,6 +924,17 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); break; + case ISO_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + + /* set proper cleanup function */ + if (!bacmp(dst, BDADDR_ANY)) + conn->cleanup = bis_cleanup; + else if (conn->role == HCI_ROLE_MASTER) + conn->cleanup = cis_cleanup; + + break; case SCO_LINK: if (lmp_esco_capable(hdev)) conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | @@ -947,7 +1160,7 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) if (conn != hci_lookup_le_connect(hdev)) goto done; - hci_conn_failed(conn, err); + hci_conn_failed(conn, bt_status(err)); done: hci_dev_unlock(hdev); @@ -1093,6 +1306,108 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, return 0; } +static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos) +{ + struct iso_list_data data; + + /* Allocate a BIG if not set */ + if (qos->big == BT_ISO_QOS_BIG_UNSET) { + for (data.big = 0x00; data.big < 0xef; data.big++) { + data.count = 0; + data.bis = 0xff; + + hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, + BT_BOUND, &data); + if (!data.count) + break; + } + + if (data.big == 0xef) + return -EADDRNOTAVAIL; + + /* Update BIG */ + qos->big = data.big; + } + + return 0; +} + +static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) +{ + struct iso_list_data data; + + /* Allocate BIS if not set */ + if (qos->bis == BT_ISO_QOS_BIS_UNSET) { + /* Find an unused adv set to advertise BIS, skip instance 0x00 + * since it is reserved as general purpose set. + */ + for (data.bis = 0x01; data.bis < hdev->le_num_of_adv_sets; + data.bis++) { + data.count = 0; + + hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, + BT_BOUND, &data); + if (!data.count) + break; + } + + if (data.bis == hdev->le_num_of_adv_sets) + return -EADDRNOTAVAIL; + + /* Update BIS */ + qos->bis = data.bis; + } + + return 0; +} + +/* This function requires the caller holds hdev->lock */ +static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, + struct bt_iso_qos *qos) +{ + struct hci_conn *conn; + struct iso_list_data data; + int err; + + /* Let's make sure that le is enabled.*/ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { + if (lmp_le_capable(hdev)) + return ERR_PTR(-ECONNREFUSED); + return ERR_PTR(-EOPNOTSUPP); + } + + err = qos_set_big(hdev, qos); + if (err) + return ERR_PTR(err); + + err = qos_set_bis(hdev, qos); + if (err) + return ERR_PTR(err); + + data.big = qos->big; + data.bis = qos->bis; + data.count = 0; + + /* Check if there is already a matching BIG/BIS */ + hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data); + if (data.count) + return ERR_PTR(-EADDRINUSE); + + conn = hci_conn_hash_lookup_bis(hdev, dst, qos->big, qos->bis); + if (conn) + return ERR_PTR(-EADDRINUSE); + + conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + if (!conn) + return ERR_PTR(-ENOMEM); + + set_bit(HCI_CONN_PER_ADV, &conn->flags); + conn->state = BT_CONNECT; + + hci_conn_hold(conn); + return conn; +} + /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, @@ -1229,6 +1544,589 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, return sco; } +static void cis_add(struct iso_list_data *d, struct bt_iso_qos *qos) +{ + struct hci_cis_params *cis = &d->pdu.cis[d->pdu.cp.num_cis]; + + cis->cis_id = qos->cis; + cis->c_sdu = cpu_to_le16(qos->out.sdu); + cis->p_sdu = cpu_to_le16(qos->in.sdu); + cis->c_phy = qos->out.phy; + cis->p_phy = qos->in.phy; + cis->c_rtn = qos->out.rtn; + cis->p_rtn = qos->in.rtn; + + d->pdu.cp.num_cis++; +} + +static void cis_list(struct hci_conn *conn, void *data) +{ + struct iso_list_data *d = data; + + /* Skip if broadcast/ANY address */ + if (!bacmp(&conn->dst, BDADDR_ANY)) + return; + + if (d->cig != conn->iso_qos.cig || d->cis == BT_ISO_QOS_CIS_UNSET || + d->cis != conn->iso_qos.cis) + return; + + d->count++; + + if (d->pdu.cp.cig_id == BT_ISO_QOS_CIG_UNSET || + d->count >= ARRAY_SIZE(d->pdu.cis)) + return; + + cis_add(d, &conn->iso_qos); +} + +static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_le_create_big cp; + + memset(&cp, 0, sizeof(cp)); + + cp.handle = qos->big; + cp.adv_handle = qos->bis; + cp.num_bis = 0x01; + hci_cpu_to_le24(qos->out.interval, cp.bis.sdu_interval); + cp.bis.sdu = cpu_to_le16(qos->out.sdu); + cp.bis.latency = cpu_to_le16(qos->out.latency); + cp.bis.rtn = qos->out.rtn; + cp.bis.phy = qos->out.phy; + cp.bis.packing = qos->packing; + cp.bis.framing = qos->framing; + cp.bis.encryption = 0x00; + memset(&cp.bis.bcode, 0, sizeof(cp.bis.bcode)); + + return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp); +} + +static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) +{ + struct hci_dev *hdev = conn->hdev; + struct iso_list_data data; + + memset(&data, 0, sizeof(data)); + + /* Allocate a CIG if not set */ + if (qos->cig == BT_ISO_QOS_CIG_UNSET) { + for (data.cig = 0x00; data.cig < 0xff; data.cig++) { + data.count = 0; + data.cis = 0xff; + + hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, + BT_BOUND, &data); + if (data.count) + continue; + + hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, + BT_CONNECTED, &data); + if (!data.count) + break; + } + + if (data.cig == 0xff) + return false; + + /* Update CIG */ + qos->cig = data.cig; + } + + data.pdu.cp.cig_id = qos->cig; + hci_cpu_to_le24(qos->out.interval, data.pdu.cp.c_interval); + hci_cpu_to_le24(qos->in.interval, data.pdu.cp.p_interval); + data.pdu.cp.sca = qos->sca; + data.pdu.cp.packing = qos->packing; + data.pdu.cp.framing = qos->framing; + data.pdu.cp.c_latency = cpu_to_le16(qos->out.latency); + data.pdu.cp.p_latency = cpu_to_le16(qos->in.latency); + + if (qos->cis != BT_ISO_QOS_CIS_UNSET) { + data.count = 0; + data.cig = qos->cig; + data.cis = qos->cis; + + hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND, + &data); + if (data.count) + return false; + + cis_add(&data, qos); + } + + /* Reprogram all CIS(s) with the same CIG */ + for (data.cig = qos->cig, data.cis = 0x00; data.cis < 0x11; + data.cis++) { + data.count = 0; + + hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND, + &data); + if (data.count) + continue; + + /* Allocate a CIS if not set */ + if (qos->cis == BT_ISO_QOS_CIS_UNSET) { + /* Update CIS */ + qos->cis = data.cis; + cis_add(&data, qos); + } + } + + if (qos->cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis) + return false; + + if (hci_send_cmd(hdev, HCI_OP_LE_SET_CIG_PARAMS, + sizeof(data.pdu.cp) + + (data.pdu.cp.num_cis * sizeof(*data.pdu.cis)), + &data.pdu) < 0) + return false; + + return true; +} + +struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos) +{ + struct hci_conn *cis; + + cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type); + if (!cis) { + cis = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + if (!cis) + return ERR_PTR(-ENOMEM); + cis->cleanup = cis_cleanup; + } + + if (cis->state == BT_CONNECTED) + return cis; + + /* Check if CIS has been set and the settings matches */ + if (cis->state == BT_BOUND && + !memcmp(&cis->iso_qos, qos, sizeof(*qos))) + return cis; + + /* Update LINK PHYs according to QoS preference */ + cis->le_tx_phy = qos->out.phy; + cis->le_rx_phy = qos->in.phy; + + /* If output interval is not set use the input interval as it cannot be + * 0x000000. + */ + if (!qos->out.interval) + qos->out.interval = qos->in.interval; + + /* If input interval is not set use the output interval as it cannot be + * 0x000000. + */ + if (!qos->in.interval) + qos->in.interval = qos->out.interval; + + /* If output latency is not set use the input latency as it cannot be + * 0x0000. + */ + if (!qos->out.latency) + qos->out.latency = qos->in.latency; + + /* If input latency is not set use the output latency as it cannot be + * 0x0000. + */ + if (!qos->in.latency) + qos->in.latency = qos->out.latency; + + /* Mirror PHYs that are disabled as SDU will be set to 0 */ + if (!qos->in.phy) + qos->in.phy = qos->out.phy; + + if (!qos->out.phy) + qos->out.phy = qos->in.phy; + + if (!hci_le_set_cig_params(cis, qos)) { + hci_conn_drop(cis); + return ERR_PTR(-EINVAL); + } + + cis->iso_qos = *qos; + cis->state = BT_BOUND; + + return cis; +} + +bool hci_iso_setup_path(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_le_setup_iso_path cmd; + + memset(&cmd, 0, sizeof(cmd)); + + if (conn->iso_qos.out.sdu) { + cmd.handle = cpu_to_le16(conn->handle); + cmd.direction = 0x00; /* Input (Host to Controller) */ + cmd.path = 0x00; /* HCI path if enabled */ + cmd.codec = 0x03; /* Transparent Data */ + + if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd), + &cmd) < 0) + return false; + } + + if (conn->iso_qos.in.sdu) { + cmd.handle = cpu_to_le16(conn->handle); + cmd.direction = 0x01; /* Output (Controller to Host) */ + cmd.path = 0x00; /* HCI path if enabled */ + cmd.codec = 0x03; /* Transparent Data */ + + if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd), + &cmd) < 0) + return false; + } + + return true; +} + +static int hci_create_cis_sync(struct hci_dev *hdev, void *data) +{ + struct { + struct hci_cp_le_create_cis cp; + struct hci_cis cis[0x1f]; + } cmd; + struct hci_conn *conn = data; + u8 cig; + + memset(&cmd, 0, sizeof(cmd)); + cmd.cis[0].acl_handle = cpu_to_le16(conn->link->handle); + cmd.cis[0].cis_handle = cpu_to_le16(conn->handle); + cmd.cp.num_cis++; + cig = conn->iso_qos.cig; + + hci_dev_lock(hdev); + + rcu_read_lock(); + + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis]; + + if (conn == data || conn->type != ISO_LINK || + conn->state == BT_CONNECTED || conn->iso_qos.cig != cig) + continue; + + /* Check if all CIS(s) belonging to a CIG are ready */ + if (conn->link->state != BT_CONNECTED || + conn->state != BT_CONNECT) { + cmd.cp.num_cis = 0; + break; + } + + /* Group all CIS with state BT_CONNECT since the spec don't + * allow to send them individually: + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2566: + * + * If the Host issues this command before all the + * HCI_LE_CIS_Established events from the previous use of the + * command have been generated, the Controller shall return the + * error code Command Disallowed (0x0C). + */ + cis->acl_handle = cpu_to_le16(conn->link->handle); + cis->cis_handle = cpu_to_le16(conn->handle); + cmd.cp.num_cis++; + } + + rcu_read_unlock(); + + hci_dev_unlock(hdev); + + if (!cmd.cp.num_cis) + return 0; + + return hci_send_cmd(hdev, HCI_OP_LE_CREATE_CIS, sizeof(cmd.cp) + + sizeof(cmd.cis[0]) * cmd.cp.num_cis, &cmd); +} + +int hci_le_create_cis(struct hci_conn *conn) +{ + struct hci_conn *cis; + struct hci_dev *hdev = conn->hdev; + int err; + + switch (conn->type) { + case LE_LINK: + if (!conn->link || conn->state != BT_CONNECTED) + return -EINVAL; + cis = conn->link; + break; + case ISO_LINK: + cis = conn; + break; + default: + return -EINVAL; + } + + if (cis->state == BT_CONNECT) + return 0; + + /* Queue Create CIS */ + err = hci_cmd_sync_queue(hdev, hci_create_cis_sync, cis, NULL); + if (err) + return err; + + cis->state = BT_CONNECT; + + return 0; +} + +static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn, + struct bt_iso_io_qos *qos, __u8 phy) +{ + /* Only set MTU if PHY is enabled */ + if (!qos->sdu && qos->phy) { + if (hdev->iso_mtu > 0) + qos->sdu = hdev->iso_mtu; + else if (hdev->le_mtu > 0) + qos->sdu = hdev->le_mtu; + else + qos->sdu = hdev->acl_mtu; + } + + /* Use the same PHY as ACL if set to any */ + if (qos->phy == BT_ISO_PHY_ANY) + qos->phy = phy; + + /* Use LE ACL connection interval if not set */ + if (!qos->interval) + /* ACL interval unit in 1.25 ms to us */ + qos->interval = conn->le_conn_interval * 1250; + + /* Use LE ACL connection latency if not set */ + if (!qos->latency) + qos->latency = conn->le_conn_latency; +} + +static struct hci_conn *hci_bind_bis(struct hci_conn *conn, + struct bt_iso_qos *qos) +{ + /* Update LINK PHYs according to QoS preference */ + conn->le_tx_phy = qos->out.phy; + conn->le_tx_phy = qos->out.phy; + conn->iso_qos = *qos; + conn->state = BT_BOUND; + + return conn; +} + +static int create_big_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + u16 interval, sync_interval = 0; + u32 flags = 0; + int err; + + if (qos->out.phy == 0x02) + flags |= MGMT_ADV_FLAG_SEC_2M; + + /* Align intervals */ + interval = qos->out.interval / 1250; + + if (qos->bis) + sync_interval = qos->sync_interval * 1600; + + err = hci_start_per_adv_sync(hdev, qos->bis, conn->le_per_adv_data_len, + conn->le_per_adv_data, flags, interval, + interval, sync_interval); + if (err) + return err; + + return hci_le_create_big(conn, &conn->iso_qos); +} + +static void create_pa_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_cp_le_pa_create_sync *cp = data; + + bt_dev_dbg(hdev, ""); + + if (err) + bt_dev_err(hdev, "Unable to create PA: %d", err); + + kfree(cp); +} + +static int create_pa_sync(struct hci_dev *hdev, void *data) +{ + struct hci_cp_le_pa_create_sync *cp = data; + int err; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, + sizeof(*cp), cp, HCI_CMD_TIMEOUT); + if (err) { + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + return err; + } + + return hci_update_passive_scan_sync(hdev); +} + +int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, + __u8 sid) +{ + struct hci_cp_le_pa_create_sync *cp; + + if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) + return -EBUSY; + + cp = kmalloc(sizeof(*cp), GFP_KERNEL); + if (!cp) { + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + return -ENOMEM; + } + + /* Convert from ISO socket address type to HCI address type */ + if (dst_type == BDADDR_LE_PUBLIC) + dst_type = ADDR_LE_DEV_PUBLIC; + else + dst_type = ADDR_LE_DEV_RANDOM; + + memset(cp, 0, sizeof(*cp)); + cp->sid = sid; + cp->addr_type = dst_type; + bacpy(&cp->addr, dst); + + /* Queue start pa_create_sync and scan */ + return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete); +} + +int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos, + __u16 sync_handle, __u8 num_bis, __u8 bis[]) +{ + struct _packed { + struct hci_cp_le_big_create_sync cp; + __u8 bis[0x11]; + } pdu; + int err; + + if (num_bis > sizeof(pdu.bis)) + return -EINVAL; + + err = qos_set_big(hdev, qos); + if (err) + return err; + + memset(&pdu, 0, sizeof(pdu)); + pdu.cp.handle = qos->big; + pdu.cp.sync_handle = cpu_to_le16(sync_handle); + pdu.cp.num_bis = num_bis; + memcpy(pdu.bis, bis, num_bis); + + return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, + sizeof(pdu.cp) + num_bis, &pdu); +} + +static void create_big_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "conn %p", conn); + + if (err) { + bt_dev_err(hdev, "Unable to create BIG: %d", err); + hci_connect_cfm(conn, err); + hci_conn_del(conn); + } +} + +struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos, + __u8 base_len, __u8 *base) +{ + struct hci_conn *conn; + int err; + + /* We need hci_conn object using the BDADDR_ANY as dst */ + conn = hci_add_bis(hdev, dst, qos); + if (IS_ERR(conn)) + return conn; + + conn = hci_bind_bis(conn, qos); + if (!conn) { + hci_conn_drop(conn); + return ERR_PTR(-ENOMEM); + } + + /* Add Basic Announcement into Peridic Adv Data if BASE is set */ + if (base_len && base) { + base_len = eir_append_service_data(conn->le_per_adv_data, 0, + 0x1851, base, base_len); + conn->le_per_adv_data_len = base_len; + } + + /* Queue start periodic advertising and create BIG */ + err = hci_cmd_sync_queue(hdev, create_big_sync, conn, + create_big_complete); + if (err < 0) { + hci_conn_drop(conn); + return ERR_PTR(err); + } + + hci_iso_qos_setup(hdev, conn, &qos->out, + conn->le_tx_phy ? conn->le_tx_phy : + hdev->le_tx_def_phys); + + return conn; +} + +struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, struct bt_iso_qos *qos) +{ + struct hci_conn *le; + struct hci_conn *cis; + + /* Convert from ISO socket address type to HCI address type */ + if (dst_type == BDADDR_LE_PUBLIC) + dst_type = ADDR_LE_DEV_PUBLIC; + else + dst_type = ADDR_LE_DEV_RANDOM; + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + le = hci_connect_le(hdev, dst, dst_type, false, + BT_SECURITY_LOW, + HCI_LE_CONN_TIMEOUT, + HCI_ROLE_SLAVE); + else + le = hci_connect_le_scan(hdev, dst, dst_type, + BT_SECURITY_LOW, + HCI_LE_CONN_TIMEOUT, + CONN_REASON_ISO_CONNECT); + if (IS_ERR(le)) + return le; + + hci_iso_qos_setup(hdev, le, &qos->out, + le->le_tx_phy ? le->le_tx_phy : hdev->le_tx_def_phys); + hci_iso_qos_setup(hdev, le, &qos->in, + le->le_rx_phy ? le->le_rx_phy : hdev->le_rx_def_phys); + + cis = hci_bind_cis(hdev, dst, dst_type, qos); + if (IS_ERR(cis)) { + hci_conn_drop(le); + return cis; + } + + le->link = cis; + cis->link = le; + + hci_conn_hold(cis); + + /* If LE is already connected and CIS handle is already set proceed to + * Create CIS immediately. + */ + if (le->state == BT_CONNECTED && cis->handle != HCI_CONN_HANDLE_UNSET) + hci_le_create_cis(le); + + return cis; +} + /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a0f99baafd35..b3a5a3cc9372 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -29,6 +29,7 @@ #include <linux/rfkill.h> #include <linux/debugfs.h> #include <linux/crypto.h> +#include <linux/kcov.h> #include <linux/property.h> #include <linux/suspend.h> #include <linux/wait.h> @@ -594,6 +595,11 @@ static int hci_dev_do_reset(struct hci_dev *hdev) skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); + /* Cancel these to avoid queueing non-chained pending work */ + hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + cancel_delayed_work(&hdev->cmd_timer); + cancel_delayed_work(&hdev->ncmd_timer); + /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -607,8 +613,13 @@ static int hci_dev_do_reset(struct hci_dev *hdev) if (hdev->flush) hdev->flush(hdev); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + atomic_set(&hdev->cmd_cnt, 1); - hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; + hdev->acl_cnt = 0; + hdev->sco_cnt = 0; + hdev->le_cnt = 0; + hdev->iso_cnt = 0; ret = hci_reset_sync(hdev); @@ -1691,63 +1702,77 @@ static void adv_instance_rpa_expired(struct work_struct *work) } /* This function requires the caller holds hdev->lock */ -int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, - u16 adv_data_len, u8 *adv_data, - u16 scan_rsp_len, u8 *scan_rsp_data, - u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval) +struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u16 adv_data_len, u8 *adv_data, + u16 scan_rsp_len, u8 *scan_rsp_data, + u16 timeout, u16 duration, s8 tx_power, + u32 min_interval, u32 max_interval) { - struct adv_info *adv_instance; + struct adv_info *adv; - adv_instance = hci_find_adv_instance(hdev, instance); - if (adv_instance) { - memset(adv_instance->adv_data, 0, - sizeof(adv_instance->adv_data)); - memset(adv_instance->scan_rsp_data, 0, - sizeof(adv_instance->scan_rsp_data)); + adv = hci_find_adv_instance(hdev, instance); + if (adv) { + memset(adv->adv_data, 0, sizeof(adv->adv_data)); + memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data)); + memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data)); } else { if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets || instance < 1 || instance > hdev->le_num_of_adv_sets) - return -EOVERFLOW; + return ERR_PTR(-EOVERFLOW); - adv_instance = kzalloc(sizeof(*adv_instance), GFP_KERNEL); - if (!adv_instance) - return -ENOMEM; + adv = kzalloc(sizeof(*adv), GFP_KERNEL); + if (!adv) + return ERR_PTR(-ENOMEM); - adv_instance->pending = true; - adv_instance->instance = instance; - list_add(&adv_instance->list, &hdev->adv_instances); + adv->pending = true; + adv->instance = instance; + list_add(&adv->list, &hdev->adv_instances); hdev->adv_instance_cnt++; } - adv_instance->flags = flags; - adv_instance->adv_data_len = adv_data_len; - adv_instance->scan_rsp_len = scan_rsp_len; - adv_instance->min_interval = min_interval; - adv_instance->max_interval = max_interval; - adv_instance->tx_power = tx_power; + adv->flags = flags; + adv->min_interval = min_interval; + adv->max_interval = max_interval; + adv->tx_power = tx_power; - if (adv_data_len) - memcpy(adv_instance->adv_data, adv_data, adv_data_len); + hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data, + scan_rsp_len, scan_rsp_data); - if (scan_rsp_len) - memcpy(adv_instance->scan_rsp_data, - scan_rsp_data, scan_rsp_len); - - adv_instance->timeout = timeout; - adv_instance->remaining_time = timeout; + adv->timeout = timeout; + adv->remaining_time = timeout; if (duration == 0) - adv_instance->duration = hdev->def_multi_adv_rotation_duration; + adv->duration = hdev->def_multi_adv_rotation_duration; else - adv_instance->duration = duration; + adv->duration = duration; - INIT_DELAYED_WORK(&adv_instance->rpa_expired_cb, - adv_instance_rpa_expired); + INIT_DELAYED_WORK(&adv->rpa_expired_cb, adv_instance_rpa_expired); BT_DBG("%s for %dMR", hdev->name, instance); - return 0; + return adv; +} + +/* This function requires the caller holds hdev->lock */ +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, + u32 flags, u8 data_len, u8 *data, + u32 min_interval, u32 max_interval) +{ + struct adv_info *adv; + + adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL, + 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE, + min_interval, max_interval); + if (IS_ERR(adv)) + return adv; + + adv->periodic = true; + adv->per_adv_data_len = data_len; + + if (data) + memcpy(adv->per_adv_data, data, data_len); + + return adv; } /* This function requires the caller holds hdev->lock */ @@ -1755,29 +1780,33 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data) { - struct adv_info *adv_instance; + struct adv_info *adv; - adv_instance = hci_find_adv_instance(hdev, instance); + adv = hci_find_adv_instance(hdev, instance); /* If advertisement doesn't exist, we can't modify its data */ - if (!adv_instance) + if (!adv) return -ENOENT; - if (adv_data_len) { - memset(adv_instance->adv_data, 0, - sizeof(adv_instance->adv_data)); - memcpy(adv_instance->adv_data, adv_data, adv_data_len); - adv_instance->adv_data_len = adv_data_len; + if (adv_data_len && ADV_DATA_CMP(adv, adv_data, adv_data_len)) { + memset(adv->adv_data, 0, sizeof(adv->adv_data)); + memcpy(adv->adv_data, adv_data, adv_data_len); + adv->adv_data_len = adv_data_len; + adv->adv_data_changed = true; } - if (scan_rsp_len) { - memset(adv_instance->scan_rsp_data, 0, - sizeof(adv_instance->scan_rsp_data)); - memcpy(adv_instance->scan_rsp_data, - scan_rsp_data, scan_rsp_len); - adv_instance->scan_rsp_len = scan_rsp_len; + if (scan_rsp_len && SCAN_RSP_CMP(adv, scan_rsp_data, scan_rsp_len)) { + memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data)); + memcpy(adv->scan_rsp_data, scan_rsp_data, scan_rsp_len); + adv->scan_rsp_len = scan_rsp_len; + adv->scan_rsp_changed = true; } + /* Mark as changed if there are flags which would affect it */ + if (((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) || + adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) + adv->scan_rsp_changed = true; + return 0; } @@ -1874,151 +1903,120 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) kfree(monitor); } -int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) -{ - return mgmt_add_adv_patterns_monitor_complete(hdev, status); -} - -int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status) -{ - return mgmt_remove_adv_monitor_complete(hdev, status); -} - /* Assigns handle to a monitor, and if offloading is supported and power is on, * also attempts to forward the request to the controller. - * Returns true if request is forwarded (result is pending), false otherwise. - * This function requires the caller holds hdev->lock. + * This function requires the caller holds hci_req_sync_lock. */ -bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - int *err) +int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { int min, max, handle; + int status = 0; - *err = 0; + if (!monitor) + return -EINVAL; - if (!monitor) { - *err = -EINVAL; - return false; - } + hci_dev_lock(hdev); min = HCI_MIN_ADV_MONITOR_HANDLE; max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES; handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max, GFP_KERNEL); - if (handle < 0) { - *err = handle; - return false; - } + + hci_dev_unlock(hdev); + + if (handle < 0) + return handle; monitor->handle = handle; if (!hdev_is_powered(hdev)) - return false; + return status; switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_NONE: - hci_update_passive_scan(hdev); - bt_dev_dbg(hdev, "%s add monitor status %d", hdev->name, *err); + bt_dev_dbg(hdev, "%s add monitor %d status %d", hdev->name, + monitor->handle, status); /* Message was not forwarded to controller - not an error */ - return false; + break; + case HCI_ADV_MONITOR_EXT_MSFT: - *err = msft_add_monitor_pattern(hdev, monitor); - bt_dev_dbg(hdev, "%s add monitor msft status %d", hdev->name, - *err); + status = msft_add_monitor_pattern(hdev, monitor); + bt_dev_dbg(hdev, "%s add monitor %d msft status %d", hdev->name, + monitor->handle, status); break; } - return (*err == 0); + return status; } /* Attempts to tell the controller and free the monitor. If somehow the * controller doesn't have a corresponding handle, remove anyway. - * Returns true if request is forwarded (result is pending), false otherwise. - * This function requires the caller holds hdev->lock. + * This function requires the caller holds hci_req_sync_lock. */ -static bool hci_remove_adv_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, - u16 handle, int *err) +static int hci_remove_adv_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor) { - *err = 0; + int status = 0; switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */ + bt_dev_dbg(hdev, "%s remove monitor %d status %d", hdev->name, + monitor->handle, status); goto free_monitor; + case HCI_ADV_MONITOR_EXT_MSFT: - *err = msft_remove_monitor(hdev, monitor, handle); + status = msft_remove_monitor(hdev, monitor); + bt_dev_dbg(hdev, "%s remove monitor %d msft status %d", + hdev->name, monitor->handle, status); break; } /* In case no matching handle registered, just free the monitor */ - if (*err == -ENOENT) + if (status == -ENOENT) goto free_monitor; - return (*err == 0); + return status; free_monitor: - if (*err == -ENOENT) + if (status == -ENOENT) bt_dev_warn(hdev, "Removing monitor with no matching handle %d", monitor->handle); hci_free_adv_monitor(hdev, monitor); - *err = 0; - return false; + return status; } -/* Returns true if request is forwarded (result is pending), false otherwise. - * This function requires the caller holds hdev->lock. - */ -bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err) +/* This function requires the caller holds hci_req_sync_lock */ +int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle) { struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle); - bool pending; - - if (!monitor) { - *err = -EINVAL; - return false; - } - - pending = hci_remove_adv_monitor(hdev, monitor, handle, err); - if (!*err && !pending) - hci_update_passive_scan(hdev); - bt_dev_dbg(hdev, "%s remove monitor handle %d, status %d, %spending", - hdev->name, handle, *err, pending ? "" : "not "); + if (!monitor) + return -EINVAL; - return pending; + return hci_remove_adv_monitor(hdev, monitor); } -/* Returns true if request is forwarded (result is pending), false otherwise. - * This function requires the caller holds hdev->lock. - */ -bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err) +/* This function requires the caller holds hci_req_sync_lock */ +int hci_remove_all_adv_monitor(struct hci_dev *hdev) { struct adv_monitor *monitor; int idr_next_id = 0; - bool pending = false; - bool update = false; - - *err = 0; + int status = 0; - while (!*err && !pending) { + while (1) { monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id); if (!monitor) break; - pending = hci_remove_adv_monitor(hdev, monitor, 0, err); + status = hci_remove_adv_monitor(hdev, monitor); + if (status) + return status; - if (!*err && !pending) - update = true; + idr_next_id++; } - if (update) - hci_update_passive_scan(hdev); - - bt_dev_dbg(hdev, "%s remove all monitors status %d, %spending", - hdev->name, *err, pending ? "" : "not "); - - return pending; + return status; } /* This function requires the caller holds hdev->lock */ @@ -2640,12 +2638,9 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { - hdev->suspend_notifier.notifier_call = hci_suspend_notifier; - error = register_pm_notifier(&hdev->suspend_notifier); - if (error) - goto err_wqueue; - } + error = hci_register_suspend_notifier(hdev); + if (error) + goto err_wqueue; queue_work(hdev->req_workqueue, &hdev->power_on); @@ -2680,8 +2675,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_cmd_sync_clear(hdev); - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) - unregister_pm_notifier(&hdev->suspend_notifier); + hci_unregister_suspend_notifier(hdev); msft_unregister(hdev); @@ -2741,10 +2735,33 @@ void hci_release_dev(struct hci_dev *hdev) ida_simple_remove(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->recv_event); kfree(hdev); } EXPORT_SYMBOL(hci_release_dev); +int hci_register_suspend_notifier(struct hci_dev *hdev) +{ + int ret = 0; + + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + ret = register_pm_notifier(&hdev->suspend_notifier); + } + + return ret; +} + +int hci_unregister_suspend_notifier(struct hci_dev *hdev) +{ + int ret = 0; + + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) + ret = unregister_pm_notifier(&hdev->suspend_notifier); + + return ret; +} + /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -3025,6 +3042,37 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } +/* Get data from last received event */ +void *hci_recv_event_data(struct hci_dev *hdev, __u8 event) +{ + struct hci_event_hdr *hdr; + int offset; + + if (!hdev->recv_event) + return NULL; + + hdr = (void *)hdev->recv_event->data; + offset = sizeof(*hdr); + + if (hdr->evt != event) { + /* In case of LE metaevent check the subevent match */ + if (hdr->evt == HCI_EV_LE_META) { + struct hci_ev_le_meta *ev; + + ev = (void *)hdev->recv_event->data + offset; + offset += sizeof(*ev); + if (ev->subevent == event) + goto found; + } + return NULL; + } + +found: + bt_dev_dbg(hdev, "event 0x%2.2x", event); + + return hdev->recv_event->data + offset; +} + /* Send ACL data */ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) { @@ -3132,9 +3180,117 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) queue_work(hdev->workqueue, &hdev->tx_work); } +/* Send ISO data */ +static void hci_add_iso_hdr(struct sk_buff *skb, __u16 handle, __u8 flags) +{ + struct hci_iso_hdr *hdr; + int len = skb->len; + + skb_push(skb, HCI_ISO_HDR_SIZE); + skb_reset_transport_header(skb); + hdr = (struct hci_iso_hdr *)skb_transport_header(skb); + hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags)); + hdr->dlen = cpu_to_le16(len); +} + +static void hci_queue_iso(struct hci_conn *conn, struct sk_buff_head *queue, + struct sk_buff *skb) +{ + struct hci_dev *hdev = conn->hdev; + struct sk_buff *list; + __u16 flags; + + skb->len = skb_headlen(skb); + skb->data_len = 0; + + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + + list = skb_shinfo(skb)->frag_list; + + flags = hci_iso_flags_pack(list ? ISO_START : ISO_SINGLE, 0x00); + hci_add_iso_hdr(skb, conn->handle, flags); + + if (!list) { + /* Non fragmented */ + BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); + + skb_queue_tail(queue, skb); + } else { + /* Fragmented */ + BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); + + skb_shinfo(skb)->frag_list = NULL; + + __skb_queue_tail(queue, skb); + + do { + skb = list; list = list->next; + + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + flags = hci_iso_flags_pack(list ? ISO_CONT : ISO_END, + 0x00); + hci_add_iso_hdr(skb, conn->handle, flags); + + BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); + + __skb_queue_tail(queue, skb); + } while (list); + } +} + +void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("%s len %d", hdev->name, skb->len); + + hci_queue_iso(conn, &conn->data_q, skb); + + queue_work(hdev->workqueue, &hdev->tx_work); +} + /* ---- HCI TX task (outgoing data) ---- */ /* HCI Connection scheduler */ +static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) +{ + struct hci_dev *hdev; + int cnt, q; + + if (!conn) { + *quote = 0; + return; + } + + hdev = conn->hdev; + + switch (conn->type) { + case ACL_LINK: + cnt = hdev->acl_cnt; + break; + case AMP_LINK: + cnt = hdev->block_cnt; + break; + case SCO_LINK: + case ESCO_LINK: + cnt = hdev->sco_cnt; + break; + case LE_LINK: + cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; + break; + case ISO_LINK: + cnt = hdev->iso_mtu ? hdev->iso_cnt : + hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; + break; + default: + cnt = 0; + bt_dev_err(hdev, "unknown link type %d", conn->type); + } + + q = cnt / num; + *quote = q ? q : 1; +} + static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { @@ -3167,29 +3323,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, rcu_read_unlock(); - if (conn) { - int cnt, q; - - switch (conn->type) { - case ACL_LINK: - cnt = hdev->acl_cnt; - break; - case SCO_LINK: - case ESCO_LINK: - cnt = hdev->sco_cnt; - break; - case LE_LINK: - cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; - break; - default: - cnt = 0; - bt_dev_err(hdev, "unknown link type %d", conn->type); - } - - q = cnt / num; - *quote = q ? q : 1; - } else - *quote = 0; + hci_quote_sent(conn, num, quote); BT_DBG("conn %p quote %d", conn, *quote); return conn; @@ -3223,7 +3357,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, struct hci_chan *chan = NULL; unsigned int num = 0, min = ~0, cur_prio = 0; struct hci_conn *conn; - int cnt, q, conn_num = 0; + int conn_num = 0; BT_DBG("%s", hdev->name); @@ -3273,27 +3407,8 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, if (!chan) return NULL; - switch (chan->conn->type) { - case ACL_LINK: - cnt = hdev->acl_cnt; - break; - case AMP_LINK: - cnt = hdev->block_cnt; - break; - case SCO_LINK: - case ESCO_LINK: - cnt = hdev->sco_cnt; - break; - case LE_LINK: - cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; - break; - default: - cnt = 0; - bt_dev_err(hdev, "unknown link type %d", chan->conn->type); - } + hci_quote_sent(chan->conn, num, quote); - q = cnt / num; - *quote = q ? q : 1; BT_DBG("chan %p quote %d", chan, *quote); return chan; } @@ -3582,18 +3697,46 @@ static void hci_sched_le(struct hci_dev *hdev) hci_prio_recalculate(hdev, LE_LINK); } +/* Schedule CIS */ +static void hci_sched_iso(struct hci_dev *hdev) +{ + struct hci_conn *conn; + struct sk_buff *skb; + int quote, *cnt; + + BT_DBG("%s", hdev->name); + + if (!hci_conn_num(hdev, ISO_LINK)) + return; + + cnt = hdev->iso_pkts ? &hdev->iso_cnt : + hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; + while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { + while (quote-- && (skb = skb_dequeue(&conn->data_q))) { + BT_DBG("skb %p len %d", skb, skb->len); + hci_send_frame(hdev, skb); + + conn->sent++; + if (conn->sent == ~0) + conn->sent = 0; + (*cnt)--; + } + } +} + static void hci_tx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, tx_work); struct sk_buff *skb; - BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, - hdev->sco_cnt, hdev->le_cnt); + BT_DBG("%s acl %d sco %d le %d iso %d", hdev->name, hdev->acl_cnt, + hdev->sco_cnt, hdev->le_cnt, hdev->iso_cnt); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { /* Schedule queues and send stuff to HCI driver */ hci_sched_sco(hdev); hci_sched_esco(hdev); + hci_sched_iso(hdev); hci_sched_acl(hdev); hci_sched_le(hdev); } @@ -3676,6 +3819,43 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } +static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_iso_hdr *hdr; + struct hci_conn *conn; + __u16 handle, flags; + + hdr = skb_pull_data(skb, sizeof(*hdr)); + if (!hdr) { + bt_dev_err(hdev, "ISO packet too small"); + goto drop; + } + + handle = __le16_to_cpu(hdr->handle); + flags = hci_flags(handle); + handle = hci_handle(handle); + + bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len, + handle, flags); + + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_handle(hdev, handle); + hci_dev_unlock(hdev); + + if (!conn) { + bt_dev_err(hdev, "ISO packet for unknown connection handle %d", + handle); + goto drop; + } + + /* Send to upper protocol */ + iso_recv(conn, skb, flags); + return; + +drop: + kfree_skb(skb); +} + static bool hci_req_is_complete(struct hci_dev *hdev) { struct sk_buff *skb; @@ -3781,7 +3961,14 @@ static void hci_rx_work(struct work_struct *work) BT_DBG("%s", hdev->name); - while ((skb = skb_dequeue(&hdev->rx_q))) { + /* The kcov_remote functions used for collecting packet parsing + * coverage information from this background thread and associate + * the coverage with the syscall's thread which originally injected + * the packet. This helps fuzzing the kernel. + */ + for (; (skb = skb_dequeue(&hdev->rx_q)); kcov_remote_stop()) { + kcov_remote_start_common(skb_get_kcov_handle(skb)); + /* Send copy to monitor */ hci_send_to_monitor(hdev, skb); @@ -3830,6 +4017,11 @@ static void hci_rx_work(struct work_struct *work) hci_scodata_packet(hdev, skb); break; + case HCI_ISODATA_PKT: + BT_DBG("%s ISO data packet", hdev->name); + hci_isodata_packet(hdev, skb); + break; + default: kfree_skb(skb); break; @@ -3864,7 +4056,8 @@ static void hci_cmd_work(struct work_struct *work) if (res < 0) __hci_cmd_sync_cancel(hdev, -res); - if (test_bit(HCI_RESET, &hdev->flags)) + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) cancel_delayed_work(&hdev->cmd_timer); else schedule_delayed_work(&hdev->cmd_timer, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index af17dfb20e01..ea33dd0cd478 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2741,7 +2741,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags); if (conn->type == ACL_LINK) { - if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) + if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); } @@ -3173,7 +3173,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_req_update_scan(hdev); + hci_update_scan(hdev); } /* Set packet type for incoming connection */ @@ -3368,10 +3368,10 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, reason, mgmt_connected); if (conn->type == ACL_LINK) { - if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) + if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_req_update_scan(hdev); + hci_update_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); @@ -3768,12 +3768,182 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) cancel_delayed_work(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); } else { - schedule_delayed_work(&hdev->ncmd_timer, - HCI_NCMD_TIMEOUT); + if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + schedule_delayed_work(&hdev->ncmd_timer, + HCI_NCMD_TIMEOUT); } } } +static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_rp_le_read_buffer_size_v2 *rp = data; + + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + + if (rp->status) + return rp->status; + + hdev->le_mtu = __le16_to_cpu(rp->acl_mtu); + hdev->le_pkts = rp->acl_max_pkt; + hdev->iso_mtu = __le16_to_cpu(rp->iso_mtu); + hdev->iso_pkts = rp->iso_max_pkt; + + hdev->le_cnt = hdev->le_pkts; + hdev->iso_cnt = hdev->iso_pkts; + + BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu, + hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts); + + return rp->status; +} + +static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_rp_le_set_cig_params *rp = data; + struct hci_conn *conn; + int i = 0; + + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + + hci_dev_lock(hdev); + + if (rp->status) { + while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) { + conn->state = BT_CLOSED; + hci_connect_cfm(conn, rp->status); + hci_conn_del(conn); + } + goto unlock; + } + + rcu_read_lock(); + + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + if (conn->type != ISO_LINK || conn->iso_qos.cig != rp->cig_id || + conn->state == BT_CONNECTED) + continue; + + conn->handle = __le16_to_cpu(rp->handle[i++]); + + bt_dev_dbg(hdev, "%p handle 0x%4.4x link %p", conn, + conn->handle, conn->link); + + /* Create CIS if LE is already connected */ + if (conn->link && conn->link->state == BT_CONNECTED) + hci_le_create_cis(conn->link); + + if (i == rp->num_handles) + break; + } + + rcu_read_unlock(); + +unlock: + hci_dev_unlock(hdev); + + return rp->status; +} + +static u8 hci_cc_le_setup_iso_path(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_rp_le_setup_iso_path *rp = data; + struct hci_cp_le_setup_iso_path *cp; + struct hci_conn *conn; + + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SETUP_ISO_PATH); + if (!cp) + return rp->status; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (!conn) + goto unlock; + + if (rp->status) { + hci_connect_cfm(conn, rp->status); + hci_conn_del(conn); + goto unlock; + } + + switch (cp->direction) { + /* Input (Host to Controller) */ + case 0x00: + /* Only confirm connection if output only */ + if (conn->iso_qos.out.sdu && !conn->iso_qos.in.sdu) + hci_connect_cfm(conn, rp->status); + break; + /* Output (Controller to Host) */ + case 0x01: + /* Confirm connection since conn->iso_qos is always configured + * last. + */ + hci_connect_cfm(conn, rp->status); + break; + } + +unlock: + hci_dev_unlock(hdev); + return rp->status; +} + +static void hci_cs_le_create_big(struct hci_dev *hdev, u8 status) +{ + bt_dev_dbg(hdev, "status 0x%2.2x", status); +} + +static u8 hci_cc_set_per_adv_param(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_status *rp = data; + struct hci_cp_le_set_per_adv_params *cp; + + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + + if (rp->status) + return rp->status; + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS); + if (!cp) + return rp->status; + + /* TODO: set the conn state */ + return rp->status; +} + +static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_status *rp = data; + __u8 *sent; + + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + + if (rp->status) + return rp->status; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE); + if (!sent) + return rp->status; + + hci_dev_lock(hdev); + + if (*sent) + hci_dev_set_flag(hdev, HCI_LE_PER_ADV); + else + hci_dev_clear_flag(hdev, HCI_LE_PER_ADV); + + hci_dev_unlock(hdev); + + return rp->status; +} + #define HCI_CC_VL(_op, _func, _min, _max) \ { \ .op = _op, \ @@ -3947,9 +4117,18 @@ static const struct hci_cc { hci_cc_le_set_adv_set_random_addr), HCI_CC_STATUS(HCI_OP_LE_REMOVE_ADV_SET, hci_cc_le_remove_adv_set), HCI_CC_STATUS(HCI_OP_LE_CLEAR_ADV_SETS, hci_cc_le_clear_adv_sets), + HCI_CC_STATUS(HCI_OP_LE_SET_PER_ADV_PARAMS, hci_cc_set_per_adv_param), + HCI_CC_STATUS(HCI_OP_LE_SET_PER_ADV_ENABLE, + hci_cc_le_set_per_adv_enable), HCI_CC(HCI_OP_LE_READ_TRANSMIT_POWER, hci_cc_le_read_transmit_power, sizeof(struct hci_rp_le_read_transmit_power)), - HCI_CC_STATUS(HCI_OP_LE_SET_PRIVACY_MODE, hci_cc_le_set_privacy_mode) + HCI_CC_STATUS(HCI_OP_LE_SET_PRIVACY_MODE, hci_cc_le_set_privacy_mode), + HCI_CC(HCI_OP_LE_READ_BUFFER_SIZE_V2, hci_cc_le_read_buffer_size_v2, + sizeof(struct hci_rp_le_read_buffer_size_v2)), + HCI_CC_VL(HCI_OP_LE_SET_CIG_PARAMS, hci_cc_le_set_cig_params, + sizeof(struct hci_rp_le_set_cig_params), HCI_MAX_EVENT_SIZE), + HCI_CC(HCI_OP_LE_SETUP_ISO_PATH, hci_cc_le_setup_iso_path, + sizeof(struct hci_rp_le_setup_iso_path)), }; static u8 hci_cc_func(struct hci_dev *hdev, const struct hci_cc *cc, @@ -4012,6 +4191,40 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->cmd_work); } +static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_le_create_cis *cp; + int i; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CIS); + if (!cp) + return; + + hci_dev_lock(hdev); + + /* Remove connection if command failed */ + for (i = 0; cp->num_cis; cp->num_cis--, i++) { + struct hci_conn *conn; + u16 handle; + + handle = __le16_to_cpu(cp->cis[i].cis_handle); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn) { + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); + } + } + + hci_dev_unlock(hdev); +} + #define HCI_CS(_op, _func) \ { \ .op = _op, \ @@ -4041,7 +4254,9 @@ static const struct hci_cs { HCI_CS(HCI_OP_LE_CREATE_CONN, hci_cs_le_create_conn), HCI_CS(HCI_OP_LE_READ_REMOTE_FEATURES, hci_cs_le_read_remote_features), HCI_CS(HCI_OP_LE_START_ENC, hci_cs_le_start_enc), - HCI_CS(HCI_OP_LE_EXT_CREATE_CONN, hci_cs_le_ext_create_conn) + HCI_CS(HCI_OP_LE_EXT_CREATE_CONN, hci_cs_le_ext_create_conn), + HCI_CS(HCI_OP_LE_CREATE_CIS, hci_cs_le_create_cis), + HCI_CS(HCI_OP_LE_CREATE_BIG, hci_cs_le_create_big), }; static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, @@ -4177,6 +4392,22 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, hdev->sco_cnt = hdev->sco_pkts; break; + case ISO_LINK: + if (hdev->iso_pkts) { + hdev->iso_cnt += count; + if (hdev->iso_cnt > hdev->iso_pkts) + hdev->iso_cnt = hdev->iso_pkts; + } else if (hdev->le_pkts) { + hdev->le_cnt += count; + if (hdev->le_cnt > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + } else { + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + } + break; + default: bt_dev_err(hdev, "unknown type %d conn %p", conn->type, conn); @@ -6249,6 +6480,39 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } +static int hci_le_pa_term_sync(struct hci_dev *hdev, __le16 handle) +{ + struct hci_cp_le_pa_term_sync cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = handle; + + return hci_send_cmd(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp); +} + +static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_le_pa_sync_established *ev = data; + int mask = hdev->link_mode; + __u8 flags = 0; + + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + + if (ev->status) + return; + + hci_dev_lock(hdev); + + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags); + if (!(mask & HCI_LM_ACCEPT)) + hci_le_pa_term_sync(hdev, ev->handle); + + hci_dev_unlock(hdev); +} + static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -6479,6 +6743,226 @@ unlock: hci_dev_unlock(hdev); } +static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_evt_le_cis_established *ev = data; + struct hci_conn *conn; + u16 handle = __le16_to_cpu(ev->handle); + + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + bt_dev_err(hdev, + "Unable to find connection with handle 0x%4.4x", + handle); + goto unlock; + } + + if (conn->role == HCI_ROLE_SLAVE) { + __le32 interval; + + memset(&interval, 0, sizeof(interval)); + + memcpy(&interval, ev->c_latency, sizeof(ev->c_latency)); + conn->iso_qos.in.interval = le32_to_cpu(interval); + memcpy(&interval, ev->p_latency, sizeof(ev->p_latency)); + conn->iso_qos.out.interval = le32_to_cpu(interval); + conn->iso_qos.in.latency = le16_to_cpu(ev->interval); + conn->iso_qos.out.latency = le16_to_cpu(ev->interval); + conn->iso_qos.in.sdu = le16_to_cpu(ev->c_mtu); + conn->iso_qos.out.sdu = le16_to_cpu(ev->p_mtu); + conn->iso_qos.in.phy = ev->c_phy; + conn->iso_qos.out.phy = ev->p_phy; + } + + if (!ev->status) { + conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); + hci_conn_add_sysfs(conn); + hci_iso_setup_path(conn); + goto unlock; + } + + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + +unlock: + hci_dev_unlock(hdev); +} + +static void hci_le_reject_cis(struct hci_dev *hdev, __le16 handle) +{ + struct hci_cp_le_reject_cis cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = handle; + cp.reason = HCI_ERROR_REJ_BAD_ADDR; + hci_send_cmd(hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp); +} + +static void hci_le_accept_cis(struct hci_dev *hdev, __le16 handle) +{ + struct hci_cp_le_accept_cis cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = handle; + hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp); +} + +static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_evt_le_cis_req *ev = data; + u16 acl_handle, cis_handle; + struct hci_conn *acl, *cis; + int mask; + __u8 flags = 0; + + acl_handle = __le16_to_cpu(ev->acl_handle); + cis_handle = __le16_to_cpu(ev->cis_handle); + + bt_dev_dbg(hdev, "acl 0x%4.4x handle 0x%4.4x cig 0x%2.2x cis 0x%2.2x", + acl_handle, cis_handle, ev->cig_id, ev->cis_id); + + hci_dev_lock(hdev); + + acl = hci_conn_hash_lookup_handle(hdev, acl_handle); + if (!acl) + goto unlock; + + mask = hci_proto_connect_ind(hdev, &acl->dst, ISO_LINK, &flags); + if (!(mask & HCI_LM_ACCEPT)) { + hci_le_reject_cis(hdev, ev->cis_handle); + goto unlock; + } + + cis = hci_conn_hash_lookup_handle(hdev, cis_handle); + if (!cis) { + cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE); + if (!cis) { + hci_le_reject_cis(hdev, ev->cis_handle); + goto unlock; + } + cis->handle = cis_handle; + } + + cis->iso_qos.cig = ev->cig_id; + cis->iso_qos.cis = ev->cis_id; + + if (!(flags & HCI_PROTO_DEFER)) { + hci_le_accept_cis(hdev, ev->cis_handle); + } else { + cis->state = BT_CONNECT2; + hci_connect_cfm(cis, 0); + } + +unlock: + hci_dev_unlock(hdev); +} + +static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_evt_le_create_big_complete *ev = data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); + + if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_CREATE_BIG_COMPLETE, + flex_array_size(ev, bis_handle, ev->num_bis))) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_big(hdev, ev->handle); + if (!conn) + goto unlock; + + if (ev->num_bis) + conn->handle = __le16_to_cpu(ev->bis_handle[0]); + + if (!ev->status) { + conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); + hci_conn_add_sysfs(conn); + hci_iso_setup_path(conn); + goto unlock; + } + + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + +unlock: + hci_dev_unlock(hdev); +} + +static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_evt_le_big_sync_estabilished *ev = data; + struct hci_conn *bis; + int i; + + bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + + if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + flex_array_size(ev, bis, ev->num_bis))) + return; + + if (ev->status) + return; + + hci_dev_lock(hdev); + + for (i = 0; i < ev->num_bis; i++) { + u16 handle = le16_to_cpu(ev->bis[i]); + __le32 interval; + + bis = hci_conn_hash_lookup_handle(hdev, handle); + if (!bis) { + bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); + if (!bis) + continue; + bis->handle = handle; + } + + bis->iso_qos.big = ev->handle; + memset(&interval, 0, sizeof(interval)); + memcpy(&interval, ev->latency, sizeof(ev->latency)); + bis->iso_qos.in.interval = le32_to_cpu(interval); + /* Convert ISO Interval (1.25 ms slots) to latency (ms) */ + bis->iso_qos.in.latency = le16_to_cpu(ev->interval) * 125 / 100; + bis->iso_qos.in.sdu = le16_to_cpu(ev->max_pdu); + + hci_connect_cfm(bis, ev->status); + } + + hci_dev_unlock(hdev); +} + +static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_evt_le_big_info_adv_report *ev = data; + int mask = hdev->link_mode; + __u8 flags = 0; + + bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle)); + + hci_dev_lock(hdev); + + mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); + if (!(mask & HCI_LM_ACCEPT)) + hci_le_pa_term_sync(hdev, ev->sync_handle); + + hci_dev_unlock(hdev); +} + #define HCI_LE_EV_VL(_op, _func, _min_len, _max_len) \ [_op] = { \ .func = _func, \ @@ -6539,9 +7023,34 @@ static const struct hci_le_ev { HCI_LE_EV_VL(HCI_EV_LE_EXT_ADV_REPORT, hci_le_ext_adv_report_evt, sizeof(struct hci_ev_le_ext_adv_report), HCI_MAX_EVENT_SIZE), + /* [0x0e = HCI_EV_LE_PA_SYNC_ESTABLISHED] */ + HCI_LE_EV(HCI_EV_LE_PA_SYNC_ESTABLISHED, + hci_le_pa_sync_estabilished_evt, + sizeof(struct hci_ev_le_pa_sync_established)), /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, sizeof(struct hci_evt_le_ext_adv_set_term)), + /* [0x19 = HCI_EVT_LE_CIS_ESTABLISHED] */ + HCI_LE_EV(HCI_EVT_LE_CIS_ESTABLISHED, hci_le_cis_estabilished_evt, + sizeof(struct hci_evt_le_cis_established)), + /* [0x1a = HCI_EVT_LE_CIS_REQ] */ + HCI_LE_EV(HCI_EVT_LE_CIS_REQ, hci_le_cis_req_evt, + sizeof(struct hci_evt_le_cis_req)), + /* [0x1b = HCI_EVT_LE_CREATE_BIG_COMPLETE] */ + HCI_LE_EV_VL(HCI_EVT_LE_CREATE_BIG_COMPLETE, + hci_le_create_big_complete_evt, + sizeof(struct hci_evt_le_create_big_complete), + HCI_MAX_EVENT_SIZE), + /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABILISHED] */ + HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + hci_le_big_sync_established_evt, + sizeof(struct hci_evt_le_big_sync_estabilished), + HCI_MAX_EVENT_SIZE), + /* [0x22 = HCI_EVT_LE_BIG_INFO_ADV_REPORT] */ + HCI_LE_EV_VL(HCI_EVT_LE_BIG_INFO_ADV_REPORT, + hci_le_big_info_adv_report_evt, + sizeof(struct hci_evt_le_big_info_adv_report), + HCI_MAX_EVENT_SIZE), }; static void hci_le_meta_evt(struct hci_dev *hdev, void *data, @@ -6580,7 +7089,6 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, if (skb->len > subev->max_len) bt_dev_warn(hdev, "unexpected subevent 0x%2.2x length: %u > %u", ev->subevent, skb->len, subev->max_len); - data = hci_le_ev_skb_pull(hdev, skb, ev->subevent, subev->min_len); if (!data) return; @@ -6935,6 +7443,9 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) goto done; } + kfree_skb(hdev->recv_event); + hdev->recv_event = skb_clone(skb, GFP_KERNEL); + event = hdr->evt; if (!event) { bt_dev_warn(hdev, "Received unexpected HCI Event 0x%2.2x", diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 38ecaf9264ee..e64d558e5d69 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -827,7 +827,6 @@ void __hci_req_disable_advertising(struct hci_request *req) { if (ext_adv_capable(req->hdev)) { __hci_req_disable_ext_adv_instance(req, 0x00); - } else { u8 enable = 0x00; @@ -1338,15 +1337,15 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) bdaddr_t random_addr; u8 own_addr_type; int err; - struct adv_info *adv_instance; - bool secondary_adv; + struct adv_info *adv; + bool secondary_adv, require_privacy; if (instance > 0) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) + adv = hci_find_adv_instance(hdev, instance); + if (!adv) return -EINVAL; } else { - adv_instance = NULL; + adv = NULL; } flags = hci_adv_instance_flags(hdev, instance); @@ -1364,18 +1363,24 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) * advertising is used. In that case it is fine to use a * non-resolvable private address. */ - err = hci_get_random_address(hdev, !connectable, - adv_use_rpa(hdev, flags), adv_instance, + require_privacy = !connectable; + + /* Don't require privacy for periodic adv? */ + if (adv && adv->periodic) + require_privacy = false; + + err = hci_get_random_address(hdev, require_privacy, + adv_use_rpa(hdev, flags), adv, &own_addr_type, &random_addr); if (err < 0) return err; memset(&cp, 0, sizeof(cp)); - if (adv_instance) { - hci_cpu_to_le24(adv_instance->min_interval, cp.min_interval); - hci_cpu_to_le24(adv_instance->max_interval, cp.max_interval); - cp.tx_power = adv_instance->tx_power; + if (adv) { + hci_cpu_to_le24(adv->min_interval, cp.min_interval); + hci_cpu_to_le24(adv->max_interval, cp.max_interval); + cp.tx_power = adv->tx_power; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); @@ -1396,7 +1401,8 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND); } else { - if (secondary_adv) + /* Secondary and periodic cannot use legacy PDUs */ + if (secondary_adv || (adv && adv->periodic)) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND); @@ -1426,8 +1432,8 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) struct hci_cp_le_set_adv_set_rand_addr cp; /* Check if random address need to be updated */ - if (adv_instance) { - if (!bacmp(&random_addr, &adv_instance->random_addr)) + if (adv) { + if (!bacmp(&random_addr, &adv->random_addr)) return 0; } else { if (!bacmp(&random_addr, &hdev->random_addr)) @@ -1835,21 +1841,6 @@ void __hci_req_update_scan(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static int update_scan(struct hci_request *req, unsigned long opt) -{ - hci_dev_lock(req->hdev); - __hci_req_update_scan(req); - hci_dev_unlock(req->hdev); - return 0; -} - -static void scan_update_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update); - - hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); -} - static u8 get_service_classes(struct hci_dev *hdev) { struct bt_uuid *uuid; @@ -1890,69 +1881,6 @@ void __hci_req_update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static void write_iac(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_current_iac_lap cp; - - if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) - return; - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { - /* Limited discoverable mode */ - cp.num_iac = min_t(u8, hdev->num_iac, 2); - cp.iac_lap[0] = 0x00; /* LIAC */ - cp.iac_lap[1] = 0x8b; - cp.iac_lap[2] = 0x9e; - cp.iac_lap[3] = 0x33; /* GIAC */ - cp.iac_lap[4] = 0x8b; - cp.iac_lap[5] = 0x9e; - } else { - /* General discoverable mode */ - cp.num_iac = 1; - cp.iac_lap[0] = 0x33; /* GIAC */ - cp.iac_lap[1] = 0x8b; - cp.iac_lap[2] = 0x9e; - } - - hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP, - (cp.num_iac * 3) + 1, &cp); -} - -static int discoverable_update(struct hci_request *req, unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - - hci_dev_lock(hdev); - - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - write_iac(req); - __hci_req_update_scan(req); - __hci_req_update_class(req); - } - - /* Advertising instances don't use the global discoverable setting, so - * only update AD if advertising was enabled using Set Advertising. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) { - __hci_req_update_adv_data(req, 0x00); - - /* Discoverable mode affects the local advertising - * address in limited privacy mode. - */ - if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) { - if (ext_adv_capable(hdev)) - __hci_req_start_ext_adv(req, 0x00); - else - __hci_req_enable_advertising(req); - } - } - - hci_dev_unlock(hdev); - - return 0; -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -2227,146 +2155,6 @@ unlock: hci_dev_unlock(hdev); } -static int active_scan(struct hci_request *req, unsigned long opt) -{ - uint16_t interval = opt; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - /* Accept list is not used for discovery */ - u8 filter_policy = 0x00; - /* Default is to enable duplicates filter */ - u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - /* Discovery doesn't require controller address resolution */ - bool addr_resolv = false; - int err; - - bt_dev_dbg(hdev, ""); - - /* If controller is scanning, it means the background scanning is - * running. Thus, we should temporarily stop it in order to set the - * discovery scanning parameters. - */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req, false); - cancel_interleave_scan(hdev); - } - - /* All active scans will be done with either a resolvable private - * address (when privacy feature has been enabled) or non-resolvable - * private address. - */ - err = hci_update_random_address(req, true, scan_use_rpa(hdev), - &own_addr_type); - if (err < 0) - own_addr_type = ADDR_LE_DEV_PUBLIC; - - hci_dev_lock(hdev); - if (hci_is_adv_monitoring(hdev)) { - /* Duplicate filter should be disabled when some advertisement - * monitor is activated, otherwise AdvMon can only receive one - * advertisement for one peer(*) during active scanning, and - * might report loss to these peers. - * - * Note that different controllers have different meanings of - * |duplicate|. Some of them consider packets with the same - * address as duplicate, and others consider packets with the - * same address and the same RSSI as duplicate. Although in the - * latter case we don't need to disable duplicate filter, but - * it is common to have active scanning for a short period of - * time, the power impact should be neglectable. - */ - filter_dup = LE_SCAN_FILTER_DUP_DISABLE; - } - hci_dev_unlock(hdev); - - hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, - hdev->le_scan_window_discovery, own_addr_type, - filter_policy, filter_dup, addr_resolv); - return 0; -} - -static int interleaved_discov(struct hci_request *req, unsigned long opt) -{ - int err; - - bt_dev_dbg(req->hdev, ""); - - err = active_scan(req, opt); - if (err) - return err; - - return bredr_inquiry(req, DISCOV_BREDR_INQUIRY_LEN); -} - -static void start_discovery(struct hci_dev *hdev, u8 *status) -{ - unsigned long timeout; - - bt_dev_dbg(hdev, "type %u", hdev->discovery.type); - - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) - hci_req_sync(hdev, bredr_inquiry, - DISCOV_BREDR_INQUIRY_LEN, HCI_CMD_TIMEOUT, - status); - return; - case DISCOV_TYPE_INTERLEAVED: - /* When running simultaneous discovery, the LE scanning time - * should occupy the whole discovery time sine BR/EDR inquiry - * and LE scanning are scheduled by the controller. - * - * For interleaving discovery in comparison, BR/EDR inquiry - * and LE scanning are done sequentially with separate - * timeouts. - */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - /* During simultaneous discovery, we double LE scan - * interval. We must leave some time for the controller - * to do BR/EDR inquiry. - */ - hci_req_sync(hdev, interleaved_discov, - hdev->le_scan_int_discovery * 2, HCI_CMD_TIMEOUT, - status); - break; - } - - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); - hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery, - HCI_CMD_TIMEOUT, status); - break; - case DISCOV_TYPE_LE: - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery, - HCI_CMD_TIMEOUT, status); - break; - default: - *status = HCI_ERROR_UNSPECIFIED; - return; - } - - if (*status) - return; - - bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); - - /* When service discovery is used and the controller has a - * strict duplicate filter, it is important to remember the - * start and duration of the scan. This is required for - * restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - - queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, - timeout); -} - bool hci_req_stop_discovery(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -2462,180 +2250,8 @@ error: return err; } -static int stop_discovery(struct hci_request *req, unsigned long opt) -{ - hci_dev_lock(req->hdev); - hci_req_stop_discovery(req); - hci_dev_unlock(req->hdev); - - return 0; -} - -static void discov_update(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - discov_update); - u8 status = 0; - - switch (hdev->discovery.state) { - case DISCOVERY_STARTING: - start_discovery(hdev, &status); - mgmt_start_discovery_complete(hdev, status); - if (status) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - else - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - break; - case DISCOVERY_STOPPING: - hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status); - mgmt_stop_discovery_complete(hdev, status); - if (!status) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - break; - case DISCOVERY_STOPPED: - default: - return; - } -} - -static void discov_off(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - discov_off.work); - - bt_dev_dbg(hdev, ""); - - hci_dev_lock(hdev); - - /* When discoverable timeout triggers, then just make sure - * the limited discoverable flag is cleared. Even in the case - * of a timeout triggered from general discoverable, it is - * safe to unconditionally clear the flag. - */ - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - hdev->discov_timeout = 0; - - hci_dev_unlock(hdev); - - hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL); - mgmt_new_settings(hdev); -} - -static int powered_update_hci(struct hci_request *req, unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - u8 link_sec; - - hci_dev_lock(hdev); - - if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && - !lmp_host_ssp_capable(hdev)) { - u8 mode = 0x01; - - hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); - - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 support = 0x01; - - hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, - sizeof(support), &support); - } - } - - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - lmp_bredr_capable(hdev)) { - struct hci_cp_write_le_host_supported cp; - - cp.le = 0x01; - cp.simul = 0x00; - - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } - - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { - /* Make sure the controller has a good default for - * advertising data. This also applies to the case - * where BR/EDR was toggled during the AUTO_OFF phase. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - list_empty(&hdev->adv_instances)) { - int err; - - if (ext_adv_capable(hdev)) { - err = __hci_req_setup_ext_adv_instance(req, - 0x00); - if (!err) - __hci_req_update_scan_rsp_data(req, - 0x00); - } else { - err = 0; - __hci_req_update_adv_data(req, 0x00); - __hci_req_update_scan_rsp_data(req, 0x00); - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) { - if (!ext_adv_capable(hdev)) - __hci_req_enable_advertising(req); - else if (!err) - __hci_req_enable_ext_advertising(req, - 0x00); - } - } else if (!list_empty(&hdev->adv_instances)) { - struct adv_info *adv_instance; - - adv_instance = list_first_entry(&hdev->adv_instances, - struct adv_info, list); - __hci_req_schedule_adv_instance(req, - adv_instance->instance, - true); - } - } - - link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); - if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) - hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, - sizeof(link_sec), &link_sec); - - if (lmp_bredr_capable(hdev)) { - if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - __hci_req_write_fast_connectable(req, true); - else - __hci_req_write_fast_connectable(req, false); - __hci_req_update_scan(req); - __hci_req_update_class(req); - __hci_req_update_name(req); - __hci_req_update_eir(req); - } - - hci_dev_unlock(hdev); - return 0; -} - -int __hci_req_hci_power_on(struct hci_dev *hdev) -{ - /* Register the available SMP channels (BR/EDR and LE) only when - * successfully powering on the controller. This late - * registration is required so that LE SMP can clearly decide if - * the public address or static address is used. - */ - smp_register(hdev); - - return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT, - NULL); -} - void hci_request_setup(struct hci_dev *hdev) { - INIT_WORK(&hdev->discov_update, discov_update); - INIT_WORK(&hdev->scan_update, scan_update_work); - INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -2646,9 +2262,6 @@ void hci_request_cancel_all(struct hci_dev *hdev) { __hci_cmd_sync_cancel(hdev, ENODEV); - cancel_work_sync(&hdev->discov_update); - cancel_work_sync(&hdev->scan_update); - cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 7f8df258e295..39d001fa3acf 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -68,8 +68,6 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -int __hci_req_hci_power_on(struct hci_dev *hdev); - void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); @@ -85,6 +83,9 @@ void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); void __hci_req_update_adv_data(struct hci_request *req, u8 instance); int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); +int hci_req_start_per_adv(struct hci_dev *hdev, u8 instance, u32 flags, + u16 min_interval, u16 max_interval, + u16 sync_interval); void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, @@ -94,8 +95,14 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, bool force); int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance); +int __hci_req_setup_per_adv_instance(struct hci_request *req, u8 instance, + u16 min_interval, u16 max_interval); int __hci_req_start_ext_adv(struct hci_request *req, u8 instance); +int __hci_req_start_per_adv(struct hci_request *req, u8 instance, u32 flags, + u16 min_interval, u16 max_interval, + u16 sync_interval); int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance); +int __hci_req_enable_per_advertising(struct hci_request *req, u8 instance); int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance); int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance); void __hci_req_clear_ext_adv_sets(struct hci_request *req); @@ -110,11 +117,6 @@ bool hci_req_stop_discovery(struct hci_request *req); int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec); -static inline void hci_req_update_scan(struct hci_dev *hdev) -{ - queue_work(hdev->req_workqueue, &hdev->scan_update); -} - void __hci_req_update_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 189e3115c8c6..0d015d4a8e41 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -869,7 +869,8 @@ static int hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; if (hdev) { - if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER && + !hci_dev_test_flag(hdev, HCI_UNREGISTER)) { /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will @@ -878,9 +879,15 @@ static int hci_sock_release(struct socket *sock) * The checking of HCI_AUTO_OFF is not needed in this * case since it will have been cleared already when * opening the user channel. + * + * Make sure to also check that we haven't already + * unregistered since all the cleanup will have already + * been complete and hdev will get released when we put + * below. */ hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); + hci_register_suspend_notifier(hdev); mgmt_index_added(hdev); } @@ -1209,6 +1216,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } mgmt_index_removed(hdev); + hci_unregister_suspend_notifier(hdev); err = hci_dev_open(hdev->id); if (err) { @@ -1223,6 +1231,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = 0; } else { hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); + hci_register_suspend_notifier(hdev); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 1739e8cb3291..148ce629a59f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -849,26 +849,38 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) u8 data[HCI_MAX_EXT_AD_LENGTH]; } pdu; u8 len; + struct adv_info *adv = NULL; + int err; memset(&pdu, 0, sizeof(pdu)); - len = eir_create_scan_rsp(hdev, instance, pdu.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(pdu.data, hdev->scan_rsp_data, len)) - return 0; + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->scan_rsp_changed) + return 0; + } - memcpy(hdev->scan_rsp_data, pdu.data, len); - hdev->scan_rsp_data_len = len; + len = eir_create_scan_rsp(hdev, instance, pdu.data); pdu.cp.handle = instance; pdu.cp.length = len; pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, - sizeof(pdu.cp) + len, &pdu.cp, - HCI_CMD_TIMEOUT); + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, + sizeof(pdu.cp) + len, &pdu.cp, + HCI_CMD_TIMEOUT); + if (err) + return err; + + if (adv) { + adv->scan_rsp_changed = false; + } else { + memcpy(hdev->scan_rsp_data, pdu.data, len); + hdev->scan_rsp_data_len = len; + } + + return 0; } static int __hci_set_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) @@ -965,6 +977,187 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance) return hci_enable_ext_advertising_sync(hdev, instance); } +static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_per_adv_enable cp; + + /* If periodic advertising already disabled there is nothing to do. */ + if (!hci_dev_test_flag(hdev, HCI_LE_PER_ADV)) + return 0; + + memset(&cp, 0, sizeof(cp)); + + cp.enable = 0x00; + cp.handle = instance; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance, + u16 min_interval, u16 max_interval) +{ + struct hci_cp_le_set_per_adv_params cp; + + memset(&cp, 0, sizeof(cp)); + + if (!min_interval) + min_interval = DISCOV_LE_PER_ADV_INT_MIN; + + if (!max_interval) + max_interval = DISCOV_LE_PER_ADV_INT_MAX; + + cp.handle = instance; + cp.min_interval = cpu_to_le16(min_interval); + cp.max_interval = cpu_to_le16(max_interval); + cp.periodic_properties = 0x0000; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct { + struct hci_cp_le_set_per_adv_data cp; + u8 data[HCI_MAX_PER_AD_LENGTH]; + } pdu; + u8 len; + + memset(&pdu, 0, sizeof(pdu)); + + if (instance) { + struct adv_info *adv = hci_find_adv_instance(hdev, instance); + + if (!adv || !adv->periodic) + return 0; + } + + len = eir_create_per_adv_data(hdev, instance, pdu.data); + + pdu.cp.length = len; + pdu.cp.handle = instance; + pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA, + sizeof(pdu.cp) + len, &pdu, + HCI_CMD_TIMEOUT); +} + +static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_per_adv_enable cp; + + /* If periodic advertising already enabled there is nothing to do. */ + if (hci_dev_test_flag(hdev, HCI_LE_PER_ADV)) + return 0; + + memset(&cp, 0, sizeof(cp)); + + cp.enable = 0x01; + cp.handle = instance; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +/* Checks if periodic advertising data contains a Basic Announcement and if it + * does generates a Broadcast ID and add Broadcast Announcement. + */ +static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) +{ + u8 bid[3]; + u8 ad[4 + 3]; + + /* Skip if NULL adv as instance 0x00 is used for general purpose + * advertising so it cannot used for the likes of Broadcast Announcement + * as it can be overwritten at any point. + */ + if (!adv) + return 0; + + /* Check if PA data doesn't contains a Basic Audio Announcement then + * there is nothing to do. + */ + if (!eir_get_service_data(adv->per_adv_data, adv->per_adv_data_len, + 0x1851, NULL)) + return 0; + + /* Check if advertising data already has a Broadcast Announcement since + * the process may want to control the Broadcast ID directly and in that + * case the kernel shall no interfere. + */ + if (eir_get_service_data(adv->adv_data, adv->adv_data_len, 0x1852, + NULL)) + return 0; + + /* Generate Broadcast ID */ + get_random_bytes(bid, sizeof(bid)); + eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + + return hci_update_adv_data_sync(hdev, adv->instance); +} + +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, + u8 *data, u32 flags, u16 min_interval, + u16 max_interval, u16 sync_interval) +{ + struct adv_info *adv = NULL; + int err; + bool added = false; + + hci_disable_per_advertising_sync(hdev, instance); + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + /* Create an instance if that could not be found */ + if (!adv) { + adv = hci_add_per_instance(hdev, instance, flags, + data_len, data, + sync_interval, + sync_interval); + if (IS_ERR(adv)) + return PTR_ERR(adv); + added = true; + } + } + + /* Only start advertising if instance 0 or if a dedicated instance has + * been added. + */ + if (!adv || added) { + err = hci_start_ext_adv_sync(hdev, instance); + if (err < 0) + goto fail; + + err = hci_adv_bcast_annoucement(hdev, adv); + if (err < 0) + goto fail; + } + + err = hci_set_per_adv_params_sync(hdev, instance, min_interval, + max_interval); + if (err < 0) + goto fail; + + err = hci_set_per_adv_data_sync(hdev, instance); + if (err < 0) + goto fail; + + err = hci_enable_per_advertising_sync(hdev, instance); + if (err < 0) + goto fail; + + return 0; + +fail: + if (added) + hci_remove_adv_instance(hdev, instance); + + return err; +} + static int hci_start_adv_sync(struct hci_dev *hdev, u8 instance) { int err; @@ -1104,6 +1297,42 @@ int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, HCI_CMD_TIMEOUT, sk); } +static int remove_ext_adv_sync(struct hci_dev *hdev, void *data) +{ + struct adv_info *adv = data; + u8 instance = 0; + + if (adv) + instance = adv->instance; + + return hci_remove_ext_adv_instance_sync(hdev, instance, NULL); +} + +int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance) +{ + struct adv_info *adv = NULL; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv) + return -EINVAL; + } + + return hci_cmd_sync_queue(hdev, remove_ext_adv_sync, adv, NULL); +} + +int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) +{ + struct hci_cp_le_term_big cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = handle; + cp.reason = reason; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_TERM_BIG, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1119,27 +1348,39 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) u8 data[HCI_MAX_EXT_AD_LENGTH]; } pdu; u8 len; + struct adv_info *adv = NULL; + int err; memset(&pdu, 0, sizeof(pdu)); - len = eir_create_adv_data(hdev, instance, pdu.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(pdu.data, hdev->adv_data, len) == 0) - return 0; + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } - memcpy(hdev->adv_data, pdu.data, len); - hdev->adv_data_len = len; + len = eir_create_adv_data(hdev, instance, pdu.data); pdu.cp.length = len; pdu.cp.handle = instance; pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - sizeof(pdu.cp) + len, &pdu.cp, - HCI_CMD_TIMEOUT); + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + sizeof(pdu.cp) + len, &pdu.cp, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu.data, len); + hdev->adv_data_len = len; + } + + return 0; } static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) @@ -1612,6 +1853,9 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, bacpy(&cp.bdaddr, ¶ms->addr); memcpy(cp.peer_irk, irk->val, 16); + /* Default privacy mode is always Network */ + params->privacy_mode = HCI_NETWORK_PRIVACY; + done: if (hci_dev_test_flag(hdev, HCI_PRIVACY)) memcpy(cp.local_irk, hdev->irk, 16); @@ -1865,12 +2109,15 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) } /* Go through the current accept list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to + * controller one by one and check if that address is connected or is + * still in the list of pending connections or list of devices to * report. If not present in either list, then remove it from * the controller. */ list_for_each_entry_safe(b, t, &hdev->le_accept_list, list) { + if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type)) + continue; + pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, &b->bdaddr, b->bdaddr_type); @@ -2171,7 +2418,8 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev) if (list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && - !hci_is_adv_monitoring(hdev)) { + !hci_is_adv_monitoring(hdev) && + !hci_dev_test_flag(hdev, HCI_PA_SYNC)) { /* If there is no pending LE connections or devices * to be scanned for or no ADV monitors, we should stop the * background scanning. @@ -2206,6 +2454,16 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev) return err; } +static int update_scan_sync(struct hci_dev *hdev, void *data) +{ + return hci_update_scan_sync(hdev); +} + +int hci_update_scan(struct hci_dev *hdev) +{ + return hci_cmd_sync_queue(hdev, update_scan_sync, NULL, NULL); +} + static int update_passive_scan_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); @@ -2760,6 +3018,12 @@ static const struct hci_init_stage amp_init2[] = { /* Read Buffer Size (ACL mtu, max pkt, etc.) */ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { + /* Use Read LE Buffer Size V2 if supported */ + if (hdev->commands[41] & 0x20) + return __hci_cmd_sync_status(hdev, + HCI_OP_LE_READ_BUFFER_SIZE_V2, + 0, NULL, HCI_CMD_TIMEOUT); + return __hci_cmd_sync_status(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3011,6 +3275,10 @@ static int hci_init2_sync(struct hci_dev *hdev) if (hdev->dev_type == HCI_AMP) return hci_init_stage_sync(hdev, amp_init2); + err = hci_init_stage_sync(hdev, hci_init2); + if (err) + return err; + if (lmp_bredr_capable(hdev)) { err = hci_init_stage_sync(hdev, br_init2); if (err) @@ -3028,7 +3296,7 @@ static int hci_init2_sync(struct hci_dev *hdev) hci_dev_set_flag(hdev, HCI_LE_ENABLED); } - return hci_init_stage_sync(hdev, hci_init2); + return 0; } static int hci_set_event_mask_sync(struct hci_dev *hdev) @@ -3191,7 +3459,7 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -3349,6 +3617,19 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev) if (ext_adv_capable(hdev)) events[2] |= 0x02; /* LE Advertising Set Terminated */ + if (cis_capable(hdev)) { + events[3] |= 0x01; /* LE CIS Established */ + if (cis_peripheral_capable(hdev)) + events[3] |= 0x02; /* LE CIS Request */ + } + + if (bis_capable(hdev)) { + events[3] |= 0x04; /* LE Create BIG Complete */ + events[3] |= 0x08; /* LE Terminate BIG Complete */ + events[3] |= 0x10; /* LE BIG Sync Established */ + events[3] |= 0x20; /* LE BIG Sync Loss */ + } + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), events, HCI_CMD_TIMEOUT); } @@ -3489,6 +3770,24 @@ static int hci_set_le_support_sync(struct hci_dev *hdev) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +/* LE Set Host Feature */ +static int hci_le_set_host_feature_sync(struct hci_dev *hdev) +{ + struct hci_cp_le_set_host_feature cp; + + if (!iso_capable(hdev)) + return 0; + + memset(&cp, 0, sizeof(cp)); + + /* Isochronous Channels (Host Support) */ + cp.bit_number = 32; + cp.bit_value = 1; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + /* LE Controller init stage 3 command sequence */ static const struct hci_init_stage le_init3[] = { /* HCI_OP_LE_SET_EVENT_MASK */ @@ -3515,6 +3814,8 @@ static const struct hci_init_stage le_init3[] = { HCI_INIT(hci_le_read_num_support_adv_sets_sync), /* HCI_OP_WRITE_LE_HOST_SUPPORTED */ HCI_INIT(hci_set_le_support_sync), + /* HCI_OP_LE_SET_HOST_FEATURE */ + HCI_INIT(hci_le_set_host_feature_sync), {} }; @@ -3578,7 +3879,7 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) if (lmp_cpb_central_capable(hdev)) { events[1] |= 0x40; /* Triggered Clock Capture */ events[1] |= 0x80; /* Synchronization Train Complete */ - events[2] |= 0x10; /* Peripheral Page Response Timeout */ + events[2] |= 0x08; /* Truncated Page Complete */ events[2] |= 0x20; /* CPB Channel Map Change */ changed = true; } @@ -3590,7 +3891,7 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) events[2] |= 0x01; /* Synchronization Train Received */ events[2] |= 0x02; /* CPB Receive */ events[2] |= 0x04; /* CPB Timeout */ - events[2] |= 0x08; /* Truncated Page Complete */ + events[2] |= 0x10; /* Peripheral Page Response Timeout */ changed = true; } @@ -3676,7 +3977,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || - test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) return 0; if (enabled == hdev->err_data_reporting) @@ -3835,9 +4136,6 @@ static const struct { HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), - HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, - "HCI Read Default Erroneous Data Reporting command is " - "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), @@ -3848,90 +4146,40 @@ static const struct { "advertised, but not supported.") }; -int hci_dev_open_sync(struct hci_dev *hdev) +/* This function handles hdev setup stage: + * + * Calls hdev->setup + * Setup address if HCI_QUIRK_USE_BDADDR_PROPERTY is set. + */ +static int hci_dev_setup_sync(struct hci_dev *hdev) { int ret = 0; - - bt_dev_dbg(hdev, ""); - - if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { - ret = -ENODEV; - goto done; - } + bool invalid_bdaddr; + size_t i; if (!hci_dev_test_flag(hdev, HCI_SETUP) && - !hci_dev_test_flag(hdev, HCI_CONFIG)) { - /* Check for rfkill but allow the HCI setup stage to - * proceed (which in itself doesn't cause any RF activity). - */ - if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { - ret = -ERFKILL; - goto done; - } - - /* Check for valid public address or a configured static - * random address, but let the HCI setup proceed to - * be able to determine if there is a public address - * or not. - * - * In case of user channel usage, it is not important - * if a public address or static random address is - * available. - * - * This check is only valid for BR/EDR controllers - * since AMP controllers do not have an address. - */ - if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && - !bacmp(&hdev->static_addr, BDADDR_ANY)) { - ret = -EADDRNOTAVAIL; - goto done; - } - } - - if (test_bit(HCI_UP, &hdev->flags)) { - ret = -EALREADY; - goto done; - } - - if (hdev->open(hdev)) { - ret = -EIO; - goto done; - } - - set_bit(HCI_RUNNING, &hdev->flags); - hci_sock_dev_event(hdev, HCI_DEV_OPEN); - - atomic_set(&hdev->cmd_cnt, 1); - set_bit(HCI_INIT, &hdev->flags); + !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) + return 0; - if (hci_dev_test_flag(hdev, HCI_SETUP) || - test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) { - bool invalid_bdaddr; - size_t i; + bt_dev_dbg(hdev, ""); - hci_sock_dev_event(hdev, HCI_DEV_SETUP); + hci_sock_dev_event(hdev, HCI_DEV_SETUP); - if (hdev->setup) - ret = hdev->setup(hdev); + if (hdev->setup) + ret = hdev->setup(hdev); - for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { - if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) - bt_dev_warn(hdev, "%s", - hci_broken_table[i].desc); - } - - /* The transport driver can set the quirk to mark the - * BD_ADDR invalid before creating the HCI device or in - * its setup callback. - */ - invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, - &hdev->quirks); + for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { + if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) + bt_dev_warn(hdev, "%s", hci_broken_table[i].desc); + } - if (ret) - goto setup_failed; + /* The transport driver can set the quirk to mark the + * BD_ADDR invalid before creating the HCI device or in + * its setup callback. + */ + invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); + if (!ret) { if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) { if (!bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); @@ -3950,33 +4198,51 @@ int hci_dev_open_sync(struct hci_dev *hdev) invalid_bdaddr = false; } } + } -setup_failed: - /* The transport driver can set these quirks before - * creating the HCI device or in its setup callback. - * - * For the invalid BD_ADDR quirk it is possible that - * it becomes a valid address if the bootloader does - * provide it (see above). - * - * In case any of them is set, the controller has to - * start up as unconfigured. - */ - if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || - invalid_bdaddr) - hci_dev_set_flag(hdev, HCI_UNCONFIGURED); + /* The transport driver can set these quirks before + * creating the HCI device or in its setup callback. + * + * For the invalid BD_ADDR quirk it is possible that + * it becomes a valid address if the bootloader does + * provide it (see above). + * + * In case any of them is set, the controller has to + * start up as unconfigured. + */ + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + invalid_bdaddr) + hci_dev_set_flag(hdev, HCI_UNCONFIGURED); - /* For an unconfigured controller it is required to - * read at least the version information provided by - * the Read Local Version Information command. - * - * If the set_bdaddr driver callback is provided, then - * also the original Bluetooth public device address - * will be read using the Read BD Address command. - */ - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) - ret = hci_unconf_init_sync(hdev); - } + /* For an unconfigured controller it is required to + * read at least the version information provided by + * the Read Local Version Information command. + * + * If the set_bdaddr driver callback is provided, then + * also the original Bluetooth public device address + * will be read using the Read BD Address command. + */ + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) + return hci_unconf_init_sync(hdev); + + return ret; +} + +/* This function handles hdev init stage: + * + * Calls hci_dev_setup_sync to perform setup stage + * Calls hci_init_sync to perform HCI command init sequence + */ +static int hci_dev_init_sync(struct hci_dev *hdev) +{ + int ret; + + bt_dev_dbg(hdev, ""); + + atomic_set(&hdev->cmd_cnt, 1); + set_bit(HCI_INIT, &hdev->flags); + + ret = hci_dev_setup_sync(hdev); if (hci_dev_test_flag(hdev, HCI_CONFIG)) { /* If public address change is configured, ensure that @@ -4016,6 +4282,65 @@ setup_failed: clear_bit(HCI_INIT, &hdev->flags); + return ret; +} + +int hci_dev_open_sync(struct hci_dev *hdev) +{ + int ret; + + bt_dev_dbg(hdev, ""); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + ret = -ENODEV; + goto done; + } + + if (!hci_dev_test_flag(hdev, HCI_SETUP) && + !hci_dev_test_flag(hdev, HCI_CONFIG)) { + /* Check for rfkill but allow the HCI setup stage to + * proceed (which in itself doesn't cause any RF activity). + */ + if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { + ret = -ERFKILL; + goto done; + } + + /* Check for valid public address or a configured static + * random address, but let the HCI setup proceed to + * be able to determine if there is a public address + * or not. + * + * In case of user channel usage, it is not important + * if a public address or static random address is + * available. + * + * This check is only valid for BR/EDR controllers + * since AMP controllers do not have an address. + */ + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hdev->dev_type == HCI_PRIMARY && + !bacmp(&hdev->bdaddr, BDADDR_ANY) && + !bacmp(&hdev->static_addr, BDADDR_ANY)) { + ret = -EADDRNOTAVAIL; + goto done; + } + } + + if (test_bit(HCI_UP, &hdev->flags)) { + ret = -EALREADY; + goto done; + } + + if (hdev->open(hdev)) { + ret = -EIO; + goto done; + } + + set_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_OPEN); + + ret = hci_dev_init_sync(hdev); if (!ret) { hci_dev_hold(hdev); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); @@ -4435,8 +4760,7 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, - u8 reason) +int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err; @@ -5039,13 +5363,13 @@ static int hci_resume_scan_sync(struct hci_dev *hdev) if (!hdev->scanning_paused) return 0; + hdev->scanning_paused = false; + hci_update_scan_sync(hdev); /* Reset passive scanning to normal */ hci_update_passive_scan_sync(hdev); - hdev->scanning_paused = false; - return 0; } @@ -5064,7 +5388,6 @@ int hci_resume_sync(struct hci_dev *hdev) return 0; hdev->suspended = false; - hdev->scanning_paused = false; /* Restore event mask */ hci_set_event_mask_sync(hdev); @@ -5375,3 +5698,36 @@ done: hci_resume_advertising_sync(hdev); return err; } + +int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle) +{ + struct hci_cp_le_remove_cig cp; + + memset(&cp, 0, sizeof(cp)); + cp.cig_id = handle; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_REMOVE_CIG, sizeof(cp), + &cp, HCI_CMD_TIMEOUT); +} + +int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle) +{ + struct hci_cp_le_big_term_sync cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = handle; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_BIG_TERM_SYNC, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle) +{ + struct hci_cp_le_pa_term_sync cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(handle); + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c new file mode 100644 index 000000000000..ff09c353e64e --- /dev/null +++ b/net/bluetooth/iso.c @@ -0,0 +1,1824 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2022 Intel Corporation + */ + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/sched/signal.h> + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/iso.h> + +static const struct proto_ops iso_sock_ops; + +static struct bt_sock_list iso_sk_list = { + .lock = __RW_LOCK_UNLOCKED(iso_sk_list.lock) +}; + +/* ---- ISO connections ---- */ +struct iso_conn { + struct hci_conn *hcon; + + /* @lock: spinlock protecting changes to iso_conn fields */ + spinlock_t lock; + struct sock *sk; + + struct delayed_work timeout_work; + + struct sk_buff *rx_skb; + __u32 rx_len; + __u16 tx_sn; +}; + +#define iso_conn_lock(c) spin_lock(&(c)->lock) +#define iso_conn_unlock(c) spin_unlock(&(c)->lock) + +static void iso_sock_close(struct sock *sk); +static void iso_sock_kill(struct sock *sk); + +/* ----- ISO socket info ----- */ +#define iso_pi(sk) ((struct iso_pinfo *)sk) + +struct iso_pinfo { + struct bt_sock bt; + bdaddr_t src; + __u8 src_type; + bdaddr_t dst; + __u8 dst_type; + __u8 bc_sid; + __u8 bc_num_bis; + __u8 bc_bis[ISO_MAX_NUM_BIS]; + __u16 sync_handle; + __u32 flags; + struct bt_iso_qos qos; + __u8 base_len; + __u8 base[HCI_MAX_PER_AD_LENGTH]; + struct iso_conn *conn; +}; + +/* ---- ISO timers ---- */ +#define ISO_CONN_TIMEOUT (HZ * 40) +#define ISO_DISCONN_TIMEOUT (HZ * 2) + +static void iso_sock_timeout(struct work_struct *work) +{ + struct iso_conn *conn = container_of(work, struct iso_conn, + timeout_work.work); + struct sock *sk; + + iso_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + iso_conn_unlock(conn); + + if (!sk) + return; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + + lock_sock(sk); + sk->sk_err = ETIMEDOUT; + sk->sk_state_change(sk); + release_sock(sk); + sock_put(sk); +} + +static void iso_sock_set_timer(struct sock *sk, long timeout) +{ + if (!iso_pi(sk)->conn) + return; + + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); + cancel_delayed_work(&iso_pi(sk)->conn->timeout_work); + schedule_delayed_work(&iso_pi(sk)->conn->timeout_work, timeout); +} + +static void iso_sock_clear_timer(struct sock *sk) +{ + if (!iso_pi(sk)->conn) + return; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + cancel_delayed_work(&iso_pi(sk)->conn->timeout_work); +} + +/* ---- ISO connections ---- */ +static struct iso_conn *iso_conn_add(struct hci_conn *hcon) +{ + struct iso_conn *conn = hcon->iso_data; + + if (conn) + return conn; + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return NULL; + + spin_lock_init(&conn->lock); + INIT_DELAYED_WORK(&conn->timeout_work, iso_sock_timeout); + + hcon->iso_data = conn; + conn->hcon = hcon; + conn->tx_sn = 0; + + BT_DBG("hcon %p conn %p", hcon, conn); + + return conn; +} + +/* Delete channel. Must be called on the locked socket. */ +static void iso_chan_del(struct sock *sk, int err) +{ + struct iso_conn *conn; + struct sock *parent; + + conn = iso_pi(sk)->conn; + + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); + + if (conn) { + iso_conn_lock(conn); + conn->sk = NULL; + iso_pi(sk)->conn = NULL; + iso_conn_unlock(conn); + + if (conn->hcon) + hci_conn_drop(conn->hcon); + } + + sk->sk_state = BT_CLOSED; + sk->sk_err = err; + + parent = bt_sk(sk)->parent; + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent); + } else { + sk->sk_state_change(sk); + } + + sock_set_flag(sk, SOCK_ZAPPED); +} + +static void iso_conn_del(struct hci_conn *hcon, int err) +{ + struct iso_conn *conn = hcon->iso_data; + struct sock *sk; + + if (!conn) + return; + + BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + + /* Kill socket */ + iso_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + iso_conn_unlock(conn); + + if (sk) { + lock_sock(sk); + iso_sock_clear_timer(sk); + iso_chan_del(sk, err); + release_sock(sk); + sock_put(sk); + } + + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); + + hcon->iso_data = NULL; + kfree(conn); +} + +static int __iso_chan_add(struct iso_conn *conn, struct sock *sk, + struct sock *parent) +{ + BT_DBG("conn %p", conn); + + if (iso_pi(sk)->conn == conn && conn->sk == sk) + return 0; + + if (conn->sk) { + BT_ERR("conn->sk already set"); + return -EBUSY; + } + + iso_pi(sk)->conn = conn; + conn->sk = sk; + + if (parent) + bt_accept_enqueue(parent, sk, true); + + return 0; +} + +static int iso_chan_add(struct iso_conn *conn, struct sock *sk, + struct sock *parent) +{ + int err; + + iso_conn_lock(conn); + err = __iso_chan_add(conn, sk, parent); + iso_conn_unlock(conn); + + return err; +} + +static int iso_connect_bis(struct sock *sk) +{ + struct iso_conn *conn; + struct hci_conn *hcon; + struct hci_dev *hdev; + int err; + + BT_DBG("%pMR", &iso_pi(sk)->src); + + hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, + iso_pi(sk)->src_type); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock(hdev); + + if (!bis_capable(hdev)) { + err = -EOPNOTSUPP; + goto done; + } + + /* Fail if out PHYs are marked as disabled */ + if (!iso_pi(sk)->qos.out.phy) { + err = -EINVAL; + goto done; + } + + hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->dst_type, + &iso_pi(sk)->qos, iso_pi(sk)->base_len, + iso_pi(sk)->base); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto done; + } + + conn = iso_conn_add(hcon); + if (!conn) { + hci_conn_drop(hcon); + err = -ENOMEM; + goto done; + } + + /* Update source addr of the socket */ + bacpy(&iso_pi(sk)->src, &hcon->src); + + err = iso_chan_add(conn, sk, NULL); + if (err) + goto done; + + if (hcon->state == BT_CONNECTED) { + iso_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + } else { + sk->sk_state = BT_CONNECT; + iso_sock_set_timer(sk, sk->sk_sndtimeo); + } + +done: + hci_dev_unlock(hdev); + hci_dev_put(hdev); + return err; +} + +static int iso_connect_cis(struct sock *sk) +{ + struct iso_conn *conn; + struct hci_conn *hcon; + struct hci_dev *hdev; + int err; + + BT_DBG("%pMR -> %pMR", &iso_pi(sk)->src, &iso_pi(sk)->dst); + + hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, + iso_pi(sk)->src_type); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock(hdev); + + if (!cis_central_capable(hdev)) { + err = -EOPNOTSUPP; + goto done; + } + + /* Fail if either PHYs are marked as disabled */ + if (!iso_pi(sk)->qos.in.phy && !iso_pi(sk)->qos.out.phy) { + err = -EINVAL; + goto done; + } + + /* Just bind if DEFER_SETUP has been set */ + if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { + hcon = hci_bind_cis(hdev, &iso_pi(sk)->dst, + iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto done; + } + } else { + hcon = hci_connect_cis(hdev, &iso_pi(sk)->dst, + iso_pi(sk)->dst_type, &iso_pi(sk)->qos); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto done; + } + } + + conn = iso_conn_add(hcon); + if (!conn) { + hci_conn_drop(hcon); + err = -ENOMEM; + goto done; + } + + /* Update source addr of the socket */ + bacpy(&iso_pi(sk)->src, &hcon->src); + + err = iso_chan_add(conn, sk, NULL); + if (err) + goto done; + + if (hcon->state == BT_CONNECTED) { + iso_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + } else if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { + iso_sock_clear_timer(sk); + sk->sk_state = BT_CONNECT; + } else { + sk->sk_state = BT_CONNECT; + iso_sock_set_timer(sk, sk->sk_sndtimeo); + } + +done: + hci_dev_unlock(hdev); + hci_dev_put(hdev); + return err; +} + +static int iso_send_frame(struct sock *sk, struct sk_buff *skb) +{ + struct iso_conn *conn = iso_pi(sk)->conn; + struct hci_iso_data_hdr *hdr; + int len = 0; + + BT_DBG("sk %p len %d", sk, skb->len); + + if (skb->len > iso_pi(sk)->qos.out.sdu) + return -EMSGSIZE; + + len = skb->len; + + /* Push ISO data header */ + hdr = skb_push(skb, HCI_ISO_DATA_HDR_SIZE); + hdr->sn = cpu_to_le16(conn->tx_sn++); + hdr->slen = cpu_to_le16(hci_iso_data_len_pack(len, + HCI_ISO_STATUS_VALID)); + + if (sk->sk_state == BT_CONNECTED) + hci_send_iso(conn->hcon, skb); + else + len = -ENOTCONN; + + return len; +} + +static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb) +{ + struct sock *sk; + + iso_conn_lock(conn); + sk = conn->sk; + iso_conn_unlock(conn); + + if (!sk) + goto drop; + + BT_DBG("sk %p len %d", sk, skb->len); + + if (sk->sk_state != BT_CONNECTED) + goto drop; + + if (!sock_queue_rcv_skb(sk, skb)) + return; + +drop: + kfree_skb(skb); +} + +/* -------- Socket interface ---------- */ +static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba) +{ + struct sock *sk; + + sk_for_each(sk, &iso_sk_list.head) { + if (sk->sk_state != BT_LISTEN) + continue; + + if (!bacmp(&iso_pi(sk)->src, ba)) + return sk; + } + + return NULL; +} + +static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc, + __u8 sid) +{ + struct sock *sk; + + sk_for_each(sk, &iso_sk_list.head) { + if (sk->sk_state != BT_LISTEN) + continue; + + if (bacmp(&iso_pi(sk)->src, ba)) + continue; + + if (bacmp(&iso_pi(sk)->dst, bc)) + continue; + + if (iso_pi(sk)->bc_sid == sid) + return sk; + } + + return NULL; +} + +typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); + +/* Find socket listening: + * source bdaddr (Unicast) + * destination bdaddr (Broadcast only) + * match func - pass NULL to ignore + * match func data - pass -1 to ignore + * Returns closest match. + */ +static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, + iso_sock_match_t match, void *data) +{ + struct sock *sk = NULL, *sk1 = NULL; + + read_lock(&iso_sk_list.lock); + + sk_for_each(sk, &iso_sk_list.head) { + if (sk->sk_state != BT_LISTEN) + continue; + + /* Match Broadcast destination */ + if (bacmp(dst, BDADDR_ANY) && bacmp(&iso_pi(sk)->dst, dst)) + continue; + + /* Use Match function if provided */ + if (match && !match(sk, data)) + continue; + + /* Exact match. */ + if (!bacmp(&iso_pi(sk)->src, src)) + break; + + /* Closest match */ + if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) + sk1 = sk; + } + + read_unlock(&iso_sk_list.lock); + + return sk ? sk : sk1; +} + +static void iso_sock_destruct(struct sock *sk) +{ + BT_DBG("sk %p", sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +static void iso_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) { + iso_sock_close(sk); + iso_sock_kill(sk); + } + + parent->sk_state = BT_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +/* Kill socket (only if zapped and orphan) + * Must be called on unlocked socket. + */ +static void iso_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || + sock_flag(sk, SOCK_DEAD)) + return; + + BT_DBG("sk %p state %d", sk, sk->sk_state); + + /* Kill poor orphan */ + bt_sock_unlink(&iso_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + +static void iso_conn_defer_reject(struct hci_conn *conn) +{ + struct hci_cp_le_reject_cis cp; + + BT_DBG("conn %p", conn); + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + cp.reason = HCI_ERROR_REJ_BAD_ADDR; + hci_send_cmd(conn->hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp); +} + +static void __iso_sock_close(struct sock *sk) +{ + BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); + + switch (sk->sk_state) { + case BT_LISTEN: + iso_sock_cleanup_listen(sk); + break; + + case BT_CONNECTED: + case BT_CONFIG: + if (iso_pi(sk)->conn->hcon) { + sk->sk_state = BT_DISCONN; + iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT); + iso_conn_lock(iso_pi(sk)->conn); + hci_conn_drop(iso_pi(sk)->conn->hcon); + iso_pi(sk)->conn->hcon = NULL; + iso_conn_unlock(iso_pi(sk)->conn); + } else { + iso_chan_del(sk, ECONNRESET); + } + break; + + case BT_CONNECT2: + if (iso_pi(sk)->conn->hcon) + iso_conn_defer_reject(iso_pi(sk)->conn->hcon); + iso_chan_del(sk, ECONNRESET); + break; + case BT_CONNECT: + /* In case of DEFER_SETUP the hcon would be bound to CIG which + * needs to be removed so just call hci_conn_del so the cleanup + * callback do what is needed. + */ + if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && + iso_pi(sk)->conn->hcon) { + hci_conn_del(iso_pi(sk)->conn->hcon); + iso_pi(sk)->conn->hcon = NULL; + } + + iso_chan_del(sk, ECONNRESET); + break; + case BT_DISCONN: + iso_chan_del(sk, ECONNRESET); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + } +} + +/* Must be called on unlocked socket. */ +static void iso_sock_close(struct sock *sk) +{ + iso_sock_clear_timer(sk); + lock_sock(sk); + __iso_sock_close(sk); + release_sock(sk); + iso_sock_kill(sk); +} + +static void iso_sock_init(struct sock *sk, struct sock *parent) +{ + BT_DBG("sk %p", sk); + + if (parent) { + sk->sk_type = parent->sk_type; + bt_sk(sk)->flags = bt_sk(parent)->flags; + security_sk_clone(parent, sk); + } +} + +static struct proto iso_proto = { + .name = "ISO", + .owner = THIS_MODULE, + .obj_size = sizeof(struct iso_pinfo) +}; + +#define DEFAULT_IO_QOS \ +{ \ + .interval = 10000u, \ + .latency = 10u, \ + .sdu = 40u, \ + .phy = BT_ISO_PHY_2M, \ + .rtn = 2u, \ +} + +static struct bt_iso_qos default_qos = { + .cig = BT_ISO_QOS_CIG_UNSET, + .cis = BT_ISO_QOS_CIS_UNSET, + .sca = 0x00, + .packing = 0x00, + .framing = 0x00, + .in = DEFAULT_IO_QOS, + .out = DEFAULT_IO_QOS, +}; + +static struct sock *iso_sock_alloc(struct net *net, struct socket *sock, + int proto, gfp_t prio, int kern) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_BLUETOOTH, prio, &iso_proto, kern); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + + sk->sk_destruct = iso_sock_destruct; + sk->sk_sndtimeo = ISO_CONN_TIMEOUT; + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = BT_OPEN; + + /* Set address type as public as default src address is BDADDR_ANY */ + iso_pi(sk)->src_type = BDADDR_LE_PUBLIC; + + iso_pi(sk)->qos = default_qos; + + bt_sock_link(&iso_sk_list, sk); + return sk; +} + +static int iso_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + BT_DBG("sock %p", sock); + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_SEQPACKET) + return -ESOCKTNOSUPPORT; + + sock->ops = &iso_sock_ops; + + sk = iso_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); + if (!sk) + return -ENOMEM; + + iso_sock_init(sk, NULL); + return 0; +} + +static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, + int addr_len) +{ + struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; + struct sock *sk = sock->sk; + int i; + + BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid, + sa->iso_bc->bc_num_bis); + + if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc) || + sa->iso_bc->bc_num_bis < 0x01 || sa->iso_bc->bc_num_bis > 0x1f) + return -EINVAL; + + bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr); + iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; + iso_pi(sk)->sync_handle = -1; + iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid; + iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis; + + for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) { + if (sa->iso_bc->bc_bis[i] < 0x01 || + sa->iso_bc->bc_bis[i] > 0x1f) + return -EINVAL; + + memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, + iso_pi(sk)->bc_num_bis); + } + + return 0; +} + +static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, + int addr_len) +{ + struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p %pMR type %u", sk, &sa->iso_bdaddr, sa->iso_bdaddr_type); + + if (!addr || addr_len < sizeof(struct sockaddr_iso) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + lock_sock(sk); + + if (sk->sk_state != BT_OPEN) { + err = -EBADFD; + goto done; + } + + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } + + /* Check if the address type is of LE type */ + if (!bdaddr_type_is_le(sa->iso_bdaddr_type)) { + err = -EINVAL; + goto done; + } + + bacpy(&iso_pi(sk)->src, &sa->iso_bdaddr); + iso_pi(sk)->src_type = sa->iso_bdaddr_type; + + /* Check for Broadcast address */ + if (addr_len > sizeof(*sa)) { + err = iso_sock_bind_bc(sock, addr, addr_len); + if (err) + goto done; + } + + sk->sk_state = BT_BOUND; + +done: + release_sock(sk); + return err; +} + +static int iso_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; + struct sock *sk = sock->sk; + int err; + + BT_DBG("sk %p", sk); + + if (alen < sizeof(struct sockaddr_iso) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) + return -EBADFD; + + if (sk->sk_type != SOCK_SEQPACKET) + return -EINVAL; + + /* Check if the address type is of LE type */ + if (!bdaddr_type_is_le(sa->iso_bdaddr_type)) + return -EINVAL; + + lock_sock(sk); + + bacpy(&iso_pi(sk)->dst, &sa->iso_bdaddr); + iso_pi(sk)->dst_type = sa->iso_bdaddr_type; + + if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + err = iso_connect_cis(sk); + else + err = iso_connect_bis(sk); + + if (err) + goto done; + + if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { + err = bt_sock_wait_state(sk, BT_CONNECTED, + sock_sndtimeo(sk, flags & O_NONBLOCK)); + } + +done: + release_sock(sk); + return err; +} + +static int iso_listen_bis(struct sock *sk) +{ + struct hci_dev *hdev; + int err = 0; + + BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src, + &iso_pi(sk)->dst, iso_pi(sk)->bc_sid); + + write_lock(&iso_sk_list.lock); + + if (__iso_get_sock_listen_by_sid(&iso_pi(sk)->src, &iso_pi(sk)->dst, + iso_pi(sk)->bc_sid)) + err = -EADDRINUSE; + + write_unlock(&iso_sk_list.lock); + + if (err) + return err; + + hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, + iso_pi(sk)->src_type); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock(hdev); + + err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst, iso_pi(sk)->dst_type, + iso_pi(sk)->bc_sid); + + hci_dev_unlock(hdev); + + return err; +} + +static int iso_listen_cis(struct sock *sk) +{ + int err = 0; + + BT_DBG("%pMR", &iso_pi(sk)->src); + + write_lock(&iso_sk_list.lock); + + if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src)) + err = -EADDRINUSE; + + write_unlock(&iso_sk_list.lock); + + return err; +} + +static int iso_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p backlog %d", sk, backlog); + + lock_sock(sk); + + if (sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } + + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } + + if (!bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + err = iso_listen_cis(sk); + else + err = iso_listen_bis(sk); + + if (err) + goto done; + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + + sk->sk_state = BT_LISTEN; + +done: + release_sock(sk); + return err; +} + +static int iso_sock_accept(struct socket *sock, struct socket *newsock, + int flags, bool kern) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk, *ch; + long timeo; + int err = 0; + + lock_sock(sk); + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + + /* Wait for an incoming connection. (wake-one). */ + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (1) { + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + break; + } + + ch = bt_accept_dequeue(sk, newsock); + if (ch) + break; + + if (!timeo) { + err = -EAGAIN; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock(sk); + } + remove_wait_queue(sk_sleep(sk), &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + + BT_DBG("new socket %p", ch); + +done: + release_sock(sk); + return err; +} + +static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, + int peer) +{ + struct sockaddr_iso *sa = (struct sockaddr_iso *)addr; + struct sock *sk = sock->sk; + + BT_DBG("sock %p, sk %p", sock, sk); + + addr->sa_family = AF_BLUETOOTH; + + if (peer) { + bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); + sa->iso_bdaddr_type = iso_pi(sk)->dst_type; + } else { + bacpy(&sa->iso_bdaddr, &iso_pi(sk)->src); + sa->iso_bdaddr_type = iso_pi(sk)->src_type; + } + + return sizeof(struct sockaddr_iso); +} + +static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) +{ + struct sock *sk = sock->sk; + struct iso_conn *conn = iso_pi(sk)->conn; + struct sk_buff *skb, **frag; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (sk->sk_state != BT_CONNECTED) + return -ENOTCONN; + + skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu, + HCI_ISO_DATA_HDR_SIZE, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + len -= skb->len; + + BT_DBG("skb %p len %d", sk, skb->len); + + /* Continuation fragments */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + struct sk_buff *tmp; + + tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu, + 0, 0); + if (IS_ERR(tmp)) { + kfree_skb(skb); + return PTR_ERR(tmp); + } + + *frag = tmp; + + len -= tmp->len; + + skb->len += tmp->len; + skb->data_len += tmp->len; + + BT_DBG("frag %p len %d", *frag, tmp->len); + + frag = &(*frag)->next; + } + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECTED) + err = iso_send_frame(sk, skb); + else + err = -ENOTCONN; + + release_sock(sk); + + if (err < 0) + kfree_skb(skb); + return err; +} + +static void iso_conn_defer_accept(struct hci_conn *conn) +{ + struct hci_cp_le_accept_cis cp; + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->state = BT_CONFIG; + + cp.handle = cpu_to_le16(conn->handle); + + hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp); +} + +static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct iso_pinfo *pi = iso_pi(sk); + int err; + + BT_DBG("sk %p", sk); + + lock_sock(sk); + + if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { + switch (sk->sk_state) { + case BT_CONNECT2: + iso_conn_defer_accept(pi->conn->hcon); + sk->sk_state = BT_CONFIG; + release_sock(sk); + return 0; + case BT_CONNECT: + err = iso_connect_cis(sk); + release_sock(sk); + return err; + } + } + + release_sock(sk); + + return bt_sock_recvmsg(sock, msg, len, flags); +} + +static bool check_io_qos(struct bt_iso_io_qos *qos) +{ + /* If no PHY is enable SDU must be 0 */ + if (!qos->phy && qos->sdu) + return false; + + if (qos->interval && (qos->interval < 0xff || qos->interval > 0xfffff)) + return false; + + if (qos->latency && (qos->latency < 0x05 || qos->latency > 0xfa0)) + return false; + + if (qos->phy > BT_ISO_PHY_ANY) + return false; + + return true; +} + +static bool check_qos(struct bt_iso_qos *qos) +{ + if (qos->sca > 0x07) + return false; + + if (qos->packing > 0x01) + return false; + + if (qos->framing > 0x01) + return false; + + if (!check_io_qos(&qos->in)) + return false; + + if (!check_io_qos(&qos->out)) + return false; + + return true; +} + +static int iso_sock_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + int len, err = 0; + struct bt_iso_qos qos; + u32 opt; + + BT_DBG("sk %p", sk); + + lock_sock(sk); + + switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { + err = -EFAULT; + break; + } + + if (opt) + set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); + else + clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); + break; + + case BT_ISO_QOS: + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND && + sk->sk_state != BT_CONNECT2) { + err = -EINVAL; + break; + } + + len = min_t(unsigned int, sizeof(qos), optlen); + if (len != sizeof(qos)) + return -EINVAL; + + memset(&qos, 0, sizeof(qos)); + + if (copy_from_sockptr(&qos, optval, len)) { + err = -EFAULT; + break; + } + + if (!check_qos(&qos)) { + err = -EINVAL; + break; + } + + iso_pi(sk)->qos = qos; + + break; + + case BT_ISO_BASE: + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND && + sk->sk_state != BT_CONNECT2) { + err = -EINVAL; + break; + } + + if (optlen > sizeof(iso_pi(sk)->base)) { + err = -EOVERFLOW; + break; + } + + len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen); + + if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) { + err = -EFAULT; + break; + } + + iso_pi(sk)->base_len = len; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int iso_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int len, err = 0; + struct bt_iso_qos qos; + u8 base_len; + u8 *base; + + BT_DBG("sk %p", sk); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), + (u32 __user *)optval)) + err = -EFAULT; + + break; + + case BT_ISO_QOS: + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONNECT2) + qos = iso_pi(sk)->conn->hcon->iso_qos; + else + qos = iso_pi(sk)->qos; + + len = min_t(unsigned int, len, sizeof(qos)); + if (copy_to_user(optval, (char *)&qos, len)) + err = -EFAULT; + + break; + + case BT_ISO_BASE: + if (sk->sk_state == BT_CONNECTED) { + base_len = iso_pi(sk)->conn->hcon->le_per_adv_data_len; + base = iso_pi(sk)->conn->hcon->le_per_adv_data; + } else { + base_len = iso_pi(sk)->base_len; + base = iso_pi(sk)->base; + } + + len = min_t(unsigned int, len, base_len); + if (copy_to_user(optval, base, len)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int iso_sock_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + sock_hold(sk); + lock_sock(sk); + + if (!sk->sk_shutdown) { + sk->sk_shutdown = SHUTDOWN_MASK; + iso_sock_clear_timer(sk); + __iso_sock_close(sk); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } + + release_sock(sk); + sock_put(sk); + + return err; +} + +static int iso_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + iso_sock_close(sk); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) { + lock_sock(sk); + err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); + release_sock(sk); + } + + sock_orphan(sk); + iso_sock_kill(sk); + return err; +} + +static void iso_sock_ready(struct sock *sk) +{ + BT_DBG("sk %p", sk); + + if (!sk) + return; + + lock_sock(sk); + iso_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + release_sock(sk); +} + +struct iso_list_data { + struct hci_conn *hcon; + int count; +}; + +static bool iso_match_big(struct sock *sk, void *data) +{ + struct hci_evt_le_big_sync_estabilished *ev = data; + + return ev->handle == iso_pi(sk)->qos.big; +} + +static void iso_conn_ready(struct iso_conn *conn) +{ + struct sock *parent; + struct sock *sk = conn->sk; + struct hci_ev_le_big_sync_estabilished *ev; + + BT_DBG("conn %p", conn); + + if (sk) { + iso_sock_ready(conn->sk); + } else { + iso_conn_lock(conn); + + if (!conn->hcon) { + iso_conn_unlock(conn); + return; + } + + ev = hci_recv_event_data(conn->hcon->hdev, + HCI_EVT_LE_BIG_SYNC_ESTABILISHED); + if (ev) + parent = iso_get_sock_listen(&conn->hcon->src, + &conn->hcon->dst, + iso_match_big, ev); + else + parent = iso_get_sock_listen(&conn->hcon->src, + BDADDR_ANY, NULL, NULL); + + if (!parent) { + iso_conn_unlock(conn); + return; + } + + lock_sock(parent); + + sk = iso_sock_alloc(sock_net(parent), NULL, + BTPROTO_ISO, GFP_ATOMIC, 0); + if (!sk) { + release_sock(parent); + iso_conn_unlock(conn); + return; + } + + iso_sock_init(sk, parent); + + bacpy(&iso_pi(sk)->src, &conn->hcon->src); + iso_pi(sk)->src_type = conn->hcon->src_type; + + /* If hcon has no destination address (BDADDR_ANY) it means it + * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to + * initialize using the parent socket destination address. + */ + if (!bacmp(&conn->hcon->dst, BDADDR_ANY)) { + bacpy(&conn->hcon->dst, &iso_pi(parent)->dst); + conn->hcon->dst_type = iso_pi(parent)->dst_type; + conn->hcon->sync_handle = iso_pi(parent)->sync_handle; + } + + bacpy(&iso_pi(sk)->dst, &conn->hcon->dst); + iso_pi(sk)->dst_type = conn->hcon->dst_type; + + hci_conn_hold(conn->hcon); + __iso_chan_add(conn, sk, parent); + + if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) + sk->sk_state = BT_CONNECT2; + else + sk->sk_state = BT_CONNECTED; + + /* Wake up parent */ + parent->sk_data_ready(parent); + + release_sock(parent); + + iso_conn_unlock(conn); + } +} + +static bool iso_match_sid(struct sock *sk, void *data) +{ + struct hci_ev_le_pa_sync_established *ev = data; + + return ev->sid == iso_pi(sk)->bc_sid; +} + +static bool iso_match_sync_handle(struct sock *sk, void *data) +{ + struct hci_evt_le_big_info_adv_report *ev = data; + + return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle; +} + +/* ----- ISO interface with lower layer (HCI) ----- */ + +int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) +{ + struct hci_ev_le_pa_sync_established *ev1; + struct hci_evt_le_big_info_adv_report *ev2; + struct sock *sk; + int lm = 0; + + bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); + + /* Broadcast receiver requires handling of some events before it can + * proceed to establishing a BIG sync: + * + * 1. HCI_EV_LE_PA_SYNC_ESTABLISHED: The socket may specify a specific + * SID to listen to and once sync is estabilished its handle needs to + * be stored in iso_pi(sk)->sync_handle so it can be matched once + * receiving the BIG Info. + * 2. HCI_EVT_LE_BIG_INFO_ADV_REPORT: When connect_ind is triggered by a + * a BIG Info it attempts to check if there any listening socket with + * the same sync_handle and if it does then attempt to create a sync. + */ + ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); + if (ev1) { + sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid, + ev1); + if (sk) + iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); + + goto done; + } + + ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT); + if (ev2) { + sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, + iso_match_sync_handle, ev2); + if (sk) { + int err; + + if (ev2->num_bis < iso_pi(sk)->bc_num_bis) + iso_pi(sk)->bc_num_bis = ev2->num_bis; + + err = hci_le_big_create_sync(hdev, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); + if (err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); + sk = NULL; + } + } + } else { + sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL); + } + +done: + if (!sk) + return lm; + + lm |= HCI_LM_ACCEPT; + + if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) + *flags |= HCI_PROTO_DEFER; + + return lm; +} + +static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) +{ + if (hcon->type != ISO_LINK) { + if (hcon->type != LE_LINK) + return; + + /* Check if LE link has failed */ + if (status) { + if (hcon->link) + iso_conn_del(hcon->link, bt_to_errno(status)); + return; + } + + /* Create CIS if pending */ + hci_le_create_cis(hcon); + return; + } + + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); + + if (!status) { + struct iso_conn *conn; + + conn = iso_conn_add(hcon); + if (conn) + iso_conn_ready(conn); + } else { + iso_conn_del(hcon, bt_to_errno(status)); + } +} + +static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) +{ + if (hcon->type != ISO_LINK) + return; + + BT_DBG("hcon %p reason %d", hcon, reason); + + iso_conn_del(hcon, bt_to_errno(reason)); +} + +void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +{ + struct iso_conn *conn = hcon->iso_data; + struct hci_iso_data_hdr *hdr; + __u16 pb, ts, len; + + if (!conn) + goto drop; + + pb = hci_iso_flags_pb(flags); + ts = hci_iso_flags_ts(flags); + + BT_DBG("conn %p len %d pb 0x%x ts 0x%x", conn, skb->len, pb, ts); + + switch (pb) { + case ISO_START: + case ISO_SINGLE: + if (conn->rx_len) { + BT_ERR("Unexpected start frame (len %d)", skb->len); + kfree_skb(conn->rx_skb); + conn->rx_skb = NULL; + conn->rx_len = 0; + } + + if (ts) { + /* TODO: add timestamp to the packet? */ + hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE); + if (!hdr) { + BT_ERR("Frame is too short (len %d)", skb->len); + goto drop; + } + + } else { + hdr = skb_pull_data(skb, HCI_ISO_DATA_HDR_SIZE); + if (!hdr) { + BT_ERR("Frame is too short (len %d)", skb->len); + goto drop; + } + } + + len = __le16_to_cpu(hdr->slen); + flags = hci_iso_data_flags(len); + len = hci_iso_data_len(len); + + BT_DBG("Start: total len %d, frag len %d flags 0x%4.4x", len, + skb->len, flags); + + if (len == skb->len) { + /* Complete frame received */ + iso_recv_frame(conn, skb); + return; + } + + if (pb == ISO_SINGLE) { + BT_ERR("Frame malformed (len %d, expected len %d)", + skb->len, len); + goto drop; + } + + if (skb->len > len) { + BT_ERR("Frame is too long (len %d, expected len %d)", + skb->len, len); + goto drop; + } + + /* Allocate skb for the complete frame (with header) */ + conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL); + if (!conn->rx_skb) + goto drop; + + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), + skb->len); + conn->rx_len = len - skb->len; + break; + + case ISO_CONT: + BT_DBG("Cont: frag len %d (expecting %d)", skb->len, + conn->rx_len); + + if (!conn->rx_len) { + BT_ERR("Unexpected continuation frame (len %d)", + skb->len); + goto drop; + } + + if (skb->len > conn->rx_len) { + BT_ERR("Fragment is too long (len %d, expected %d)", + skb->len, conn->rx_len); + kfree_skb(conn->rx_skb); + conn->rx_skb = NULL; + conn->rx_len = 0; + goto drop; + } + + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), + skb->len); + conn->rx_len -= skb->len; + return; + + case ISO_END: + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), + skb->len); + conn->rx_len -= skb->len; + + if (!conn->rx_len) { + struct sk_buff *rx_skb = conn->rx_skb; + + /* Complete frame received. iso_recv_frame + * takes ownership of the skb so set the global + * rx_skb pointer to NULL first. + */ + conn->rx_skb = NULL; + iso_recv_frame(conn, rx_skb); + } + break; + } + +drop: + kfree_skb(skb); +} + +static struct hci_cb iso_cb = { + .name = "ISO", + .connect_cfm = iso_connect_cfm, + .disconn_cfm = iso_disconn_cfm, +}; + +static int iso_debugfs_show(struct seq_file *f, void *p) +{ + struct sock *sk; + + read_lock(&iso_sk_list.lock); + + sk_for_each(sk, &iso_sk_list.head) { + seq_printf(f, "%pMR %pMR %d\n", &iso_pi(sk)->src, + &iso_pi(sk)->dst, sk->sk_state); + } + + read_unlock(&iso_sk_list.lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(iso_debugfs); + +static struct dentry *iso_debugfs; + +static const struct proto_ops iso_sock_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .release = iso_sock_release, + .bind = iso_sock_bind, + .connect = iso_sock_connect, + .listen = iso_sock_listen, + .accept = iso_sock_accept, + .getname = iso_sock_getname, + .sendmsg = iso_sock_sendmsg, + .recvmsg = iso_sock_recvmsg, + .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = iso_sock_shutdown, + .setsockopt = iso_sock_setsockopt, + .getsockopt = iso_sock_getsockopt +}; + +static const struct net_proto_family iso_sock_family_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .create = iso_sock_create, +}; + +static bool iso_inited; + +bool iso_enabled(void) +{ + return iso_inited; +} + +int iso_init(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct sockaddr_iso) > sizeof(struct sockaddr)); + + if (iso_inited) + return -EALREADY; + + err = proto_register(&iso_proto, 0); + if (err < 0) + return err; + + err = bt_sock_register(BTPROTO_ISO, &iso_sock_family_ops); + if (err < 0) { + BT_ERR("ISO socket registration failed"); + goto error; + } + + err = bt_procfs_init(&init_net, "iso", &iso_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create ISO proc file"); + bt_sock_unregister(BTPROTO_ISO); + goto error; + } + + BT_INFO("ISO socket layer initialized"); + + hci_register_cb(&iso_cb); + + if (IS_ERR_OR_NULL(bt_debugfs)) + return 0; + + if (!iso_debugfs) { + iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs, + NULL, &iso_debugfs_fops); + } + + iso_inited = true; + + return 0; + +error: + proto_unregister(&iso_proto); + return err; +} + +int iso_exit(void) +{ + if (!iso_inited) + return -EALREADY; + + bt_procfs_cleanup(&init_net, "iso"); + + debugfs_remove(iso_debugfs); + iso_debugfs = NULL; + + hci_unregister_cb(&iso_cb); + + bt_sock_unregister(BTPROTO_ISO); + + proto_unregister(&iso_proto); + + iso_inited = false; + + return 0; +} diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ae78490ecd3d..09ecaf556de5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1369,6 +1369,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan) l2cap_le_flowctl_init(chan, 0); + memset(&req, 0, sizeof(req)); req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); req.mtu = cpu_to_le16(chan->imtu); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 5326f41a58b7..469a0c95b6e8 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -135,6 +135,77 @@ int bt_to_errno(__u16 code) } EXPORT_SYMBOL(bt_to_errno); +/* Unix errno to Bluetooth error codes mapping */ +__u8 bt_status(int err) +{ + /* Don't convert if already positive value */ + if (err >= 0) + return err; + + switch (err) { + case -EBADRQC: + return 0x01; + + case -ENOTCONN: + return 0x02; + + case -EIO: + return 0x03; + + case -EHOSTDOWN: + return 0x04; + + case -EACCES: + return 0x05; + + case -EBADE: + return 0x06; + + case -ENOMEM: + return 0x07; + + case -ETIMEDOUT: + return 0x08; + + case -EMLINK: + return 0x09; + + case EALREADY: + return 0x0b; + + case -EBUSY: + return 0x0c; + + case -ECONNREFUSED: + return 0x0d; + + case -EOPNOTSUPP: + return 0x11; + + case -EINVAL: + return 0x12; + + case -ECONNRESET: + return 0x13; + + case -ECONNABORTED: + return 0x16; + + case ELOOP: + return 0x17; + + case -EPROTONOSUPPORT: + return 0x1a; + + case -EPROTO: + return 0x19; + + default: + return 0x1f; + } +} +EXPORT_SYMBOL(bt_status); + void bt_info(const char *format, ...) { struct va_format vaf; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ae758ab1b558..8cfafd7a0576 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1023,11 +1023,39 @@ static void rpa_expired(struct work_struct *work) hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL); } +static void discov_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_off.work); + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + /* When discoverable timeout triggers, then just make sure + * the limited discoverable flag is cleared. Even in the case + * of a timeout triggered from general discoverable, it is + * safe to unconditionally clear the flag. + */ + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hdev->discov_timeout = 0; + + hci_update_discoverable(hdev); + + mgmt_new_settings(hdev); + + hci_dev_unlock(hdev); +} + static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { if (hci_dev_test_and_set_flag(hdev, HCI_MGMT)) return; + BT_INFO("MGMT ver %d.%d", MGMT_VERSION, MGMT_REVISION); + + INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired); @@ -1082,11 +1110,11 @@ static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, hdev->appearance); - name_len = strlen(hdev->dev_name); + name_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, hdev->dev_name, name_len); - name_len = strlen(hdev->short_name); + name_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, hdev->short_name, name_len); @@ -1611,7 +1639,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_req_update_scan(hdev); + hci_update_scan(hdev); hci_update_passive_scan(hdev); return new_settings(hdev, sk); } @@ -2528,6 +2556,37 @@ static int device_unpaired(struct hci_dev *hdev, bdaddr_t *bdaddr, skip_sk); } +static void unpair_device_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_unpair_device *cp = cmd->param; + + if (!err) + device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); + + cmd->cmd_complete(cmd, err); + mgmt_pending_free(cmd); +} + +static int unpair_device_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_unpair_device *cp = cmd->param; + struct hci_conn *conn; + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + + if (!conn) + return 0; + + return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); +} + static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -2638,7 +2697,7 @@ done: goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_UNPAIR_DEVICE, hdev, cp, + cmd = mgmt_pending_new(sk, MGMT_OP_UNPAIR_DEVICE, hdev, cp, sizeof(*cp)); if (!cmd) { err = -ENOMEM; @@ -2647,9 +2706,10 @@ done: cmd->cmd_complete = addr_cmd_complete; - err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + err = hci_cmd_sync_queue(hdev, unpair_device_sync, cmd, + unpair_device_complete); if (err < 0) - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); unlock: hci_dev_unlock(hdev); @@ -3925,10 +3985,16 @@ static const u8 rpa_resolution_uuid[16] = { 0xea, 0x11, 0x73, 0xc2, 0x48, 0xa1, 0xc0, 0x15, }; +/* 6fbaf188-05e0-496a-9885-d6ddfdb4e03e */ +static const u8 iso_socket_uuid[16] = { + 0x3e, 0xe0, 0xb4, 0xfd, 0xdd, 0xd6, 0x85, 0x98, + 0x6a, 0x49, 0xe0, 0x05, 0x88, 0xf1, 0xba, 0x6f, +}; + static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[102]; /* Enough space for 5 features: 2 + 20 * 5 */ + char buf[122]; /* Enough space for 6 features: 2 + 20 * 6 */ struct mgmt_rp_read_exp_features_info *rp = (void *)buf; u16 idx = 0; u32 flags; @@ -3992,6 +4058,13 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } + if (IS_ENABLED(CONFIG_BT_LE)) { + flags = iso_enabled() ? BIT(0) : 0; + memcpy(rp->features[idx].uuid, iso_socket_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -4384,6 +4457,57 @@ static int set_le_simultaneous_roles_func(struct sock *sk, struct hci_dev *hdev, return err; } +#ifdef CONFIG_BT_LE +static int set_iso_socket_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; + bool val, changed = false; + int err; + + /* Command requires to use the non-controller index */ + if (hdev) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = cp->param[0] ? true : false; + if (val) + err = iso_init(); + else + err = iso_exit(); + + if (!err) + changed = true; + + memcpy(rp.uuid, iso_socket_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_feature_changed(hdev, iso_socket_uuid, val, sk); + + return err; +} +#endif + static const struct mgmt_exp_feature { const u8 *uuid; int (*set_func)(struct sock *sk, struct hci_dev *hdev, @@ -4397,6 +4521,9 @@ static const struct mgmt_exp_feature { EXP_FEAT(quality_report_uuid, set_quality_report_func), EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), EXP_FEAT(le_simultaneous_roles_uuid, set_le_simultaneous_roles_func), +#ifdef CONFIG_BT_LE + EXP_FEAT(iso_socket_uuid, set_iso_socket_func), +#endif /* end with a null feature */ EXP_FEAT(NULL, NULL) @@ -4646,23 +4773,15 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, return err; } -int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) +static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, + void *data, int status) { struct mgmt_rp_add_adv_patterns_monitor rp; - struct mgmt_pending_cmd *cmd; - struct adv_monitor *monitor; - int err = 0; + struct mgmt_pending_cmd *cmd = data; + struct adv_monitor *monitor = cmd->user_data; hci_dev_lock(hdev); - cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev); - if (!cmd) { - cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev); - if (!cmd) - goto done; - } - - monitor = cmd->user_data; rp.monitor_handle = cpu_to_le16(monitor->handle); if (!status) { @@ -4673,26 +4792,29 @@ int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) hci_update_passive_scan(hdev); } - err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(status), &rp, sizeof(rp)); + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); -done: hci_dev_unlock(hdev); - bt_dev_dbg(hdev, "add monitor %d complete, status %u", + bt_dev_dbg(hdev, "add monitor %d complete, status %d", rp.monitor_handle, status); +} - return err; +static int mgmt_add_adv_patterns_monitor_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct adv_monitor *monitor = cmd->user_data; + + return hci_add_adv_monitor(hdev, monitor); } static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, struct adv_monitor *m, u8 status, void *data, u16 len, u16 op) { - struct mgmt_rp_add_adv_patterns_monitor rp; struct mgmt_pending_cmd *cmd; int err; - bool pending; hci_dev_lock(hdev); @@ -4714,12 +4836,11 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, } cmd->user_data = m; - pending = hci_add_adv_monitor(hdev, m, &err); + err = hci_cmd_sync_queue(hdev, mgmt_add_adv_patterns_monitor_sync, cmd, + mgmt_add_adv_patterns_monitor_complete); if (err) { - if (err == -ENOSPC || err == -ENOMEM) + if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; - else if (err == -EINVAL) - status = MGMT_STATUS_INVALID_PARAMS; else status = MGMT_STATUS_FAILED; @@ -4727,18 +4848,6 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!pending) { - mgmt_pending_remove(cmd); - rp.monitor_handle = cpu_to_le16(m->handle); - mgmt_adv_monitor_added(sk, hdev, m->handle); - m->state = ADV_MONITOR_STATE_REGISTERED; - hdev->adv_monitors_cnt++; - - hci_dev_unlock(hdev); - return mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_SUCCESS, - &rp, sizeof(rp)); - } - hci_dev_unlock(hdev); return 0; @@ -4879,49 +4988,46 @@ done: MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI); } -int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status) +static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, + void *data, int status) { struct mgmt_rp_remove_adv_monitor rp; - struct mgmt_cp_remove_adv_monitor *cp; - struct mgmt_pending_cmd *cmd; - int err = 0; + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_remove_adv_monitor *cp = cmd->param; hci_dev_lock(hdev); - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (!cmd) - goto done; - - cp = cmd->param; rp.monitor_handle = cp->monitor_handle; if (!status) hci_update_passive_scan(hdev); - err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, - mgmt_status(status), &rp, sizeof(rp)); + mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); -done: hci_dev_unlock(hdev); - bt_dev_dbg(hdev, "remove monitor %d complete, status %u", + bt_dev_dbg(hdev, "remove monitor %d complete, status %d", rp.monitor_handle, status); +} - return err; +static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_remove_adv_monitor *cp = cmd->param; + u16 handle = __le16_to_cpu(cp->monitor_handle); + + if (!handle) + return hci_remove_all_adv_monitor(hdev); + + return hci_remove_single_adv_monitor(hdev, handle); } static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { - struct mgmt_cp_remove_adv_monitor *cp = data; - struct mgmt_rp_remove_adv_monitor rp; struct mgmt_pending_cmd *cmd; - u16 handle = __le16_to_cpu(cp->monitor_handle); int err, status; - bool pending; - - BT_DBG("request for %s", hdev->name); - rp.monitor_handle = cp->monitor_handle; hci_dev_lock(hdev); @@ -4939,34 +5045,23 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (handle) - pending = hci_remove_single_adv_monitor(hdev, handle, &err); - else - pending = hci_remove_all_adv_monitor(hdev, &err); + err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd, + mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); - if (err == -ENOENT) - status = MGMT_STATUS_INVALID_INDEX; + if (err == -ENOMEM) + status = MGMT_STATUS_NO_RESOURCES; else status = MGMT_STATUS_FAILED; - goto unlock; - } - - /* monitor can be removed without forwarding request to controller */ - if (!pending) { mgmt_pending_remove(cmd); - hci_dev_unlock(hdev); - - return mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_ADV_MONITOR, - MGMT_STATUS_SUCCESS, - &rp, sizeof(rp)); + goto unlock; } hci_dev_unlock(hdev); + return 0; unlock: @@ -6711,11 +6806,6 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); - if (conn) { - hci_conn_drop(conn); - hci_conn_put(conn); - } - mgmt_pending_free(cmd); } @@ -6734,15 +6824,10 @@ static int get_conn_info_sync(struct hci_dev *hdev, void *data) else conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); - if (!conn || conn != cmd->user_data || conn->state != BT_CONNECTED) { - if (cmd->user_data) { - hci_conn_drop(cmd->user_data); - hci_conn_put(cmd->user_data); - cmd->user_data = NULL; - } + if (!conn || conn->state != BT_CONNECTED) return MGMT_STATUS_NOT_CONNECTED; - } + cmd->user_data = conn; handle = cpu_to_le16(conn->handle); /* Refresh RSSI each time */ @@ -6821,11 +6906,12 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, cmd = mgmt_pending_new(sk, MGMT_OP_GET_CONN_INFO, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else + } else { err = hci_cmd_sync_queue(hdev, get_conn_info_sync, cmd, get_conn_info_complete); + } if (err < 0) { mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, @@ -6837,9 +6923,6 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - hci_conn_hold(conn); - cmd->user_data = hci_conn_get(conn); - conn->conn_info_timestamp = jiffies; } else { /* Cache is valid, just reply with values cached in hci_conn */ @@ -6878,8 +6961,6 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) if (conn) { rp.piconet_clock = cpu_to_le32(conn->clock); rp.accuracy = cpu_to_le16(conn->clock_accuracy); - hci_conn_drop(conn); - hci_conn_put(conn); } complete: @@ -6894,30 +6975,21 @@ static int get_clock_info_sync(struct hci_dev *hdev, void *data) struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_get_clock_info *cp = cmd->param; struct hci_cp_read_clock hci_cp; - struct hci_conn *conn = cmd->user_data; - int err; + struct hci_conn *conn; memset(&hci_cp, 0, sizeof(hci_cp)); - err = hci_read_clock_sync(hdev, &hci_cp); + hci_read_clock_sync(hdev, &hci_cp); - if (conn) { - /* Make sure connection still exists */ - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, - &cp->addr.bdaddr); + /* Make sure connection still exists */ + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); + if (!conn || conn->state != BT_CONNECTED) + return MGMT_STATUS_NOT_CONNECTED; - if (conn && conn == cmd->user_data && - conn->state == BT_CONNECTED) { - hci_cp.handle = cpu_to_le16(conn->handle); - hci_cp.which = 0x01; /* Piconet clock */ - err = hci_read_clock_sync(hdev, &hci_cp); - } else if (cmd->user_data) { - hci_conn_drop(cmd->user_data); - hci_conn_put(cmd->user_data); - cmd->user_data = NULL; - } - } + cmd->user_data = conn; + hci_cp.handle = cpu_to_le16(conn->handle); + hci_cp.which = 0x01; /* Piconet clock */ - return err; + return hci_read_clock_sync(hdev, &hci_cp); } static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, @@ -6976,10 +7048,6 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, if (cmd) mgmt_pending_free(cmd); - - } else if (conn) { - hci_conn_hold(conn); - cmd->user_data = hci_conn_get(conn); } @@ -7108,7 +7176,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_req_update_scan(hdev); + hci_update_scan(hdev); goto added; } @@ -7220,7 +7288,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_req_update_scan(hdev); + hci_update_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -7284,7 +7352,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_req_update_scan(hdev); + hci_update_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -8090,7 +8158,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, u16 timeout, duration; unsigned int prev_instance_cnt; u8 schedule_instance = 0; - struct adv_info *next_instance; + struct adv_info *adv, *next_instance; int err; struct mgmt_pending_cmd *cmd; @@ -8141,7 +8209,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, prev_instance_cnt = hdev->adv_instance_cnt; - err = hci_add_adv_instance(hdev, cp->instance, flags, + adv = hci_add_adv_instance(hdev, cp->instance, flags, cp->adv_data_len, cp->data, cp->scan_rsp_len, cp->data + cp->adv_data_len, @@ -8149,7 +8217,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, HCI_ADV_TX_POWER_NO_PREFERENCE, hdev->le_adv_min_interval, hdev->le_adv_max_interval); - if (err < 0) { + if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_FAILED); goto unlock; @@ -8280,6 +8348,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_add_ext_adv_params *cp = data; struct mgmt_rp_add_ext_adv_params rp; struct mgmt_pending_cmd *cmd = NULL; + struct adv_info *adv; u32 flags, min_interval, max_interval; u16 timeout, duration; u8 status; @@ -8349,11 +8418,11 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, HCI_ADV_TX_POWER_NO_PREFERENCE; /* Create advertising instance with no advertising or response data */ - err = hci_add_adv_instance(hdev, cp->instance, flags, - 0, NULL, 0, NULL, timeout, duration, - tx_power, min_interval, max_interval); + adv = hci_add_adv_instance(hdev, cp->instance, flags, 0, NULL, 0, NULL, + timeout, duration, tx_power, min_interval, + max_interval); - if (err < 0) { + if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_STATUS_FAILED); goto unlock; @@ -8866,6 +8935,11 @@ void mgmt_index_removed(struct hci_dev *hdev) mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev), HCI_MGMT_EXT_INDEX_EVENTS); + + /* Cancel any remaining timed work */ + cancel_delayed_work_sync(&hdev->discov_off); + cancel_delayed_work_sync(&hdev->service_cache); + cancel_delayed_work_sync(&hdev->rpa_expired); } void mgmt_power_on(struct hci_dev *hdev, int err) diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index f43994523b1f..14975769f678 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -99,18 +99,11 @@ struct msft_data { __u8 evt_prefix_len; __u8 *evt_prefix; struct list_head handle_map; - __u16 pending_add_handle; - __u16 pending_remove_handle; __u8 resuming; __u8 suspending; __u8 filter_enabled; }; -static int __msft_add_monitor_pattern(struct hci_dev *hdev, - struct adv_monitor *monitor); -static int __msft_remove_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, u16 handle); - bool msft_monitor_supported(struct hci_dev *hdev) { return !!(msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR); @@ -164,34 +157,6 @@ failed: return false; } -static void reregister_monitor(struct hci_dev *hdev, int handle) -{ - struct adv_monitor *monitor; - struct msft_data *msft = hdev->msft_data; - int err; - - while (1) { - monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); - if (!monitor) { - /* All monitors have been resumed */ - msft->resuming = false; - hci_update_passive_scan(hdev); - return; - } - - msft->pending_add_handle = (u16)handle; - err = __msft_add_monitor_pattern(hdev, monitor); - - /* If success, we return and wait for monitor added callback */ - if (!err) - return; - - /* Otherwise remove the monitor and keep registering */ - hci_free_adv_monitor(hdev, monitor); - handle++; - } -} - /* is_mgmt = true matches the handle exposed to userspace via mgmt. * is_mgmt = false matches the handle used by the msft controller. * This function requires the caller holds hdev->lock @@ -243,34 +208,27 @@ static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle, return count; } -static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev, - u8 status, u16 opcode, - struct sk_buff *skb) +static int msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u16 opcode, + struct adv_monitor *monitor, + struct sk_buff *skb) { struct msft_rp_le_monitor_advertisement *rp; - struct adv_monitor *monitor; struct msft_monitor_advertisement_handle_data *handle_data; struct msft_data *msft = hdev->msft_data; + int status = 0; hci_dev_lock(hdev); - monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle); - if (!monitor) { - bt_dev_err(hdev, "msft add advmon: monitor %u is not found!", - msft->pending_add_handle); + rp = (struct msft_rp_le_monitor_advertisement *)skb->data; + if (skb->len < sizeof(*rp)) { status = HCI_ERROR_UNSPECIFIED; goto unlock; } + status = rp->status; if (status) goto unlock; - rp = (struct msft_rp_le_monitor_advertisement *)skb->data; - if (skb->len < sizeof(*rp)) { - status = HCI_ERROR_UNSPECIFIED; - goto unlock; - } - handle_data = kmalloc(sizeof(*handle_data), GFP_KERNEL); if (!handle_data) { status = HCI_ERROR_UNSPECIFIED; @@ -285,29 +243,23 @@ static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev, monitor->state = ADV_MONITOR_STATE_OFFLOADED; unlock: - if (status && monitor) + if (status) hci_free_adv_monitor(hdev, monitor); hci_dev_unlock(hdev); - if (!msft->resuming) - hci_add_adv_patterns_monitor_complete(hdev, status); + return status; } -static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, - u8 status, u16 opcode, - struct sk_buff *skb) +static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, + u16 opcode, + struct adv_monitor *monitor, + struct sk_buff *skb) { - struct msft_cp_le_cancel_monitor_advertisement *cp; struct msft_rp_le_cancel_monitor_advertisement *rp; - struct adv_monitor *monitor; struct msft_monitor_advertisement_handle_data *handle_data; struct msft_data *msft = hdev->msft_data; - int err; - bool pending; - - if (status) - goto done; + int status = 0; rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data; if (skb->len < sizeof(*rp)) { @@ -315,22 +267,22 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, goto done; } + status = rp->status; + if (status) + goto done; + hci_dev_lock(hdev); - cp = hci_sent_cmd_data(hdev, hdev->msft_opcode); - handle_data = msft_find_handle_data(hdev, cp->handle, false); + handle_data = msft_find_handle_data(hdev, monitor->handle, true); if (handle_data) { - monitor = idr_find(&hdev->adv_monitors_idr, - handle_data->mgmt_handle); - - if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) + if (monitor->state == ADV_MONITOR_STATE_OFFLOADED) monitor->state = ADV_MONITOR_STATE_REGISTERED; /* Do not free the monitor if it is being removed due to * suspend. It will be re-monitored on resume. */ - if (monitor && !msft->suspending) { + if (!msft->suspending) { hci_free_adv_monitor(hdev, monitor); /* Clear any monitored devices by this Adv Monitor */ @@ -342,35 +294,19 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, kfree(handle_data); } - /* If remove all monitors is required, we need to continue the process - * here because the earlier it was paused when waiting for the - * response from controller. - */ - if (msft->pending_remove_handle == 0) { - pending = hci_remove_all_adv_monitor(hdev, &err); - if (pending) { - hci_dev_unlock(hdev); - return; - } - - if (err) - status = HCI_ERROR_UNSPECIFIED; - } - hci_dev_unlock(hdev); done: - if (!msft->suspending) - hci_remove_adv_monitor_complete(hdev, status); + return status; } +/* This function requires the caller holds hci_req_sync_lock */ static int msft_remove_monitor_sync(struct hci_dev *hdev, struct adv_monitor *monitor) { struct msft_cp_le_cancel_monitor_advertisement cp; struct msft_monitor_advertisement_handle_data *handle_data; struct sk_buff *skb; - u8 status; handle_data = msft_find_handle_data(hdev, monitor->handle, true); @@ -386,13 +322,8 @@ static int msft_remove_monitor_sync(struct hci_dev *hdev, if (IS_ERR(skb)) return PTR_ERR(skb); - status = skb->data[0]; - skb_pull(skb, 1); - - msft_le_cancel_monitor_advertisement_cb(hdev, status, hdev->msft_opcode, - skb); - - return status; + return msft_le_cancel_monitor_advertisement_cb(hdev, hdev->msft_opcode, + monitor, skb); } /* This function requires the caller holds hci_req_sync_lock */ @@ -463,7 +394,6 @@ static int msft_add_monitor_sync(struct hci_dev *hdev, ptrdiff_t offset = 0; u8 pattern_count = 0; struct sk_buff *skb; - u8 status; if (!msft_monitor_pattern_valid(monitor)) return -EINVAL; @@ -505,20 +435,40 @@ static int msft_add_monitor_sync(struct hci_dev *hdev, if (IS_ERR(skb)) return PTR_ERR(skb); - status = skb->data[0]; - skb_pull(skb, 1); + return msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode, + monitor, skb); +} - msft_le_monitor_advertisement_cb(hdev, status, hdev->msft_opcode, skb); +/* This function requires the caller holds hci_req_sync_lock */ +static void reregister_monitor(struct hci_dev *hdev) +{ + struct adv_monitor *monitor; + struct msft_data *msft = hdev->msft_data; + int handle = 0; - return status; + if (!msft) + return; + + msft->resuming = true; + + while (1) { + monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); + if (!monitor) + break; + + msft_add_monitor_sync(hdev, monitor); + + handle++; + } + + /* All monitors have been reregistered */ + msft->resuming = false; } /* This function requires the caller holds hci_req_sync_lock */ int msft_resume_sync(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; - struct adv_monitor *monitor; - int handle = 0; if (!msft || !msft_monitor_supported(hdev)) return 0; @@ -533,24 +483,12 @@ int msft_resume_sync(struct hci_dev *hdev) hci_dev_unlock(hdev); - msft->resuming = true; - - while (1) { - monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); - if (!monitor) - break; - - msft_add_monitor_sync(hdev, monitor); - - handle++; - } - - /* All monitors have been resumed */ - msft->resuming = false; + reregister_monitor(hdev); return 0; } +/* This function requires the caller holds hci_req_sync_lock */ void msft_do_open(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; @@ -583,7 +521,7 @@ void msft_do_open(struct hci_dev *hdev) /* Monitors get removed on power off, so we need to explicitly * tell the controller to re-monitor. */ - reregister_monitor(hdev, 0); + reregister_monitor(hdev); } } @@ -829,66 +767,7 @@ static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev, hci_dev_unlock(hdev); } -/* This function requires the caller holds hdev->lock */ -static int __msft_add_monitor_pattern(struct hci_dev *hdev, - struct adv_monitor *monitor) -{ - struct msft_cp_le_monitor_advertisement *cp; - struct msft_le_monitor_advertisement_pattern_data *pattern_data; - struct msft_le_monitor_advertisement_pattern *pattern; - struct adv_pattern *entry; - struct hci_request req; - struct msft_data *msft = hdev->msft_data; - size_t total_size = sizeof(*cp) + sizeof(*pattern_data); - ptrdiff_t offset = 0; - u8 pattern_count = 0; - int err = 0; - - if (!msft_monitor_pattern_valid(monitor)) - return -EINVAL; - - list_for_each_entry(entry, &monitor->patterns, list) { - pattern_count++; - total_size += sizeof(*pattern) + entry->length; - } - - cp = kmalloc(total_size, GFP_KERNEL); - if (!cp) - return -ENOMEM; - - cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT; - cp->rssi_high = monitor->rssi.high_threshold; - cp->rssi_low = monitor->rssi.low_threshold; - cp->rssi_low_interval = (u8)monitor->rssi.low_threshold_timeout; - cp->rssi_sampling_period = monitor->rssi.sampling_period; - - cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN; - - pattern_data = (void *)cp->data; - pattern_data->count = pattern_count; - - list_for_each_entry(entry, &monitor->patterns, list) { - pattern = (void *)(pattern_data->data + offset); - /* the length also includes data_type and offset */ - pattern->length = entry->length + 2; - pattern->data_type = entry->ad_type; - pattern->start_byte = entry->offset; - memcpy(pattern->pattern, entry->value, entry->length); - offset += sizeof(*pattern) + entry->length; - } - - hci_req_init(&req, hdev); - hci_req_add(&req, hdev->msft_opcode, total_size, cp); - err = hci_req_run_skb(&req, msft_le_monitor_advertisement_cb); - kfree(cp); - - if (!err) - msft->pending_add_handle = monitor->handle; - - return err; -} - -/* This function requires the caller holds hdev->lock */ +/* This function requires the caller holds hci_req_sync_lock */ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) { struct msft_data *msft = hdev->msft_data; @@ -899,41 +778,11 @@ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) if (msft->resuming || msft->suspending) return -EBUSY; - return __msft_add_monitor_pattern(hdev, monitor); + return msft_add_monitor_sync(hdev, monitor); } -/* This function requires the caller holds hdev->lock */ -static int __msft_remove_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, u16 handle) -{ - struct msft_cp_le_cancel_monitor_advertisement cp; - struct msft_monitor_advertisement_handle_data *handle_data; - struct hci_request req; - struct msft_data *msft = hdev->msft_data; - int err = 0; - - handle_data = msft_find_handle_data(hdev, monitor->handle, true); - - /* If no matched handle, just remove without telling controller */ - if (!handle_data) - return -ENOENT; - - cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT; - cp.handle = handle_data->msft_handle; - - hci_req_init(&req, hdev); - hci_req_add(&req, hdev->msft_opcode, sizeof(cp), &cp); - err = hci_req_run_skb(&req, msft_le_cancel_monitor_advertisement_cb); - - if (!err) - msft->pending_remove_handle = handle; - - return err; -} - -/* This function requires the caller holds hdev->lock */ -int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - u16 handle) +/* This function requires the caller holds hci_req_sync_lock */ +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { struct msft_data *msft = hdev->msft_data; @@ -943,7 +792,7 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, if (msft->resuming || msft->suspending) return -EBUSY; - return __msft_remove_monitor(hdev, monitor, handle); + return msft_remove_monitor_sync(hdev, monitor); } void msft_req_add_set_filter_enable(struct hci_request *req, bool enable) diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index afcaf7d3b1cb..2a63205b377b 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -20,8 +20,7 @@ void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb); __u64 msft_get_features(struct hci_dev *hdev); int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor); -int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - u16 handle); +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); void msft_req_add_set_filter_enable(struct hci_request *req, bool enable); int msft_set_filter_enable(struct hci_dev *hdev, bool enable); int msft_suspend_sync(struct hci_dev *hdev); @@ -49,8 +48,7 @@ static inline int msft_add_monitor_pattern(struct hci_dev *hdev, } static inline int msft_remove_monitor(struct hci_dev *hdev, - struct adv_monitor *monitor, - u16 handle) + struct adv_monitor *monitor) { return -EOPNOTSUPP; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2ca96acbc50a..cbc9cd5058cb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -691,52 +691,35 @@ noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) { } +noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_check_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test1) -BTF_ID(func, bpf_kfunc_call_test2) -BTF_ID(func, bpf_kfunc_call_test3) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_ID(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID(func, bpf_kfunc_call_test_pass1) -BTF_ID(func, bpf_kfunc_call_test_pass2) -BTF_ID(func, bpf_kfunc_call_test_fail1) -BTF_ID(func, bpf_kfunc_call_test_fail2) -BTF_ID(func, bpf_kfunc_call_test_fail3) -BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_SET_END(test_sk_check_kfunc_ids) - -BTF_SET_START(test_sk_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_acquire_kfunc_ids) - -BTF_SET_START(test_sk_release_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_SET_END(test_sk_release_kfunc_ids) - -BTF_SET_START(test_sk_ret_null_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_ret_null_kfunc_ids) - -BTF_SET_START(test_sk_kptr_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_kptr_acquire_kfunc_ids) +BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) +BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) @@ -955,6 +938,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + if (!skb->len) + return -EINVAL; + if (!__skb) return 0; @@ -1617,12 +1603,8 @@ out: } static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &test_sk_check_kfunc_ids, - .acquire_set = &test_sk_acquire_kfunc_ids, - .release_set = &test_sk_release_kfunc_ids, - .ret_null_set = &test_sk_ret_null_kfunc_ids, - .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids + .owner = THIS_MODULE, + .set = &test_sk_check_kfunc_ids, }; BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) diff --git a/net/core/datagram.c b/net/core/datagram.c index 8c702904d960..f3988ef8e9af 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -616,7 +616,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, { int frag; - if (msg && msg->sg_from_iter) + if (msg && msg->msg_ubuf && msg->sg_from_iter) return msg->sg_from_iter(sk, skb, from, length); frag = skb_shinfo(skb)->nr_frags; diff --git a/net/core/dev.c b/net/core/dev.c index d588fd0a54ce..716df64fcfa5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) bool again = false; skb_reset_mac_header(skb); + skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); diff --git a/net/core/filter.c b/net/core/filter.c index 59aada9c4e19..57c5e4c4efd2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -237,7 +237,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u16 tmp, *ptr; + __be16 tmp, *ptr; const int len = sizeof(tmp); if (offset >= 0) { @@ -264,7 +264,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u32 tmp, *ptr; + __be32 tmp, *ptr; const int len = sizeof(tmp); if (likely(offset >= 0)) { @@ -7084,7 +7084,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -7159,7 +7159,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 5f85e01d4093..b0ff6153be62 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 266d3b7b7d0b..81627892bdd4 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (i != msg_rx->sg.end); + } while (!sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + if (!sge->length && sg_is_last(sge)) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 3f00a28fe762..5daa1fa54249 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req || + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. @@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 3738f2d40a0b..2dd76eb1621c 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; bool vlan_filtering; int err; @@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - dsa_switch_for_each_port(dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(dp); + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(other_dp); if (br && br_vlan_enabled(br)) { change_vlan_filtering = false; @@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, ds->vlan_filtering = vlan_filtering; dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = dp->slave; + struct net_device *slave = other_dp->slave; /* We might be called in the unbind path, so not * all slave devices might still be registered. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fac552e98eb9..3ca0cc467886 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { @@ -335,7 +335,7 @@ lookup_protocol: inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1711,24 +1711,14 @@ static const struct net_protocol igmp_protocol = { }; #endif -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol tcp_protocol = { - .early_demux = tcp_v4_early_demux, - .early_demux_handler = tcp_v4_early_demux, +static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, .icmp_strict_tag_validation = 1, }; -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol udp_protocol = { - .early_demux = udp_v4_early_demux, - .early_demux_handler = udp_v4_early_demux, +static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 6eea1e9e998d..f8ad04470d3a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("%s: %s digestsize %u != %hu\n", + pr_info("%s: %s digestsize %u != %u\n", __func__, x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 7a181631b995..85a9e500c42d 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -197,17 +197,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) -BTF_ID(func, tcp_reno_ssthresh) -BTF_ID(func, tcp_reno_cong_avoid) -BTF_ID(func, tcp_reno_undo_cwnd) -BTF_ID(func, tcp_slow_start) -BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) +BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_ID_FLAGS(func, tcp_reno_ssthresh) +BTF_ID_FLAGS(func, tcp_reno_cong_avoid) +BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) +BTF_ID_FLAGS(func, tcp_slow_start) +BTF_ID_FLAGS(func, tcp_cong_avoid_ai) +BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_tcp_ca_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_tcp_ca_check_kfunc_ids, }; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 7eae8d686e20..5c03eba787e5 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1106,7 +1106,7 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %hu\n", + pr_info("ESP: %s digestsize %u != %u\n", x->aalg->alg_name, crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 14f037e8ac55..2dc97583d279 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2217,7 +2217,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 57c4f0d87a7a..d5d745c3e345 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -881,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.rst */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b65d074d9620..e3ab0cb61624 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } @@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: @@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches @@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 53f5f956d948..eb31c7158b39 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -263,7 +263,7 @@ next_port: goto other_half_scan; } - if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; @@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); - max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e3aa436a1bdf..e18931a6d153 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - if (net->ipv4.sysctl_ip_fwd_update_priority) + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority)) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b1165f717cd1..1b512390b3cf 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); +int tcp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int (*edemux)(struct sk_buff *skb); int err, drop_reason; struct rtable *rt; @@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, goto drop_error; } - if (net->ipv4.sysctl_ip_early_demux && + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, - udp_v4_early_demux, skb); - if (unlikely(err)) - goto drop_error; - /* must reload iph, skb->head might have changed */ - iph = ip_hdr(skb); + switch (iph->protocol) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { + tcp_v4_early_demux(skb); + + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { + err = udp_v4_early_demux(skb); + if (unlikely(err)) + goto drop_error; + + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; } } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 445a9ecaefa1..a8a323ecbb54 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = -EINVAL; @@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist_flex); @@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; @@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, { struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - net->ipv4.sysctl_ip_default_ttl : + READ_ONCE(net->ipv4.sysctl_ip_default_ttl) : inet->uc_ttl); break; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 918c61fda0f3..d640adcaf1b1 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); skb_reset_transport_header(nskb); icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 28836071f0a6..0088a4c64d77 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8f4a8f66d604..bce603f399bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; @@ -1928,7 +1928,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -1957,7 +1957,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was @@ -2017,7 +2017,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net, static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -2047,7 +2047,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, struct flow_keys hash_keys; u32 mhash = 0; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b387c4835155..942d2dfa1115 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 108fd86f2718..5490c285668b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, * port limit. */ if ((range[1] < range[0]) || - (range[0] < net->ipv4.sysctl_ip_prot_sock)) + (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else set_local_port_range(net, range); @@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, .extra2 = &ip_privileged_port_max, }; - pports = net->ipv4.sysctl_ip_prot_sock; + pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); @@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, if (range[0] < pports) ret = -EINVAL; else - net->ipv4.sysctl_ip_prot_sock = pports; + WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports); } return ret; @@ -350,61 +350,6 @@ bad_key: return ret; } -static void proc_configure_early_demux(int enabled, int protocol) -{ - struct net_protocol *ipprot; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_protocol *ip6prot; -#endif - - rcu_read_lock(); - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot) - ipprot->early_demux = enabled ? ipprot->early_demux_handler : - NULL; - -#if IS_ENABLED(CONFIG_IPV6) - ip6prot = rcu_dereference(inet6_protos[protocol]); - if (ip6prot) - ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : - NULL; -#endif - rcu_read_unlock(); -} - -static int proc_tcp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_tcp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_TCP); - } - - return ret; -} - -static int proc_udp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_udp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_UDP); - } - - return ret; -} - static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_udp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_udp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_tcp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "nexthop_compat_mode", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ddf63a0ad643..ba2bdc811374 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -443,7 +443,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -1171,7 +1171,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -3691,7 +3692,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -4041,12 +4043,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 075e744bfb48..54eec33c6e1c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,24 +1154,24 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_check_kfunc_ids) +BTF_SET8_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) +BTF_ID_FLAGS(func, bbr_init) +BTF_ID_FLAGS(func, bbr_main) +BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_undo_cwnd) +BTF_ID_FLAGS(func, bbr_cwnd_event) +BTF_ID_FLAGS(func, bbr_ssthresh) +BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_check_kfunc_ids) +BTF_SET8_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_bbr_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_bbr_check_kfunc_ids, }; static int __init bbr_register(void) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 68178e7280ce..768c10c1f649 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,22 +485,22 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_check_kfunc_ids) +BTF_SET8_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) +BTF_ID_FLAGS(func, cubictcp_init) +BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) +BTF_ID_FLAGS(func, cubictcp_cong_avoid) +BTF_ID_FLAGS(func, cubictcp_state) +BTF_ID_FLAGS(func, cubictcp_cwnd_event) +BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_check_kfunc_ids) +BTF_SET8_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_cubic_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index ab034a4e9324..2a6c0dd665a4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -239,22 +239,22 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_check_kfunc_ids) +BTF_SET8_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, dctcp_init) -BTF_ID(func, dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) +BTF_ID_FLAGS(func, dctcp_init) +BTF_ID_FLAGS(func, dctcp_update_alpha) +BTF_ID_FLAGS(func, dctcp_cwnd_event) +BTF_ID_FLAGS(func, dctcp_ssthresh) +BTF_ID_FLAGS(func, dctcp_cwnd_undo) +BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_check_kfunc_ids) +BTF_SET8_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_dctcp_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_dctcp_check_kfunc_ids, }; static int __init dctcp_register(void) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdbcf2a6d08e..825b216d11f5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; @@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); - if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ @@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + unsigned int tfo_bh_timeout = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index acee998c14b2..ae73b34d32e9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1050,7 +1050,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ @@ -2029,7 +2029,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2094,7 +2094,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) static bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } /* If we detect SACK reneging, forget all SACK information @@ -2138,6 +2139,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2158,10 +2160,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -3463,7 +3467,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; @@ -4056,7 +4061,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4072,7 +4077,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4080,7 +4085,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -5564,7 +5569,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); @@ -6794,11 +6799,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6806,8 +6814,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6856,7 +6863,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6890,13 +6897,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6945,10 +6954,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. */ - if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + if (!syncookies && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 228d36692d08..c7e7101647dc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7029b0e98edb..a501150deaa3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6854bb1fb32b..cb95d88497ae 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -781,7 +781,7 @@ listen_overflow: if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); - if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 858a15cc2cc9..2b72ccd2e651 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -1719,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } @@ -1762,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; @@ -1897,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -2282,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -2366,7 +2367,7 @@ static int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. */ @@ -2740,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ @@ -3104,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -3656,7 +3657,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3692,7 +3693,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); @@ -4099,7 +4100,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 48f30e7209f2..50abaa941387 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2208755e8efc..b4dfb82d6ecb 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { @@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -374,7 +375,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -400,12 +401,15 @@ abort: tcp_write_err(sk); static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); - int max_retries = icsk->icsk_syn_retries ? : - sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); + int max_retries; req->rsk_ops->syn_ack_timeout(req); + /* add one more retry for fastopen */ + max_retries = icsk->icsk_syn_retries ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; @@ -568,7 +572,7 @@ out_reset_timer: * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -579,7 +583,7 @@ out_reset_timer: } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6ed807b6c647..b624e3d8c5f0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7042,9 +7042,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.accept_untracked_na, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)SYSCTL_ZERO, - .extra2 = (void *)SYSCTL_ONE, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { /* sentinel */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0ee0770e79aa..2ce0c44d0081 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -227,7 +227,7 @@ lookup_protocol: RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 0322cc86b84e..e1ebf5e42ebe 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -45,20 +45,23 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); - - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; - - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - INDIRECT_CALL_2(edemux, tcp_v6_early_demux, - udp_v6_early_demux, skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } } + if (!skb_valid_dst(skb)) ip6_route_input(skb); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ecf3a553a0dc..b1179f62bd23 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -64,6 +64,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (err) return err; + memset(&fl6, 0, sizeof(fl6)); + if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); if (msg->msg_namelen < sizeof(*u)) @@ -72,12 +74,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); + if (np->sndflow) + fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; + fl6.flowlabel = np->flow_label; } if (!oif) @@ -101,7 +106,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; - memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; if (msg->msg_controllen) { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9cc123f000fb..5014aa663452 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70d4890d8d2f..85b8b765dcb1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1825,7 +1825,7 @@ do_time_wait: goto discard_it; } -INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -2182,12 +2182,7 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol tcpv6_protocol = { - .early_demux = tcp_v6_early_demux, - .early_demux_handler = tcp_v6_early_demux, +static const struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index be074f07073a..16c176e7c69a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index bc4d5cd63a94..1cd87b28c9b0 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -55,57 +55,131 @@ enum { NF_BPF_CT_OPTS_SZ = 12, }; -static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, - struct bpf_sock_tuple *bpf_tuple, - u32 tuple_len, u8 protonum, - s32 netns_id, u8 *dir) +static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, u8 protonum, u8 dir, + struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_tuple_hash *hash; - struct nf_conntrack_tuple tuple; - struct nf_conn *ct; + union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3; + union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3; + union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u + : &tuple->src.u; + union nf_conntrack_man_proto *dport = dir ? &tuple->src.u + : (void *)&tuple->dst.u; if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) - return ERR_PTR(-EPROTO); - if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) - return ERR_PTR(-EINVAL); + return -EPROTO; + + memset(tuple, 0, sizeof(*tuple)); - memset(&tuple, 0, sizeof(tuple)); switch (tuple_len) { case sizeof(bpf_tuple->ipv4): - tuple.src.l3num = AF_INET; - tuple.src.u3.ip = bpf_tuple->ipv4.saddr; - tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; - tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; - tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; + tuple->src.l3num = AF_INET; + src->ip = bpf_tuple->ipv4.saddr; + sport->tcp.port = bpf_tuple->ipv4.sport; + dst->ip = bpf_tuple->ipv4.daddr; + dport->tcp.port = bpf_tuple->ipv4.dport; break; case sizeof(bpf_tuple->ipv6): - tuple.src.l3num = AF_INET6; - memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); - tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; - memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); - tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; + tuple->src.l3num = AF_INET6; + memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); + sport->tcp.port = bpf_tuple->ipv6.sport; + memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); + dport->tcp.port = bpf_tuple->ipv6.dport; break; default: - return ERR_PTR(-EAFNOSUPPORT); + return -EAFNOSUPPORT; + } + tuple->dst.protonum = protonum; + tuple->dst.dir = dir; + + return 0; +} + +static struct nf_conn * +__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len, + u32 timeout) +{ + struct nf_conntrack_tuple otuple, rtuple; + struct nf_conn *ct; + int err; + + if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts_len != NF_BPF_CT_OPTS_SZ) + return ERR_PTR(-EINVAL); + + if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, + IP_CT_DIR_ORIGINAL, &otuple); + if (err < 0) + return ERR_PTR(err); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, + IP_CT_DIR_REPLY, &rtuple); + if (err < 0) + return ERR_PTR(err); + + if (opts->netns_id >= 0) { + net = get_net_ns_by_id(net, opts->netns_id); + if (unlikely(!net)) + return ERR_PTR(-ENONET); } - tuple.dst.protonum = protonum; + ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple, + GFP_ATOMIC); + if (IS_ERR(ct)) + goto out; + + memset(&ct->proto, 0, sizeof(ct->proto)); + __nf_ct_set_timeout(ct, timeout * HZ); + ct->status |= IPS_CONFIRMED; + +out: + if (opts->netns_id >= 0) + put_net(net); + + return ct; +} + +static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, + struct bpf_sock_tuple *bpf_tuple, + u32 tuple_len, struct bpf_ct_opts *opts, + u32 opts_len) +{ + struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + int err; + + if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || + opts_len != NF_BPF_CT_OPTS_SZ) + return ERR_PTR(-EINVAL); + if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP)) + return ERR_PTR(-EPROTO); + if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) + return ERR_PTR(-EINVAL); + + err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto, + IP_CT_DIR_ORIGINAL, &tuple); + if (err < 0) + return ERR_PTR(err); - if (netns_id >= 0) { - net = get_net_ns_by_id(net, netns_id); + if (opts->netns_id >= 0) { + net = get_net_ns_by_id(net, opts->netns_id); if (unlikely(!net)) return ERR_PTR(-ENONET); } hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); - if (netns_id >= 0) + if (opts->netns_id >= 0) put_net(net); if (!hash) return ERR_PTR(-ENOENT); ct = nf_ct_tuplehash_to_ctrack(hash); - if (dir) - *dir = NF_CT_DIRECTION(hash); + opts->dir = NF_CT_DIRECTION(hash); return ct; } @@ -114,6 +188,43 @@ __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in nf_conntrack BTF"); +struct nf_conn___init { + struct nf_conn ct; +}; + +/* bpf_xdp_ct_alloc - Allocate a new CT entry + * + * Parameters: + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for allocation (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn___init * +bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; + struct nf_conn *nfct; + + nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz, + opts, opts__sz, 10); + if (IS_ERR(nfct)) { + if (opts) + opts->error = PTR_ERR(nfct); + return NULL; + } + + return (struct nf_conn___init *)nfct; +} + /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a * reference to it * @@ -138,25 +249,50 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, struct net *caller_net; struct nf_conn *nfct; - BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); - - if (!opts) - return NULL; - if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts__sz != NF_BPF_CT_OPTS_SZ) { - opts->error = -EINVAL; - return NULL; - } caller_net = dev_net(ctx->rxq->dev); - nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id, &opts->dir); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz); if (IS_ERR(nfct)) { - opts->error = PTR_ERR(nfct); + if (opts) + opts->error = PTR_ERR(nfct); return NULL; } return nfct; } +/* bpf_skb_ct_alloc - Allocate a new CT entry + * + * Parameters: + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program + * Cannot be NULL + * @bpf_tuple - Pointer to memory representing the tuple to look up + * Cannot be NULL + * @tuple__sz - Length of the tuple structure + * Must be one of sizeof(bpf_tuple->ipv4) or + * sizeof(bpf_tuple->ipv6) + * @opts - Additional options for allocation (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_ct_opts structure + * Must be NF_BPF_CT_OPTS_SZ (12) + */ +struct nf_conn___init * +bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct nf_conn *nfct; + struct net *net; + + net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); + nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10); + if (IS_ERR(nfct)) { + if (opts) + opts->error = PTR_ERR(nfct); + return NULL; + } + + return (struct nf_conn___init *)nfct; +} + /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a * reference to it * @@ -181,20 +317,31 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, struct net *caller_net; struct nf_conn *nfct; - BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); - - if (!opts) - return NULL; - if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts__sz != NF_BPF_CT_OPTS_SZ) { - opts->error = -EINVAL; - return NULL; - } caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); - nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id, &opts->dir); + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz); if (IS_ERR(nfct)) { - opts->error = PTR_ERR(nfct); + if (opts) + opts->error = PTR_ERR(nfct); + return NULL; + } + return nfct; +} + +/* bpf_ct_insert_entry - Add the provided entry into a CT map + * + * This must be invoked for referenced PTR_TO_BTF_ID. + * + * @nfct - Pointer to referenced nf_conn___init object, obtained + * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + */ +struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) +{ + struct nf_conn *nfct = (struct nf_conn *)nfct_i; + int err; + + err = nf_conntrack_hash_check_insert(nfct); + if (err < 0) { + nf_conntrack_free(nfct); return NULL; } return nfct; @@ -217,50 +364,90 @@ void bpf_ct_release(struct nf_conn *nfct) nf_ct_put(nfct); } +/* bpf_ct_set_timeout - Set timeout of allocated nf_conn + * + * Sets the default timeout of newly allocated nf_conn before insertion. + * This helper must be invoked for refcounted pointer to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @timeout - Timeout in msecs. + */ +void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout) +{ + __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout)); +} + +/* bpf_ct_change_timeout - Change timeout of inserted nf_conn + * + * Change timeout associated of the inserted or looked up nf_conn. + * This helper must be invoked for refcounted pointer to nf_conn. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup. + * @timeout - New timeout in msecs. + */ +int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout) +{ + return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout)); +} + +/* bpf_ct_set_status - Set status field of allocated nf_conn + * + * Set the status field of the newly allocated nf_conn before insertion. + * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @status - New status value. + */ +int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status) +{ + return nf_ct_change_status_common((struct nf_conn *)nfct, status); +} + +/* bpf_ct_change_status - Change status of inserted nf_conn + * + * Change the status field of the provided connection tracking entry. + * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup. + * @status - New status value. + */ +int bpf_ct_change_status(struct nf_conn *nfct, u32 status) +{ + return nf_ct_change_status_common(nfct, status); +} + __diag_pop() -BTF_SET_START(nf_ct_xdp_check_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_xdp_check_kfunc_ids) - -BTF_SET_START(nf_ct_tc_check_kfunc_ids) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_tc_check_kfunc_ids) - -BTF_SET_START(nf_ct_acquire_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_SET_END(nf_ct_acquire_kfunc_ids) - -BTF_SET_START(nf_ct_release_kfunc_ids) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_release_kfunc_ids) - -/* Both sets are identical */ -#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids - -static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &nf_ct_xdp_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, -}; +BTF_SET8_START(nf_ct_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE) +BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) +BTF_SET8_END(nf_ct_kfunc_set) -static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &nf_ct_tc_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, +static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ct_kfunc_set, }; int register_nf_conntrack_bpf(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8c97d062b1ae..71c2f4f95d36 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2806,3 +2806,65 @@ err_expect: free_percpu(net->ct.stat); return ret; } + +#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \ + IS_ENABLED(CONFIG_NF_CT_NETLINK)) + +/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ + +int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout) +{ + if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) + return -EPERM; + + __nf_ct_set_timeout(ct, timeout); + + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; + + return 0; +} +EXPORT_SYMBOL_GPL(__nf_ct_change_timeout); + +void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off) +{ + unsigned int bit; + + /* Ignore these unchangable bits */ + on &= ~IPS_UNCHANGEABLE_MASK; + off &= ~IPS_UNCHANGEABLE_MASK; + + for (bit = 0; bit < __IPS_MAX_BIT; bit++) { + if (on & (1 << bit)) + set_bit(bit, &ct->status); + else if (off & (1 << bit)) + clear_bit(bit, &ct->status); + } +} +EXPORT_SYMBOL_GPL(__nf_ct_change_status); + +int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status) +{ + unsigned long d; + + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EBUSY; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EBUSY; + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EBUSY; + + __nf_ct_change_status(ct, status, 0); + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_change_status_common); + +#endif diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f8dd4ed8dc60..04169b54f2a2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1891,45 +1891,10 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static void -__ctnetlink_change_status(struct nf_conn *ct, unsigned long on, - unsigned long off) -{ - unsigned int bit; - - /* Ignore these unchangable bits */ - on &= ~IPS_UNCHANGEABLE_MASK; - off &= ~IPS_UNCHANGEABLE_MASK; - - for (bit = 0; bit < __IPS_MAX_BIT; bit++) { - if (on & (1 << bit)) - set_bit(bit, &ct->status); - else if (off & (1 << bit)) - clear_bit(bit, &ct->status); - } -} - static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { - unsigned long d; - unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); - d = ct->status ^ status; - - if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) - /* unchangeable */ - return -EBUSY; - - if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) - /* SEEN_REPLY bit can only be set */ - return -EBUSY; - - if (d & IPS_ASSURED && !(status & IPS_ASSURED)) - /* ASSURED bit can only be set */ - return -EBUSY; - - __ctnetlink_change_status(ct, status, 0); - return 0; + return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS]))); } static int @@ -2024,16 +1989,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct, static int ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { - u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - - if (timeout > INT_MAX) - timeout = INT_MAX; - WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); - - if (test_bit(IPS_DYING_BIT, &ct->status)) - return -ETIME; - - return 0; + return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ); } #if defined(CONFIG_NF_CONNTRACK_MARK) @@ -2293,9 +2249,7 @@ ctnetlink_create_conntrack(struct net *net, goto err1; timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - if (timeout > INT_MAX) - timeout = INT_MAX; - ct->timeout = (u32)timeout + nfct_time_stamp; + __nf_ct_set_timeout(ct, timeout); rcu_read_lock(); if (cda[CTA_HELP]) { @@ -2837,7 +2791,7 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) * unchangeable bits but do not error out. Also user programs * are allowed to clear the bits that they are allowed to change. */ - __ctnetlink_change_status(ct, status, ~status); + __nf_ct_change_status(ct, status, ~status); return 0; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index e479dd0561c5..16915f8eef2b 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = net->ipv4.sysctl_ip_default_ttl; + iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c7a240232b8d..790d6809be81 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3534,7 +3534,7 @@ int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], struct netlink_ext_ack *extack) { - int i, j, index, err = 0; + int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); @@ -3554,14 +3554,18 @@ int tc_setup_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry->hw_index = act->tcfa_index; index = 0; err = tc_setup_offload_act(act, entry, &index, extack); - if (!err) - j += index; - else + if (err) goto err_out_locked; + + for (k = 0; k < index ; k++) { + entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); + entry[k].hw_index = act->tcfa_index; + } + + j += index; + spin_unlock_bh(&act->tcfa_lock); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fa500ea3a1f1..bcd3384ab07a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 1d83fa9ae56f..175026ae33ae 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2127,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; + lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6abbe3c2520c..b1fcd61836d1 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -98,13 +98,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. */ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -195,8 +198,7 @@ void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 859ea02022c0..ed5e6f1df9c7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1803,6 +1803,7 @@ static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; + int err; lock_sock(sk); @@ -1818,15 +1819,23 @@ static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); - if (!timeo) - return -EAGAIN; - if (signal_pending(current)) - return sock_intr_errno(timeo); + if (timeo <= 0) { + err = -EAGAIN; + goto err_unlock; + } + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto err_unlock; + } } WRITE_ONCE(ctx->reader_present, 1); return timeo; + +err_unlock: + release_sock(sk); + return err; } static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 09002387987e..5b4ce6ba1bc7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -639,8 +639,11 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (unlikely(need_wait)) return -EOPNOTSUPP; - if (sk_can_busy_loop(sk)) + if (sk_can_busy_loop(sk)) { + if (xs->zc) + __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool)); sk_busy_loop(sk, 1); /* only support non-blocking sockets */ + } if (xs->zc && xsk_no_wakeup(sk)) return 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1876ea61fdc..f1a0bab920a5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 08564e0eef20..ccfb172eb5b8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5002a5b9a7da..727da3c5879b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -282,12 +282,10 @@ $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool -BPFTOOL := $(BPFTOOL_OUTPUT)/bpftool -$(BPFTOOL): $(LIBBPF) $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) - $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ - OUTPUT=$(BPFTOOL_OUTPUT)/ \ - LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ - LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ +BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool +$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) + $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ + OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): $(call msg,MKDIR,$@) diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 16dbf49e0f19..88a26f3ce201 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -17,6 +17,7 @@ #include <bpf/libbpf.h> #include "bpf_insn.h" #include "sock_example.h" +#include "bpf_util.h" #define BPF_F_PIN (1 << 0) #define BPF_F_GET (1 << 1) @@ -52,7 +53,7 @@ static int bpf_prog_create(const char *object) BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); + size_t insns_cnt = ARRAY_SIZE(insns); struct bpf_object *obj; int err; diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index a88f69504c08..5b66f2401b96 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -29,6 +29,7 @@ #include <bpf/bpf.h> #include "bpf_insn.h" #include "sock_example.h" +#include "bpf_util.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; @@ -58,7 +59,7 @@ static int test_sock(void) BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + size_t insns_cnt = ARRAY_SIZE(prog); LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_buf = bpf_log_buf, .log_size = BPF_LOG_BUF_SIZE, diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 6d90874b09c3..68ce69457afe 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -31,6 +31,7 @@ #include <bpf/bpf.h> #include "bpf_insn.h" +#include "bpf_util.h" enum { MAP_KEY_PACKETS, @@ -70,7 +71,7 @@ static int prog_load(int map_fd, int verdict) BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + size_t insns_cnt = ARRAY_SIZE(prog); LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_buf = bpf_log_buf, .log_size = BPF_LOG_BUF_SIZE, diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index be98ccb4952f..5efb91763d65 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -523,7 +523,7 @@ int main(int argc, char **argv) return -1; } - for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { + for (f = 0; f < ARRAY_SIZE(map_flags); f++) { test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index e8b4cc184ac9..652ec720533d 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -12,6 +12,8 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_util.h" + static int map_fd[7]; #define PORT_A (map_fd[0]) @@ -28,7 +30,7 @@ static const char * const test_names[] = { "Hash of Hash", }; -#define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) +#define NR_TESTS ARRAY_SIZE(test_names) static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key) { diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index e910dc265c31..9d7d79f0d47d 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -8,6 +8,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> #include "trace_helpers.h" +#include "bpf_util.h" #ifdef __mips__ #define MAX_ENTRIES 6000 /* MIPS n64 syscalls start at 5000 */ @@ -24,7 +25,7 @@ static void install_accept_all_seccomp(void) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { - .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .len = (unsigned short)ARRAY_SIZE(filter), .filter = filter, }; if (prctl(PR_SET_SECCOMP, 2, &prog)) diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c index 415bac1758e3..8557c278df77 100644 --- a/samples/bpf/xdp_redirect_map.bpf.c +++ b/samples/bpf/xdp_redirect_map.bpf.c @@ -33,7 +33,7 @@ struct { } tx_port_native SEC(".maps"); /* store egress interface mac address */ -const volatile char tx_mac_addr[ETH_ALEN]; +const volatile __u8 tx_mac_addr[ETH_ALEN]; static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) { @@ -73,6 +73,7 @@ int xdp_redirect_map_egress(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; + u8 *mac_addr = (u8 *) tx_mac_addr; struct ethhdr *eth = data; u64 nh_off; @@ -80,7 +81,8 @@ int xdp_redirect_map_egress(struct xdp_md *ctx) if (data + nh_off > data_end) return XDP_DROP; - __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); + barrier_var(mac_addr); /* prevent optimizing out memcpy */ + __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN); return XDP_PASS; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index b6e4fc849577..c889a1394dc1 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -40,6 +40,8 @@ static const struct option long_options[] = { {} }; +static int verbose = 0; + int main(int argc, char **argv) { struct bpf_devmap_val devmap_val = {}; @@ -79,6 +81,7 @@ int main(int argc, char **argv) break; case 'v': sample_switch_mode(); + verbose = 1; break; case 's': mask |= SAMPLE_REDIRECT_MAP_CNT; @@ -134,6 +137,12 @@ int main(int argc, char **argv) ret = EXIT_FAIL; goto end_destroy; } + if (verbose) + printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n", + ifindex_out, + skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1], + skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3], + skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]); } skel->rodata->from_match[0] = ifindex_in; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index a0ec321469bd..dfb260de17a8 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -333,27 +333,7 @@ class PrinterRST(Printer): .. Copyright (C) All BPF authors and contributors from 2014 to present. .. See git log include/uapi/linux/bpf.h in kernel tree for details. .. -.. %%%LICENSE_START(VERBATIM) -.. Permission is granted to make and distribute verbatim copies of this -.. manual provided the copyright notice and this permission notice are -.. preserved on all copies. -.. -.. Permission is granted to copy and distribute modified versions of this -.. manual under the conditions for verbatim copying, provided that the -.. entire resulting derived work is distributed under the terms of a -.. permission notice identical to this one. -.. -.. Since the Linux kernel and libraries are constantly changing, this -.. manual page may be incorrect or out-of-date. The author(s) assume no -.. responsibility for errors or omissions, or for damages resulting from -.. the use of the information contained herein. The author(s) may not -.. have taken the same level of care in the production of this manual, -.. which is licensed free of charge, as they might when working -.. professionally. -.. -.. Formatted or processed versions of this manual, if unaccompanied by -.. the source, must acknowledge the copyright and authors of this work. -.. %%%LICENSE_END +.. SPDX-License-Identifier: Linux-man-pages-copyleft .. .. Please do not edit this file. It was generated from the documentation .. located in file include/uapi/linux/bpf.h of the Linux kernel sources diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 46f7542db08c..dc07b6d12e30 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -180,7 +180,7 @@ lx-symbols command.""" self.breakpoint.delete() self.breakpoint = None self.breakpoint = LoadModuleBreakpoint( - "kernel/module.c:do_init_module", self) + "kernel/module/main.c:do_init_module", self) else: gdb.write("Note: symbol update on module loading not supported " "with this gdb version\n") diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 73917413365b..a8802b8da946 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -2247,6 +2247,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) if (id >= READING_MAX_ID) return false; + if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) + && security_locked_down(LOCKDOWN_KEXEC)) + return false; + func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 03acc823838a..00f5227c8459 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -296,6 +296,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -316,6 +322,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -447,5 +454,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 36369e76cc63..33d2cd04d254 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +101,7 @@ #define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2eab6a3a8a8c..cc615be27a54 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -95,6 +95,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -576,6 +577,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 5d26f3c6f918..80cd7843c677 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -45,6 +45,19 @@ * .zero 4 * __BTF_ID__func__vfs_fallocate__4: * .zero 4 + * + * set8 - store symbol size into first 4 bytes and sort following + * ID list + * + * __BTF_ID__set8__list: + * .zero 8 + * list: + * __BTF_ID__func__vfs_getattr__3: + * .zero 4 + * .word (1 << 0) | (1 << 2) + * __BTF_ID__func__vfs_fallocate__5: + * .zero 4 + * .word (1 << 3) | (1 << 1) | (1 << 2) */ #define _GNU_SOURCE @@ -72,6 +85,7 @@ #define BTF_TYPEDEF "typedef" #define BTF_FUNC "func" #define BTF_SET "set" +#define BTF_SET8 "set8" #define ADDR_CNT 100 @@ -84,6 +98,7 @@ struct btf_id { }; int addr_cnt; bool is_set; + bool is_set8; Elf64_Addr addr[ADDR_CNT]; }; @@ -231,14 +246,14 @@ static char *get_id(const char *prefix_end) return id; } -static struct btf_id *add_set(struct object *obj, char *name) +static struct btf_id *add_set(struct object *obj, char *name, bool is_set8) { /* * __BTF_ID__set__name * name = ^ * id = ^ */ - char *id = name + sizeof(BTF_SET "__") - 1; + char *id = name + (is_set8 ? sizeof(BTF_SET8 "__") : sizeof(BTF_SET "__")) - 1; int len = strlen(name); if (id >= name + len) { @@ -444,9 +459,21 @@ static int symbols_collect(struct object *obj) } else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) { obj->nr_funcs++; id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1); + /* set8 */ + } else if (!strncmp(prefix, BTF_SET8, sizeof(BTF_SET8) - 1)) { + id = add_set(obj, prefix, true); + /* + * SET8 objects store list's count, which is encoded + * in symbol's size, together with 'cnt' field hence + * that - 1. + */ + if (id) { + id->cnt = sym.st_size / sizeof(uint64_t) - 1; + id->is_set8 = true; + } /* set */ } else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) { - id = add_set(obj, prefix); + id = add_set(obj, prefix, false); /* * SET objects store list's count, which is encoded * in symbol's size, together with 'cnt' field hence @@ -571,7 +598,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id && !id->is_set) + /* For set, set8, id->id may be 0 */ + if (!id->id && !id->is_set && !id->is_set8) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); for (i = 0; i < id->addr_cnt; i++) { @@ -643,13 +671,13 @@ static int sets_patch(struct object *obj) } idx = idx / sizeof(int); - base = &ptr[idx] + 1; + base = &ptr[idx] + (id->is_set8 ? 2 : 1); cnt = ptr[idx]; pr_debug("sorting addr %5lu: cnt %6d [%s]\n", (idx + 1) * sizeof(int), cnt, id->name); - qsort(base, cnt, sizeof(int), cmp_id); + qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); next = rb_next(next); } diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index da6de16a3dfb..8b3d87b82b7a 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -4,7 +4,7 @@ include ../../scripts/Makefile.include OUTPUT ?= $(abspath .output)/ BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ -DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ @@ -86,6 +86,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) +$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3dd13fe738b9..59a217ca2dfd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2361,7 +2361,8 @@ union bpf_attr { * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 5088bd9f1922..811897dadcae 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -2083,6 +2083,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_BASE_SHIFT 8 diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 11f9096407fc..f4d3e1e2abe2 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -2,6 +2,8 @@ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ +#include <bpf/bpf_helpers.h> + /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 @@ -140,7 +142,7 @@ struct pt_regs___s390 { #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] #define __PT_IP_REG psw.addr -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) #elif defined(bpf_target_arm) @@ -174,7 +176,7 @@ struct pt_regs___arm64 { #define __PT_RC_REG regs[0] #define __PT_SP_REG sp #define __PT_IP_REG pc -#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) #elif defined(bpf_target_mips) @@ -493,39 +495,62 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ #define ___bpf_syscall_args0() ctx -#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) -#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) -#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) -#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) -#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) #define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) +/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ +#define ___bpf_syswrap_args0() ctx +#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) + /* - * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for + * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for * tracing syscall functions, like __x64_sys_close. It hides the underlying * platform-specific low-level way of getting syscall input arguments from * struct pt_regs, and provides a familiar typed and named function arguments * syntax and semantics of accessing syscall input parameters. * - * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * Original struct pt_regs * context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). * - * This macro relies on BPF CO-RE support. + * At the moment BPF_KSYSCALL does not handle all the calling convention + * quirks for mmap(), clone() and compat syscalls transparrently. This may or + * may not change in the future. User needs to take extra measures to handle + * such quirks explicitly, if necessary. + * + * This macro relies on BPF CO-RE support and virtual __kconfig externs. */ -#define BPF_KPROBE_SYSCALL(name, args...) \ +#define BPF_KSYSCALL(name, args...) \ name(struct pt_regs *ctx); \ +extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ - struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ + struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ + ? (struct pt_regs *)PT_REGS_PARM1(ctx) \ + : ctx; \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_syscall_args(args)); \ + if (LINUX_HAS_SYSCALL_WRAPPER) \ + return ____##name(___bpf_syswrap_args(args)); \ + else \ + return ____##name(___bpf_syscall_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) +#define BPF_KPROBE_SYSCALL BPF_KSYSCALL + #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 400e84fd0578..627edb5bb6de 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -2045,7 +2045,7 @@ static int btf_dump_get_enum_value(struct btf_dump *d, *value = *(__s64 *)data; return 0; case 4: - *value = is_signed ? *(__s32 *)data : *(__u32 *)data; + *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data; return 0; case 2: *value = is_signed ? *(__s16 *)data : *(__u16 *)data; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 927745b08014..23f5c46708f8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -533,7 +533,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, gen->attach_kind = kind; ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", prefix, attach_name); - if (ret == sizeof(gen->attach_target)) + if (ret >= sizeof(gen->attach_target)) gen->error = -ENOSPC; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cb49408eb298..b01fe01b0761 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1694,7 +1694,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, switch (ext->kcfg.type) { case KCFG_BOOL: if (value == 'm') { - pr_warn("extern (kcfg) %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", ext->name, value); return -EINVAL; } @@ -1715,7 +1715,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, case KCFG_INT: case KCFG_CHAR_ARR: default: - pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", ext->name, value); return -EINVAL; } @@ -1729,7 +1729,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, size_t len; if (ext->kcfg.type != KCFG_CHAR_ARR) { - pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", + ext->name, value); return -EINVAL; } @@ -1743,7 +1744,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, /* strip quotes */ len -= 2; if (len >= ext->kcfg.sz) { - pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", ext->name, value, len, ext->kcfg.sz - 1); len = ext->kcfg.sz - 1; } @@ -1800,13 +1801,20 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, __u64 value) { - if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { - pr_warn("extern (kcfg) %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && + ext->kcfg.type != KCFG_BOOL) { + pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", ext->name, (unsigned long long)value); return -EINVAL; } + if (ext->kcfg.type == KCFG_BOOL && value > 1) { + pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", + ext->name, (unsigned long long)value); + return -EINVAL; + + } if (!is_kcfg_value_in_range(ext, value)) { - pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } @@ -1870,16 +1878,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern (kcfg) %s=%s should be integer\n", - ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); return err; } + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); + return -EINVAL; + } err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern (kcfg) %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); return 0; } @@ -2320,6 +2331,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return 0; } +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2333,6 +2375,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_key_type_id = def->key_type_id; map->btf_value_type_id = def->value_type_id; + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + if (def->parts & MAP_DEF_MAP_TYPE) pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); @@ -3687,7 +3733,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->kcfg.type = find_kcfg_type(obj->btf, t->type, &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { - pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { @@ -4232,7 +4278,7 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; - __u32 len = sizeof(info); + __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; @@ -4242,7 +4288,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (err) return libbpf_err(err); - new_name = strdup(info.name); + name_len = strlen(info.name); + if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) + new_name = strdup(map->name); + else + new_name = strdup(info.name); + if (!new_name) return libbpf_err(-errno); @@ -4301,9 +4352,15 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->fd >= 0) + if (map->obj->loaded) return libbpf_err(-EBUSY); + map->def.max_entries = max_entries; + + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + return 0; } @@ -4654,6 +4711,8 @@ static int probe_kern_btf_enum64(void) strs, sizeof(strs))); } +static int probe_kern_syscall_wrapper(void); + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4722,6 +4781,9 @@ static struct kern_feature_desc { [FEAT_BTF_ENUM64] = { "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4854,37 +4916,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static size_t adjust_ringbuf_sz(size_t sz) -{ - __u32 page_sz = sysconf(_SC_PAGE_SIZE); - __u32 mul; - - /* if user forgot to set any size, make sure they see error */ - if (sz == 0) - return 0; - /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be - * a power-of-2 multiple of kernel's page size. If user diligently - * satisified these conditions, pass the size through. - */ - if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) - return sz; - - /* Otherwise find closest (page_sz * power_of_2) product bigger than - * user-set size to satisfy both user size request and kernel - * requirements and substitute correct max_entries for map creation. - */ - for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { - if (mul * page_sz > sz) - return mul * page_sz; - } - - /* if it's impossible to satisfy the conditions (i.e., user size is - * very close to UINT_MAX but is not a power-of-2 multiple of - * page_size) then just return original size and let kernel reject it - */ - return sz; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -4923,9 +4954,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { - case BPF_MAP_TYPE_RINGBUF: - map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); - /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -7282,14 +7310,14 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, return 0; if (ext->is_set && ext->ksym.addr != sym_addr) { - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", sym_name, ext->ksym.addr, sym_addr); return -EINVAL; } if (!ext->is_set) { ext->is_set = true; ext->ksym.addr = sym_addr; - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); } return 0; } @@ -7493,28 +7521,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (ext->type == EXT_KCFG && - strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = kcfg_data + ext->kcfg.data_off; - __u32 kver = get_kernel_version(); + if (ext->type == EXT_KSYM) { + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; + continue; + } else if (ext->type == EXT_KCFG) { + void *ext_ptr = kcfg_data + ext->kcfg.data_off; + __u64 value = 0; + + /* Kconfig externs need actual /proc/config.gz */ + if (str_has_pfx(ext->name, "CONFIG_")) { + need_config = true; + continue; + } - if (!kver) { - pr_warn("failed to get kernel version\n"); + /* Virtual kcfg externs are customly handled by libbpf */ + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + value = get_kernel_version(); + if (!value) { + pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); + return -EINVAL; + } + } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { + value = kernel_supports(obj, FEAT_BPF_COOKIE); + } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { + value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); + } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { + /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed + * __kconfig externs, where LINUX_ ones are virtual and filled out + * customly by libbpf (their values don't come from Kconfig). + * If LINUX_xxx variable is not recognized by libbpf, but is marked + * __weak, it defaults to zero value, just like for CONFIG_xxx + * externs. + */ + pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); return -EINVAL; } - err = set_kcfg_value_num(ext, ext_val, kver); + + err = set_kcfg_value_num(ext, ext_ptr, value); if (err) return err; - pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { - need_config = true; - } else if (ext->type == EXT_KSYM) { - if (ext->ksym.type_id) - need_vmlinux_btf = true; - else - need_kallsyms = true; + pr_debug("extern (kcfg) '%s': set to 0x%llx\n", + ext->name, (long long)value); } else { - pr_warn("unrecognized extern '%s'\n", ext->name); + pr_warn("extern '%s': unrecognized extern kind\n", ext->name); return -EINVAL; } } @@ -7550,10 +7602,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, ext = &obj->externs[i]; if (!ext->is_set && !ext->is_weak) { - pr_warn("extern %s (strong) not resolved\n", ext->name); + pr_warn("extern '%s' (strong): not resolved\n", ext->name); return -ESRCH; } else if (!ext->is_set) { - pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", ext->name); } } @@ -8381,6 +8433,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8401,6 +8454,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), @@ -9757,7 +9812,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, { struct perf_event_attr attr = {}; char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; + int type, pfd; if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; @@ -9793,14 +9848,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, pid < 0 ? -1 : pid /* pid */, pid == -1 ? 0 : -1 /* cpu */, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; + return pfd >= 0 ? pfd : -errno; } static int append_to_file(const char *file, const char *fmt, ...) @@ -9823,6 +9871,34 @@ static int append_to_file(const char *file, const char *fmt, ...) return err; } +#define DEBUGFS "/sys/kernel/debug/tracing" +#define TRACEFS "/sys/kernel/tracing" + +static bool use_debugfs(void) +{ + static int has_debugfs = -1; + + if (has_debugfs < 0) + has_debugfs = access(DEBUGFS, F_OK) == 0; + + return has_debugfs == 1; +} + +static const char *tracefs_path(void) +{ + return use_debugfs() ? DEBUGFS : TRACEFS; +} + +static const char *tracefs_kprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; +} + +static const char *tracefs_uprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; +} + static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, const char *kfunc_name, size_t offset) { @@ -9835,9 +9911,7 @@ static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "%c:%s/%s %s+0x%zx", + return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", retprobe ? 'r' : 'p', retprobe ? "kretprobes" : "kprobes", probe_name, kfunc_name, offset); @@ -9845,18 +9919,16 @@ static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); + return append_to_file(tracefs_kprobe_events(), "-:%s/%s", + retprobe ? "kretprobes" : "kprobes", probe_name); } static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[256]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "kretprobes" : "kprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -9905,6 +9977,60 @@ err_clean_legacy: return err; } +static const char *arch_specific_syscall_pfx(void) +{ +#if defined(__x86_64__) + return "x64"; +#elif defined(__i386__) + return "ia32"; +#elif defined(__s390x__) + return "s390x"; +#elif defined(__s390__) + return "s390"; +#elif defined(__arm__) + return "arm"; +#elif defined(__aarch64__) + return "arm64"; +#elif defined(__mips__) + return "mips"; +#elif defined(__riscv) + return "riscv"; +#else + return NULL; +#endif +} + +static int probe_kern_syscall_wrapper(void) +{ + char syscall_name[64]; + const char *ksys_pfx; + + ksys_pfx = arch_specific_syscall_pfx(); + if (!ksys_pfx) + return 0; + + snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); + + if (determine_kprobe_perf_type() >= 0) { + int pfd; + + pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); + if (pfd >= 0) + close(pfd); + + return pfd >= 0 ? 1 : 0; + } else { /* legacy mode */ + char probe_name[128]; + + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) + return 0; + + (void)remove_kprobe_event_legacy(probe_name, false); + return 1; + } +} + struct bpf_link * bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, @@ -9990,6 +10116,29 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } +struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + char func_name[128]; + + if (!OPTS_VALID(opts, bpf_ksyscall_opts)) + return libbpf_err_ptr(-EINVAL); + + if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + snprintf(func_name, sizeof(func_name), "__%s_sys_%s", + arch_specific_syscall_pfx(), syscall_name); + } else { + snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); + } + + kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); + kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + + return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); +} + /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) { @@ -10160,6 +10309,27 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf return libbpf_get_error(*link); } +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_ksyscall_opts, opts); + const char *syscall_name; + + *link = NULL; + + /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ + if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); + if (opts.retprobe) + syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; + else + syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; + + *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); + return *link ? 0 : -errno; +} + static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); @@ -10208,9 +10378,7 @@ static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "%c:%s/%s %s:0x%zx", + return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", retprobe ? 'r' : 'p', retprobe ? "uretprobes" : "uprobes", probe_name, binary_path, offset); @@ -10218,18 +10386,16 @@ static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); + return append_to_file(tracefs_uprobe_events(), "-:%s/%s", + retprobe ? "uretprobes" : "uprobes", probe_name); } static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[512]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "uretprobes" : "uprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -10545,7 +10711,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); - if (binary_path && !strchr(binary_path, '/')) { + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, full_binary_path, sizeof(full_binary_path)); if (err) { @@ -10559,11 +10728,6 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, if (func_name) { long sym_off; - if (!binary_path) { - pr_warn("prog '%s': name-based attach requires binary_path\n", - prog->name); - return libbpf_err_ptr(-EINVAL); - } sym_off = elf_find_func_offset(binary_path, func_name); if (sym_off < 0) return libbpf_err_ptr(sym_off); @@ -10711,6 +10875,9 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); } + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); if (err) { @@ -10776,9 +10943,8 @@ static int determine_tracepoint_id(const char *tp_category, char file[PATH_MAX]; int ret; - ret = snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - tp_category, tp_name); + ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), tp_category, tp_name); if (ret < 0) return -errno; if (ret >= sizeof(file)) { @@ -11728,6 +11894,22 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) return cpu_buf->fd; } +int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return libbpf_err(-EINVAL); + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return libbpf_err(-ENOENT); + + *buf = cpu_buf->base; + *buf_size = pb->mmap_size; + return 0; +} + /* * Consume data from perf ring buffer corresponding to slot *buf_idx* in * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e4d5353f757b..61493c4cddac 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -457,6 +457,52 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, const char *pattern, const struct bpf_kprobe_multi_opts *opts); +struct bpf_ksyscall_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 bpf_cookie; + /* attach as return probe? */ + bool retprobe; + size_t :0; +}; +#define bpf_ksyscall_opts__last_field retprobe + +/** + * @brief **bpf_program__attach_ksyscall()** attaches a BPF program + * to kernel syscall handler of a specified syscall. Optionally it's possible + * to request to install retprobe that will be triggered at syscall exit. It's + * also possible to associate BPF cookie (though options). + * + * Libbpf automatically will determine correct full kernel function name, + * which depending on system architecture and kernel version/configuration + * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will + * attach specified program using kprobe/kretprobe mechanism. + * + * **bpf_program__attach_ksyscall()** is an API counterpart of declarative + * **SEC("ksyscall/<syscall>")** annotation of BPF programs. + * + * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do + * not handle all the calling convention quirks for mmap(), clone() and compat + * syscalls. It also only attaches to "native" syscall interfaces. If host + * system supports compat syscalls or defines 32-bit syscalls in 64-bit + * kernel, such syscall interfaces won't be attached to by libbpf. + * + * These limitations may or may not change in the future. Therefore it is + * recommended to use SEC("kprobe") for these syscalls or if working with + * compat and 32-bit interfaces is required. + * + * @param prog BPF program to attach + * @param syscall_name Symbolic name of the syscall (e.g., "bpf") + * @param opts Additional options (see **struct bpf_ksyscall_opts**) + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts); + struct bpf_uprobe_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -1053,6 +1099,22 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); +/** + * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying + * memory region of the ring buffer. + * This ring buffer can be used to implement a custom events consumer. + * The ring buffer starts with the *struct perf_event_mmap_page*, which + * holds the ring buffer managment fields, when accessing the header + * structure it's important to be SMP aware. + * You can refer to *perf_event_read_simple* for a simple example. + * @param pb the perf buffer structure + * @param buf_idx the buffer index to retreive + * @param buf (out) gets the base pointer of the mmap()'ed memory + * @param buf_size (out) gets the size of the mmap()'ed region + * @return 0 on success, negative error code for failure + */ +LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, + size_t *buf_size); struct bpf_prog_linfo; struct bpf_prog_info; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 94b589ecfeaa..0625adb9e888 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -356,10 +356,12 @@ LIBBPF_0.8.0 { LIBBPF_1.0.0 { global: bpf_prog_query_opts; + bpf_program__attach_ksyscall; btf__add_enum64; btf__add_enum64_value; libbpf_bpf_attach_type_str; libbpf_bpf_link_type_str; libbpf_bpf_map_type_str; libbpf_bpf_prog_type_str; + perf_buffer__buffer; }; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 9cd7829cbe41..4135ae0a2bc3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -108,9 +108,9 @@ static inline bool str_has_sfx(const char *str, const char *sfx) size_t str_len = strlen(str); size_t sfx_len = strlen(sfx); - if (sfx_len <= str_len) - return strcmp(str + str_len - sfx_len, sfx); - return false; + if (sfx_len > str_len) + return false; + return strcmp(str + str_len - sfx_len, sfx) == 0; } /* Symbol versioning is different between static and shared library. @@ -352,6 +352,8 @@ enum kern_feature_id { FEAT_BPF_COOKIE, /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ FEAT_BTF_ENUM64, + /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ + FEAT_SYSCALL_WRAPPER, __FEAT_CNT, }; diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 4181fddb3687..4f2adc0bd6ca 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -6,7 +6,6 @@ #include <linux/errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <bpf/bpf_core_read.h> /* Below types and maps are internal implementation details of libbpf's USDT * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should @@ -30,14 +29,6 @@ #ifndef BPF_USDT_MAX_IP_CNT #define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) #endif -/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is - * the only dependency on CO-RE, so if it's undesirable, user can override - * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not. - */ -#ifndef BPF_USDT_HAS_BPF_COOKIE -#define BPF_USDT_HAS_BPF_COOKIE \ - bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) -#endif enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, @@ -83,15 +74,12 @@ struct { __type(value, __u32); } __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; -/* don't rely on user's BPF code to have latest definition of bpf_func_id */ -enum bpf_func_id___usdt { - BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ -}; +extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig; static __always_inline int __bpf_usdt_spec_id(struct pt_regs *ctx) { - if (!BPF_USDT_HAS_BPF_COOKIE) { + if (!LINUX_HAS_BPF_COOKIE) { long ip = PT_REGS_IP(ctx); int *spec_id_ptr; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 897fc504918b..f075cf37a65e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4280,6 +4280,7 @@ static int trace__replay(struct trace *trace) goto out; evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter"); + trace->syscalls.events.sys_enter = evsel; /* older kernels have syscalls tp versus raw_syscalls */ if (evsel == NULL) evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter"); @@ -4292,6 +4293,7 @@ static int trace__replay(struct trace *trace) } evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit"); + trace->syscalls.events.sys_exit = evsel; if (evsel == NULL) evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 4ad0dfbc8b21..7c7d20fc503a 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,8 +20,6 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" -#include "pmu.h" -#include "pmu-hybrid.h" /* * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just @@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su evlist__config(evlist, &opts, NULL); - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; - - /* - * For hybrid "cycles:u", it creates two events. - * Init the second evsel here. - */ - if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) { - evsel = evsel__next(evsel); + /* For hybrid "cycles:u", it creates two events */ + evlist__for_each_entry(evlist, evsel) { evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; evsel->core.attr.enable_on_exec = 0; } - if (evlist__open(evlist) == -ENOENT) { - err = TEST_SKIP; + ret = evlist__open(evlist); + if (ret < 0) { + if (ret == -ENOENT) + err = TEST_SKIP; + else + pr_debug("evlist__open() failed\n"); goto out_err; } - CHECK__(evlist__open(evlist)); CHECK__(evlist__mmap(evlist, UINT_MAX)); @@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm1_time = sample.time; } if (strcmp(event->comm.comm, comm2) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm2_time = sample.time; } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e585e1cefc77..792cb15bac40 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_check_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_check_kfunc_ids) +BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_testmod_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, }; extern int bpf_fentry_test1(int a); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7ff5fa93d056..a33874b081b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -27,6 +27,7 @@ #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" #include "bpf_iter_bpf_link.skel.h" +#include "bpf_iter_ksym.skel.h" static int duration; @@ -1120,6 +1121,19 @@ static void test_link_iter(void) bpf_iter_bpf_link__destroy(skel); } +static void test_ksym_iter(void) +{ + struct bpf_iter_ksym *skel; + + skel = bpf_iter_ksym__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_ksym); + + bpf_iter_ksym__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1267,4 +1281,6 @@ void test_bpf_iter(void) test_buf_neg_offset(); if (test__start_subtest("link-iter")) test_link_iter(); + if (test__start_subtest("ksym")) + test_ksym_iter(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dd30b1e3a67c..7a74a1579076 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -2,13 +2,29 @@ #include <test_progs.h> #include <network_helpers.h> #include "test_bpf_nf.skel.h" +#include "test_bpf_nf_fail.skel.h" + +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} test_bpf_nf_fail_tests[] = { + { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" }, + { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, + { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, + { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, +}; enum { TEST_XDP, TEST_TC_BPF, }; -void test_bpf_nf_ct(int mode) +static void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; int prog_fd, err; @@ -39,14 +55,60 @@ void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); + ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry"); + ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry"); + ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup"); + /* allow some tolerance for test_delta_timeout value to avoid races. */ + ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); + ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); + /* expected status is IPS_SEEN_REPLY */ + ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); end: test_bpf_nf__destroy(skel); } +static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_bpf_nf_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_bpf_nf_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_bpf_nf_fail__load(skel); + if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_bpf_nf_fail__destroy(skel); +} + void test_bpf_nf(void) { + int i; if (test__start_subtest("xdp-ct")) test_bpf_nf_ct(TEST_XDP); if (test__start_subtest("tc-bpf-ct")) test_bpf_nf_ct(TEST_TC_BPF); + for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) { + if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name)) + test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name, + test_bpf_nf_fail_tests[i].err_msg); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 941b0100bafa..ef6528b8084c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5338,7 +5338,7 @@ static void do_test_pprint(int test_num) ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", "/sys/fs/bpf", test->map_name); - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long", "/sys/fs/bpf", test->map_name)) { err = -1; goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c index 1931a158510e..63a51e9f3630 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_extern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -39,6 +39,7 @@ static struct test_case { "CONFIG_STR=\"abracad\"\n" "CONFIG_MISSING=0", .data = { + .unkn_virt_val = 0, .bpf_syscall = false, .tristate_val = TRI_MODULE, .bool_val = true, @@ -121,7 +122,7 @@ static struct test_case { void test_core_extern(void) { const uint32_t kern_ver = get_kernel_version(); - int err, duration = 0, i, j; + int err, i, j; struct test_core_extern *skel = NULL; uint64_t *got, *exp; int n = sizeof(*skel->data) / sizeof(uint64_t); @@ -136,19 +137,17 @@ void test_core_extern(void) continue; skel = test_core_extern__open_opts(&opts); - if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_core_extern__load(skel); if (t->fails) { - CHECK(!err, "skel_load", - "shouldn't succeed open/load of skeleton\n"); + ASSERT_ERR(err, "skel_load_should_fail"); goto cleanup; - } else if (CHECK(err, "skel_load", - "failed to open/load skeleton\n")) { + } else if (!ASSERT_OK(err, "skel_load")) { goto cleanup; } err = test_core_extern__attach(skel); - if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + if (!ASSERT_OK(err, "attach_raw_tp")) goto cleanup; usleep(1); @@ -158,9 +157,7 @@ void test_core_extern(void) got = (uint64_t *)skel->data; exp = (uint64_t *)&t->data; for (j = 0; j < n; j++) { - CHECK(got[j] != exp[j], "check_res", - "result #%d: expected %llx, but got %llx\n", - j, (__u64)exp[j], (__u64)got[j]); + ASSERT_EQ(got[j], exp[j], "result"); } cleanup: test_core_extern__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 335917df0614..d457a55ff408 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -364,6 +364,8 @@ static int get_syms(char ***symsp, size_t *cntp) continue; if (!strncmp(name, "rcu_", 4)) continue; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + continue; if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index eb5f7f5aa81a..1455911d9fcb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,6 +50,13 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + /* validate ringbuf size adjustment logic */ + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final"); + proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; @@ -65,6 +72,10 @@ void test_ringbuf_multi(void) close(proto_fd); proto_fd = -1; + /* make sure we can't resize ringbuf after object load */ + if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 180afd632f4c..99dac5292b41 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -122,6 +122,8 @@ void test_skeleton(void) ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 97ec8bc76ae6..e9846606690d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -22,6 +22,7 @@ #define BTF_F_NONAME BTF_F_NONAME___not_used #define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used #define BTF_F_ZERO BTF_F_ZERO___not_used +#define bpf_iter__ksym bpf_iter__ksym___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -44,6 +45,7 @@ #undef BTF_F_NONAME #undef BTF_F_PTR_RAW #undef BTF_F_ZERO +#undef bpf_iter__ksym struct bpf_iter_meta { struct seq_file *seq; @@ -151,3 +153,8 @@ enum { BTF_F_PTR_RAW = (1ULL << 2), BTF_F_ZERO = (1ULL << 3), }; + +struct bpf_iter__ksym { + struct bpf_iter_meta *meta; + struct kallsym_iter *ksym; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c new file mode 100644 index 000000000000..285c008cbf9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +unsigned long last_sym_value = 0; + +static inline char tolower(char c) +{ + if (c >= 'A' && c <= 'Z') + c += ('a' - 'A'); + return c; +} + +static inline char toupper(char c) +{ + if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'); + return c; +} + +/* Dump symbols with max size; the latter is calculated by caching symbol N value + * and when iterating on symbol N+1, we can print max size of symbol N via + * address of N+1 - address of N. + */ +SEC("iter/ksym") +int dump_ksym(struct bpf_iter__ksym *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kallsym_iter *iter = ctx->ksym; + __u32 seq_num = ctx->meta->seq_num; + unsigned long value; + char type; + int ret; + + if (!iter) + return 0; + + if (seq_num == 0) { + BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n"); + return 0; + } + if (last_sym_value) + BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value); + else + BPF_SEQ_PRINTF(seq, "\n"); + + value = iter->show_value ? iter->value : 0; + + last_sym_value = value; + + type = iter->type; + + if (iter->module_name[0]) { + type = iter->exported ? toupper(type) : tolower(type); + BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", + value, type, iter->name, iter->module_name); + } else { + BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name); + } + if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos) + BPF_SEQ_PRINTF(seq, "CORE "); + else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "MOD "); + else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "FTRACE_MOD "); + else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos) + BPF_SEQ_PRINTF(seq, "BPF "); + else + BPF_SEQ_PRINTF(seq, "KPROBE "); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c index 05838ed9b89c..e1e11897e99b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -64,9 +64,9 @@ int BPF_KPROBE(handle_sys_prctl) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_prctl") -int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) +SEC("ksyscall/prctl") +int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) { pid_t pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index f1c88ad368ef..a1e45fec8938 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -1,11 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include <linux/ptrace.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <stdbool.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" int kprobe_res = 0; @@ -31,8 +30,8 @@ int handle_kprobe(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KPROBE(handle_kprobe_auto) +SEC("ksyscall/nanosleep") +int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { kprobe2_res = 11; return 0; @@ -56,11 +55,11 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe_auto) +SEC("kretsyscall/nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { kretprobe2_res = 22; - return 0; + return ret; } SEC("uprobe") diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index f00a9731930e..196cd8dfe42a 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -8,6 +8,8 @@ #define EINVAL 22 #define ENOENT 2 +extern unsigned long CONFIG_HZ __kconfig; + int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; int test_einval_netns_id = 0; @@ -16,6 +18,11 @@ int test_eproto_l4proto = 0; int test_enonet_netns_id = 0; int test_enoent_lookup = 0; int test_eafnosupport = 0; +int test_alloc_entry = -EINVAL; +int test_insert_entry = -EAFNOSUPPORT; +int test_succ_lookup = -ENOENT; +u32 test_delta_timeout = 0; +u32 test_status = 0; struct nf_conn; @@ -26,31 +33,44 @@ struct bpf_ct_opts___local { u8 reserved[3]; } __attribute__((preserve_access_index)); +struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; static __always_inline void -nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, - struct bpf_ct_opts___local *, u32), +nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), void *ctx) { struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; + int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); - ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_einval_bpf_tuple = opts_def.error; opts_def.reserved[0] = 1; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.reserved[0] = 0; opts_def.l4proto = IPPROTO_TCP; if (ct) @@ -59,21 +79,24 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_einval_reserved = opts_def.error; opts_def.netns_id = -2; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_einval_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def) - 1); if (ct) bpf_ct_release(ct); else test_einval_len_opts = opts_def.error; opts_def.l4proto = IPPROTO_ICMP; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.l4proto = IPPROTO_TCP; if (ct) bpf_ct_release(ct); @@ -81,37 +104,75 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_eproto_l4proto = opts_def.error; opts_def.netns_id = 0xf00f; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_enonet_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_enoent_lookup = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_eafnosupport = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + bpf_ct_set_status(ct, IPS_CONFIRMED); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + /* update ct entry timeout */ + bpf_ct_change_timeout(ct_lk, 10000); + test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); + test_delta_timeout /= CONFIG_HZ; + test_status = IPS_SEEN_REPLY; + bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY); + bpf_ct_release(ct_lk); + test_succ_lookup = 0; + } + bpf_ct_release(ct_ins); + test_insert_entry = 0; + } + test_alloc_entry = 0; + } } SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { - nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { - nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c new file mode 100644 index 000000000000..bf79af15c808 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; + +SEC("?tc") +int alloc_release(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_release(ct); + return 0; +} + +SEC("?tc") +int insert_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int lookup_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int set_timeout_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int set_status_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_status(ct, 0); + return 0; +} + +SEC("?tc") +int change_timeout_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int change_status_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_status(ct, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c index 3ac3603ad53d..a3c7c1042f35 100644 --- a/tools/testing/selftests/bpf/progs/test_core_extern.c +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -11,6 +11,7 @@ static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; extern int LINUX_KERNEL_VERSION __kconfig; +extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak; extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; extern bool CONFIG_BOOL __kconfig __weak; @@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak; extern uint64_t CONFIG_MISSING __kconfig __weak; uint64_t kern_ver = -1; +uint64_t unkn_virt_val = -1; uint64_t bpf_syscall = -1; uint64_t tristate_val = -1; uint64_t bool_val = -1; @@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx) int i; kern_ver = LINUX_KERNEL_VERSION; + unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN; bpf_syscall = CONFIG_BPF_SYSCALL; tristate_val = CONFIG_TRISTATE; bool_val = CONFIG_BOOL; diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 702578a5e496..8e1495008e4d 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -1,35 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <linux/ptrace.h> -#include <linux/bpf.h> - -#include <netinet/in.h> - +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" static struct sockaddr_in old; -SEC("kprobe/" SYS_PREFIX "sys_connect") -int BPF_KPROBE(handle_sys_connect) +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) { -#if SYSCALL_WRAPPER == 1 - struct pt_regs *real_regs; -#endif struct sockaddr_in new; - void *ptr; - -#if SYSCALL_WRAPPER == 0 - ptr = (void *)PT_REGS_PARM2(ctx); -#else - real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); - bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); -#endif - bpf_probe_read_user(&old, sizeof(old), ptr); + bpf_probe_read_user(&old, sizeof(old), uservaddr); __builtin_memset(&new, 0xab, sizeof(new)); - bpf_probe_write_user(ptr, &new, sizeof(new)); + bpf_probe_write_user(uservaddr, &new, sizeof(new)); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1b1187d2967b..1a4e93f6d9df 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; int read_mostly_var __read_mostly; int out_mostly_var; +char huge_arr[16 * 1024 * 1024]; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -71,6 +73,8 @@ int handler(const void *ctx) out_mostly_var = read_mostly_var; + huge_arr[sizeof(huge_arr) - 1] = 123; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 125d872d7981..ba48fcb98ab2 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -239,7 +239,7 @@ bool parse_udp(void *data, void *data_end, udp = data + off; if (udp + 1 > data_end) - return 0; + return false; if (!is_icmp) { pckt->flow.port16[0] = udp->source; pckt->flow.port16[1] = udp->dest; @@ -247,7 +247,7 @@ bool parse_udp(void *data, void *data_end, pckt->flow.port16[0] = udp->dest; pckt->flow.port16[1] = udp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -261,7 +261,7 @@ bool parse_tcp(void *data, void *data_end, tcp = data + off; if (tcp + 1 > data_end) - return 0; + return false; if (tcp->syn) pckt->flags |= (1 << 1); if (!is_icmp) { @@ -271,7 +271,7 @@ bool parse_tcp(void *data, void *data_end, pckt->flow.port16[0] = tcp->dest; pckt->flow.port16[1] = tcp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -287,7 +287,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, void *data; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -295,7 +295,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct ipv6hdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || ip6h + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 56710; @@ -314,7 +314,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->saddr.in6_u.u6_addr32[2] = 3; ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); - return 1; + return true; } static __attribute__ ((noinline)) @@ -335,7 +335,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, ip_suffix <<= 15; ip_suffix ^= pckt->flow.src; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -343,7 +343,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct iphdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || iph + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 8; @@ -367,8 +367,8 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; - return 1; + return false; + return true; } static __attribute__ ((noinline)) @@ -386,10 +386,10 @@ bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) else new_eth->eth_proto = 56710; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) @@ -404,10 +404,10 @@ bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); new_eth->eth_proto = 8; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index 392d28cc4e58..49936c4c8567 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -106,9 +106,9 @@ bpftool prog loadall \ bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0 -ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 -ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 -ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 +ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0 +ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1 +ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2 ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c index 2d0023659d88..a535d41dc20d 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -251,6 +251,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, .func_info_cnt = 2, BTF_TYPES diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 743ed34c1238..3fb4f69b1962 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -219,6 +219,59 @@ .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed", }, { + "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_ref", 10 }, + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "R1 must be referenced", +}, +{ + "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_release", 10 }, + }, + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c index a7c41375374f..708a9822259d 100644 --- a/tools/testing/selftests/net/ipv6_flowlabel.c +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -9,6 +9,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <linux/icmpv6.h> #include <linux/in6.h> #include <stdbool.h> #include <stdio.h> @@ -29,26 +30,48 @@ #ifndef IPV6_FLOWLABEL_MGR #define IPV6_FLOWLABEL_MGR 32 #endif +#ifndef IPV6_FLOWINFO_SEND +#define IPV6_FLOWINFO_SEND 33 +#endif #define FLOWLABEL_WILDCARD ((uint32_t) -1) static const char cfg_data[] = "a"; static uint32_t cfg_label = 1; +static bool use_ping; +static bool use_flowinfo_send; + +static struct icmp6hdr icmp6 = { + .icmp6_type = ICMPV6_ECHO_REQUEST +}; + +static struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, +}; static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) { char control[CMSG_SPACE(sizeof(flowlabel))] = {0}; struct msghdr msg = {0}; - struct iovec iov = {0}; + struct iovec iov = { + .iov_base = (char *)cfg_data, + .iov_len = sizeof(cfg_data) + }; int ret; - iov.iov_base = (char *)cfg_data; - iov.iov_len = sizeof(cfg_data); + if (use_ping) { + iov.iov_base = &icmp6; + iov.iov_len = sizeof(icmp6); + } msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (with_flowlabel) { + if (use_flowinfo_send) { + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + } else if (with_flowlabel) { struct cmsghdr *cm; cm = (void *)control; @@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) ret = recvmsg(fd, &msg, 0); if (ret == -1) error(1, errno, "recv"); + if (use_ping) + goto parse_cmsg; if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) error(1, 0, "recv: truncated"); if (ret != sizeof(cfg_data)) @@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) if (memcmp(data, cfg_data, sizeof(data))) error(1, 0, "recv: data mismatch"); +parse_cmsg: cm = CMSG_FIRSTHDR(&msg); if (with_flowlabel) { if (!cm) @@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); fprintf(stderr, "recv with label %u\n", flowlabel); - if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) { fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", flowlabel, expect); + error(1, 0, "recv: flowlabel is wrong"); + } } else { fprintf(stderr, "recv without label\n"); @@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "l:")) != -1) { + while ((c = getopt(argc, argv, "l:ps")) != -1) { switch (c) { case 'l': cfg_label = strtoul(optarg, NULL, 0); break; + case 'p': + use_ping = true; + break; + case 's': + use_flowinfo_send = true; + break; default: error(1, 0, "%s: parse error", argv[0]); } @@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv) int main(int argc, char **argv) { - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(8000), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; const int one = 1; int fdt, fdr; + int prot = 0; + + addr.sin6_port = htons(8000); parse_opts(argc, argv); - fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (use_ping) { + fprintf(stderr, "attempting to use ping sockets\n"); + prot = IPPROTO_ICMPV6; + } + + fdt = socket(PF_INET6, SOCK_DGRAM, prot); if (fdt == -1) error(1, errno, "socket t"); - fdr = socket(PF_INET6, SOCK_DGRAM, 0); + fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0); if (fdr == -1) error(1, errno, "socket r"); if (connect(fdt, (void *)&addr, sizeof(addr))) error(1, errno, "connect"); - if (bind(fdr, (void *)&addr, sizeof(addr))) + if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr))) error(1, errno, "bind"); flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); @@ -216,13 +253,21 @@ int main(int argc, char **argv) do_recv(fdr, false, 0); } + if (use_flowinfo_send) { + fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n"); + addr.sin6_flowinfo = htonl(cfg_label); + if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one, + sizeof(one)) == -1) + error(1, errno, "setsockopt flowinfo_send"); + } + fprintf(stderr, "send label\n"); do_send(fdt, true, cfg_label); do_recv(fdr, true, cfg_label); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) + if (!use_ping && close(fdt)) error(1, errno, "close t"); return 0; diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh index d3bc6442704e..cee95e252bee 100755 --- a/tools/testing/selftests/net/ipv6_flowlabel.sh +++ b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)" ./in_netns.sh \ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' +echo "TEST datapath (with ping-sockets)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p' + +echo "TEST datapath (with flowinfo-send)" +./in_netns.sh \ + sh -c './ipv6_flowlabel -l 1 -s' + +echo "TEST datapath (with ping-sockets flowinfo-send)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p -s' + echo OK. All tests passed diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index dc26aae0feb0..4ecbac197c46 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1597,6 +1597,38 @@ TEST_F(tls_err, bad_cmsg) EXPECT_EQ(errno, EBADMSG); } +TEST_F(tls_err, timeo) +{ + struct timeval tv = { .tv_usec = 10000, }; + char buf[128]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + wait(&ret); + } else { + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + exit(0); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; |