From 86a559787e6f5cf662c081363f64a20cad654195 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 27 Aug 2018 09:32:17 +0800 Subject: virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the support of reporting hints of guest free pages to host via virtio-balloon. Currenlty, only free page blocks of MAX_ORDER - 1 are reported. They are obtained one by one from the mm free list via the regular allocation function. Host requests the guest to report free page hints by sending a new cmd id to the guest via the free_page_report_cmd_id configuration register. When the guest starts to report, it first sends a start cmd to host via the free page vq, which acks to host the cmd id received. When the guest finishes reporting free pages, a stop cmd is sent to host via the vq. Host may also send a stop cmd id to the guest to stop the reporting. VIRTIO_BALLOON_CMD_ID_STOP: Host sends this cmd to stop the guest reporting. VIRTIO_BALLOON_CMD_ID_DONE: Host sends this cmd to tell the guest that the reported pages are ready to be freed. Why does the guest free the reported pages when host tells it is ready to free? This is because freeing pages appears to be expensive for live migration. free_pages() dirties memory very quickly and makes the live migraion not converge in some cases. So it is good to delay the free_page operation when the migration is done, and host sends a command to guest about that. Why do we need the new VIRTIO_BALLOON_CMD_ID_DONE, instead of reusing VIRTIO_BALLOON_CMD_ID_STOP? This is because live migration is usually done in several rounds. At the end of each round, host needs to send a VIRTIO_BALLOON_CMD_ID_STOP cmd to the guest to stop (or say pause) the reporting. The guest resumes the reporting when it receives a new command id at the beginning of the next round. So we need a new cmd id to distinguish between "stop reporting" and "ready to free the reported pages". TODO: - Add a batch page allocation API to amortize the allocation overhead. Signed-off-by: Wei Wang Signed-off-by: Liang Li Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_balloon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 13b8cb563892..47c9eb401c08 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,15 +34,20 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 +#define VIRTIO_BALLOON_CMD_ID_STOP 0 +#define VIRTIO_BALLOON_CMD_ID_DONE 1 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ __u32 num_pages; /* Number of pages we've actually got in balloon. */ __u32 actual; + /* Free page report command id, readonly by guest */ + __u32 free_page_report_cmd_id; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- cgit v1.2.3 From 2e991629bcf55a43681aec1ee096eeb03cf81709 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 27 Aug 2018 09:32:19 +0800 Subject: virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the guest is using page poisoning. Guest writes to the poison_val config field to tell host about the page poisoning value that is in use. Suggested-by: Michael S. Tsirkin Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 10 ++++++++++ include/uapi/linux/virtio_balloon.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index a18567889289..728ecd1eea30 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -825,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; + __u32 poison_val; int err; if (!vdev->config->get) { @@ -892,6 +893,11 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->num_free_page_blocks = 0; spin_lock_init(&vb->free_page_list_lock); INIT_LIST_HEAD(&vb->free_page_list); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { + memset(&poison_val, PAGE_POISON, sizeof(poison_val)); + virtio_cwrite(vb->vdev, struct virtio_balloon_config, + poison_val, &poison_val); + } } /* * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a @@ -992,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev) static int virtballoon_validate(struct virtio_device *vdev) { + if (!page_poisoning_enabled()) + __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); + __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); return 0; } @@ -1001,6 +1010,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_FREE_PAGE_HINT, + VIRTIO_BALLOON_F_PAGE_POISON, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 47c9eb401c08..a1966cd7b677 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ +#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -48,6 +49,8 @@ struct virtio_balloon_config { __u32 actual; /* Free page report command id, readonly by guest */ __u32 free_page_report_cmd_id; + /* Stores PAGE_POISON if page poisoning is in use */ + __u32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ -- cgit v1.2.3 From 5a8de47b3c250521dd632cdedaac6db88367defa Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 24 Oct 2018 13:54:03 +0200 Subject: netfilter: bridge: define INT_MIN & INT_MAX in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 4.19, programs like ebtables fail to build when they include "linux/netfilter_bridge.h". It is caused by commit 94276fa8a2a4 which added a use of INT_MIN and INT_MAX to the header: : In file included from /usr/include/linux/netfilter_bridge/ebtables.h:18, : from include/ebtables_u.h:28, : from communication.c:23: : /usr/include/linux/netfilter_bridge.h:30:20: error: 'INT_MIN' undeclared here (not in a function) : NF_BR_PRI_FIRST = INT_MIN, : ^~~~~~~ Define these constants by including "limits.h" when !__KERNEL__ (the same way as for other netfilter_* headers). Fixes: 94276fa8a2a4 ("netfilter: bridge: Expose nf_tables bridge hook priorities through uapi") Signed-off-by: Jiri Slaby Acked-by: Máté Eckl Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h index 156ccd089df1..1610fdbab98d 100644 --- a/include/uapi/linux/netfilter_bridge.h +++ b/include/uapi/linux/netfilter_bridge.h @@ -11,6 +11,10 @@ #include #include +#ifndef __KERNEL__ +#include /* for INT_MIN, INT_MAX */ +#endif + /* Bridge Hooks */ /* After promisc drops, checksum checks. */ #define NF_BR_PRE_ROUTING 0 -- cgit v1.2.3 From 70025f84e5b79627a6739533c4fe7cef5b605886 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2018 17:46:51 +0100 Subject: KEYS: Provide key type operations for asymmetric key ops [ver #2] Provide five new operations in the key_type struct that can be used to provide access to asymmetric key operations. These will be implemented for the asymmetric key type in a later patch and may refer to a key retained in RAM by the kernel or a key retained in crypto hardware. int (*asym_query)(const struct kernel_pkey_params *params, struct kernel_pkey_query *info); int (*asym_eds_op)(struct kernel_pkey_params *params, const void *in, void *out); int (*asym_verify_signature)(struct kernel_pkey_params *params, const void *in, const void *in2); Since encrypt, decrypt and sign are identical in their interfaces, they're rolled together in the asym_eds_op() operation and there's an operation ID in the params argument to distinguish them. Verify is different in that we supply the data and the signature instead and get an error value (or 0) as the only result on the expectation that this may well be how a hardware crypto device may work. Signed-off-by: David Howells Tested-by: Marcel Holtmann Reviewed-by: Marcel Holtmann Reviewed-by: Denis Kenzior Tested-by: Denis Kenzior Signed-off-by: James Morris --- Documentation/security/keys/core.rst | 106 +++++++++++++++++++++++++++++++++++ include/linux/key-type.h | 11 ++++ include/linux/keyctl.h | 46 +++++++++++++++ include/uapi/linux/keyctl.h | 5 ++ 4 files changed, 168 insertions(+) create mode 100644 include/linux/keyctl.h (limited to 'include/uapi/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index 9ce7256c6edb..c144978479d5 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1483,6 +1483,112 @@ The structure has a number of fields, some of which are mandatory: attempted key link operation. If there is no match, -EINVAL is returned. + * ``int (*asym_eds_op)(struct kernel_pkey_params *params, + const void *in, void *out);`` + ``int (*asym_verify_signature)(struct kernel_pkey_params *params, + const void *in, const void *in2);`` + + These methods are optional. If provided the first allows a key to be + used to encrypt, decrypt or sign a blob of data, and the second allows a + key to verify a signature. + + In all cases, the following information is provided in the params block:: + + struct kernel_pkey_params { + struct key *key; + const char *encoding; + const char *hash_algo; + char *info; + __u32 in_len; + union { + __u32 out_len; + __u32 in2_len; + }; + enum kernel_pkey_operation op : 8; + }; + + This includes the key to be used; a string indicating the encoding to use + (for instance, "pkcs1" may be used with an RSA key to indicate + RSASSA-PKCS1-v1.5 or RSAES-PKCS1-v1.5 encoding or "raw" if no encoding); + the name of the hash algorithm used to generate the data for a signature + (if appropriate); the sizes of the input and output (or second input) + buffers; and the ID of the operation to be performed. + + For a given operation ID, the input and output buffers are used as + follows:: + + Operation ID in,in_len out,out_len in2,in2_len + ======================= =============== =============== =============== + kernel_pkey_encrypt Raw data Encrypted data - + kernel_pkey_decrypt Encrypted data Raw data - + kernel_pkey_sign Raw data Signature - + kernel_pkey_verify Raw data - Signature + + asym_eds_op() deals with encryption, decryption and signature creation as + specified by params->op. Note that params->op is also set for + asym_verify_signature(). + + Encrypting and signature creation both take raw data in the input buffer + and return the encrypted result in the output buffer. Padding may have + been added if an encoding was set. In the case of signature creation, + depending on the encoding, the padding created may need to indicate the + digest algorithm - the name of which should be supplied in hash_algo. + + Decryption takes encrypted data in the input buffer and returns the raw + data in the output buffer. Padding will get checked and stripped off if + an encoding was set. + + Verification takes raw data in the input buffer and the signature in the + second input buffer and checks that the one matches the other. Padding + will be validated. Depending on the encoding, the digest algorithm used + to generate the raw data may need to be indicated in hash_algo. + + If successful, asym_eds_op() should return the number of bytes written + into the output buffer. asym_verify_signature() should return 0. + + A variety of errors may be returned, including EOPNOTSUPP if the operation + is not supported; EKEYREJECTED if verification fails; ENOPKG if the + required crypto isn't available. + + + * ``int (*asym_query)(const struct kernel_pkey_params *params, + struct kernel_pkey_query *info);`` + + This method is optional. If provided it allows information about the + public or asymmetric key held in the key to be determined. + + The parameter block is as for asym_eds_op() and co. but in_len and out_len + are unused. The encoding and hash_algo fields should be used to reduce + the returned buffer/data sizes as appropriate. + + If successful, the following information is filled in:: + + struct kernel_pkey_query { + __u32 supported_ops; + __u32 key_size; + __u16 max_data_size; + __u16 max_sig_size; + __u16 max_enc_size; + __u16 max_dec_size; + }; + + The supported_ops field will contain a bitmask indicating what operations + are supported by the key, including encryption of a blob, decryption of a + blob, signing a blob and verifying the signature on a blob. The following + constants are defined for this:: + + KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY} + + The key_size field is the size of the key in bits. max_data_size and + max_sig_size are the maximum raw data and signature sizes for creation and + verification of a signature; max_enc_size and max_dec_size are the maximum + raw data and signature sizes for encryption and decryption. The + max_*_size fields are measured in bytes. + + If successful, 0 will be returned. If the key doesn't support this, + EOPNOTSUPP will be returned. + + Request-Key Callback Service ============================ diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 05d8fb5a06c4..bc9af551fc83 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -17,6 +17,9 @@ #ifdef CONFIG_KEYS +struct kernel_pkey_query; +struct kernel_pkey_params; + /* * key under-construction record * - passed to the request_key actor if supplied @@ -155,6 +158,14 @@ struct key_type { */ struct key_restriction *(*lookup_restriction)(const char *params); + /* Asymmetric key accessor functions. */ + int (*asym_query)(const struct kernel_pkey_params *params, + struct kernel_pkey_query *info); + int (*asym_eds_op)(struct kernel_pkey_params *params, + const void *in, void *out); + int (*asym_verify_signature)(struct kernel_pkey_params *params, + const void *in, const void *in2); + /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h new file mode 100644 index 000000000000..c7c48c79ce0e --- /dev/null +++ b/include/linux/keyctl.h @@ -0,0 +1,46 @@ +/* keyctl kernel bits + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef __LINUX_KEYCTL_H +#define __LINUX_KEYCTL_H + +#include + +struct kernel_pkey_query { + __u32 supported_ops; /* Which ops are supported */ + __u32 key_size; /* Size of the key in bits */ + __u16 max_data_size; /* Maximum size of raw data to sign in bytes */ + __u16 max_sig_size; /* Maximum size of signature in bytes */ + __u16 max_enc_size; /* Maximum size of encrypted blob in bytes */ + __u16 max_dec_size; /* Maximum size of decrypted blob in bytes */ +}; + +enum kernel_pkey_operation { + kernel_pkey_encrypt, + kernel_pkey_decrypt, + kernel_pkey_sign, + kernel_pkey_verify, +}; + +struct kernel_pkey_params { + struct key *key; + const char *encoding; /* Encoding (eg. "oaep" or "raw" for none) */ + const char *hash_algo; /* Digest algorithm used (eg. "sha1") or NULL if N/A */ + char *info; /* Modified info string to be released later */ + __u32 in_len; /* Input data size */ + union { + __u32 out_len; /* Output buffer size (enc/dec/sign) */ + __u32 in2_len; /* 2nd input data size (verify) */ + }; + enum kernel_pkey_operation op : 8; +}; + +#endif /* __LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 0f3cb13db8e9..1d1e9f2877af 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -82,4 +82,9 @@ struct keyctl_kdf_params { __u32 __spare[8]; }; +#define KEYCTL_SUPPORTS_ENCRYPT 0x01 +#define KEYCTL_SUPPORTS_DECRYPT 0x02 +#define KEYCTL_SUPPORTS_SIGN 0x04 +#define KEYCTL_SUPPORTS_VERIFY 0x08 + #endif /* _LINUX_KEYCTL_H */ -- cgit v1.2.3 From 00d60fd3b93219ea854220f0fd264b86398cbc53 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2018 17:46:59 +0100 Subject: KEYS: Provide keyctls to drive the new key type ops for asymmetric keys [ver #2] Provide five keyctl functions that permit userspace to make use of the new key type ops for accessing and driving asymmetric keys. (*) Query an asymmetric key. long keyctl(KEYCTL_PKEY_QUERY, key_serial_t key, unsigned long reserved, struct keyctl_pkey_query *info); Get information about an asymmetric key. The information is returned in the keyctl_pkey_query struct: __u32 supported_ops; A bit mask of flags indicating which ops are supported. This is constructed from a bitwise-OR of: KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY} __u32 key_size; The size in bits of the key. __u16 max_data_size; __u16 max_sig_size; __u16 max_enc_size; __u16 max_dec_size; The maximum sizes in bytes of a blob of data to be signed, a signature blob, a blob to be encrypted and a blob to be decrypted. reserved must be set to 0. This is intended for future use to hand over one or more passphrases needed unlock a key. If successful, 0 is returned. If the key is not an asymmetric key, EOPNOTSUPP is returned. (*) Encrypt, decrypt, sign or verify a blob using an asymmetric key. long keyctl(KEYCTL_PKEY_ENCRYPT, const struct keyctl_pkey_params *params, const char *info, const void *in, void *out); long keyctl(KEYCTL_PKEY_DECRYPT, const struct keyctl_pkey_params *params, const char *info, const void *in, void *out); long keyctl(KEYCTL_PKEY_SIGN, const struct keyctl_pkey_params *params, const char *info, const void *in, void *out); long keyctl(KEYCTL_PKEY_VERIFY, const struct keyctl_pkey_params *params, const char *info, const void *in, const void *in2); Use an asymmetric key to perform a public-key cryptographic operation a blob of data. The parameter block pointed to by params contains a number of integer values: __s32 key_id; __u32 in_len; __u32 out_len; __u32 in2_len; For a given operation, the in and out buffers are used as follows: Operation ID in,in_len out,out_len in2,in2_len ======================= =============== =============== =========== KEYCTL_PKEY_ENCRYPT Raw data Encrypted data - KEYCTL_PKEY_DECRYPT Encrypted data Raw data - KEYCTL_PKEY_SIGN Raw data Signature - KEYCTL_PKEY_VERIFY Raw data - Signature info is a string of key=value pairs that supply supplementary information. The __spare space in the parameter block must be set to 0. This is intended, amongst other things, to allow the passing of passphrases required to unlock a key. If successful, encrypt, decrypt and sign all return the amount of data written into the output buffer. Verification returns 0 on success. Signed-off-by: David Howells Tested-by: Marcel Holtmann Reviewed-by: Marcel Holtmann Reviewed-by: Denis Kenzior Tested-by: Denis Kenzior Signed-off-by: James Morris --- Documentation/security/keys/core.rst | 111 ++++++++++++ include/uapi/linux/keyctl.h | 25 +++ security/keys/Makefile | 1 + security/keys/compat.c | 18 ++ security/keys/internal.h | 39 +++++ security/keys/keyctl.c | 24 +++ security/keys/keyctl_pkey.c | 323 +++++++++++++++++++++++++++++++++++ 7 files changed, 541 insertions(+) create mode 100644 security/keys/keyctl_pkey.c (limited to 'include/uapi/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index c144978479d5..9521c4207f01 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -859,6 +859,7 @@ The keyctl syscall functions are: and either the buffer length or the OtherInfo length exceeds the allowed length. + * Restrict keyring linkage:: long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring, @@ -890,6 +891,116 @@ The keyctl syscall functions are: applicable to the asymmetric key type. + * Query an asymmetric key:: + + long keyctl(KEYCTL_PKEY_QUERY, + key_serial_t key_id, unsigned long reserved, + struct keyctl_pkey_query *info); + + Get information about an asymmetric key. The information is returned in + the keyctl_pkey_query struct:: + + __u32 supported_ops; + __u32 key_size; + __u16 max_data_size; + __u16 max_sig_size; + __u16 max_enc_size; + __u16 max_dec_size; + __u32 __spare[10]; + + ``supported_ops`` contains a bit mask of flags indicating which ops are + supported. This is constructed from a bitwise-OR of:: + + KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY} + + ``key_size`` indicated the size of the key in bits. + + ``max_*_size`` indicate the maximum sizes in bytes of a blob of data to be + signed, a signature blob, a blob to be encrypted and a blob to be + decrypted. + + ``__spare[]`` must be set to 0. This is intended for future use to hand + over one or more passphrases needed unlock a key. + + If successful, 0 is returned. If the key is not an asymmetric key, + EOPNOTSUPP is returned. + + + * Encrypt, decrypt, sign or verify a blob using an asymmetric key:: + + long keyctl(KEYCTL_PKEY_ENCRYPT, + const struct keyctl_pkey_params *params, + const char *info, + const void *in, + void *out); + + long keyctl(KEYCTL_PKEY_DECRYPT, + const struct keyctl_pkey_params *params, + const char *info, + const void *in, + void *out); + + long keyctl(KEYCTL_PKEY_SIGN, + const struct keyctl_pkey_params *params, + const char *info, + const void *in, + void *out); + + long keyctl(KEYCTL_PKEY_VERIFY, + const struct keyctl_pkey_params *params, + const char *info, + const void *in, + const void *in2); + + Use an asymmetric key to perform a public-key cryptographic operation a + blob of data. For encryption and verification, the asymmetric key may + only need the public parts to be available, but for decryption and signing + the private parts are required also. + + The parameter block pointed to by params contains a number of integer + values:: + + __s32 key_id; + __u32 in_len; + __u32 out_len; + __u32 in2_len; + + ``key_id`` is the ID of the asymmetric key to be used. ``in_len`` and + ``in2_len`` indicate the amount of data in the in and in2 buffers and + ``out_len`` indicates the size of the out buffer as appropriate for the + above operations. + + For a given operation, the in and out buffers are used as follows:: + + Operation ID in,in_len out,out_len in2,in2_len + ======================= =============== =============== =============== + KEYCTL_PKEY_ENCRYPT Raw data Encrypted data - + KEYCTL_PKEY_DECRYPT Encrypted data Raw data - + KEYCTL_PKEY_SIGN Raw data Signature - + KEYCTL_PKEY_VERIFY Raw data - Signature + + ``info`` is a string of key=value pairs that supply supplementary + information. These include: + + ``enc=`` The encoding of the encrypted/signature blob. This + can be "pkcs1" for RSASSA-PKCS1-v1.5 or + RSAES-PKCS1-v1.5; "pss" for "RSASSA-PSS"; "oaep" for + "RSAES-OAEP". If omitted or is "raw", the raw output + of the encryption function is specified. + + ``hash=`` If the data buffer contains the output of a hash + function and the encoding includes some indication of + which hash function was used, the hash function can be + specified with this, eg. "hash=sha256". + + The ``__spare[]`` space in the parameter block must be set to 0. This is + intended, amongst other things, to allow the passing of passphrases + required to unlock a key. + + If successful, encrypt, decrypt and sign all return the amount of data + written into the output buffer. Verification returns 0 on success. + + Kernel Services =============== diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 1d1e9f2877af..f45ee0f69c0c 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -61,6 +61,11 @@ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ #define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #define KEYCTL_DH_COMPUTE 23 /* Compute Diffie-Hellman values */ +#define KEYCTL_PKEY_QUERY 24 /* Query public key parameters */ +#define KEYCTL_PKEY_ENCRYPT 25 /* Encrypt a blob using a public key */ +#define KEYCTL_PKEY_DECRYPT 26 /* Decrypt a blob using a public key */ +#define KEYCTL_PKEY_SIGN 27 /* Create a public key signature */ +#define KEYCTL_PKEY_VERIFY 28 /* Verify a public key signature */ #define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ /* keyctl structures */ @@ -87,4 +92,24 @@ struct keyctl_kdf_params { #define KEYCTL_SUPPORTS_SIGN 0x04 #define KEYCTL_SUPPORTS_VERIFY 0x08 +struct keyctl_pkey_query { + __u32 supported_ops; /* Which ops are supported */ + __u32 key_size; /* Size of the key in bits */ + __u16 max_data_size; /* Maximum size of raw data to sign in bytes */ + __u16 max_sig_size; /* Maximum size of signature in bytes */ + __u16 max_enc_size; /* Maximum size of encrypted blob in bytes */ + __u16 max_dec_size; /* Maximum size of decrypted blob in bytes */ + __u32 __spare[10]; +}; + +struct keyctl_pkey_params { + __s32 key_id; /* Serial no. of public key to use */ + __u32 in_len; /* Input data size */ + union { + __u32 out_len; /* Output buffer size (encrypt/decrypt/sign) */ + __u32 in2_len; /* 2nd input data size (verify) */ + }; + __u32 __spare[7]; +}; + #endif /* _LINUX_KEYCTL_H */ diff --git a/security/keys/Makefile b/security/keys/Makefile index ef1581b337a3..9cef54064f60 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o obj-$(CONFIG_KEY_DH_OPERATIONS) += dh.o +obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += keyctl_pkey.o # # Key types diff --git a/security/keys/compat.c b/security/keys/compat.c index e87c89c0177c..9482df601dc3 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -141,6 +141,24 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, return keyctl_restrict_keyring(arg2, compat_ptr(arg3), compat_ptr(arg4)); + case KEYCTL_PKEY_QUERY: + if (arg3 != 0) + return -EINVAL; + return keyctl_pkey_query(arg2, + compat_ptr(arg4), + compat_ptr(arg5)); + + case KEYCTL_PKEY_ENCRYPT: + case KEYCTL_PKEY_DECRYPT: + case KEYCTL_PKEY_SIGN: + return keyctl_pkey_e_d_s(option, + compat_ptr(arg2), compat_ptr(arg3), + compat_ptr(arg4), compat_ptr(arg5)); + + case KEYCTL_PKEY_VERIFY: + return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3), + compat_ptr(arg4), compat_ptr(arg5)); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 9f8208dc0e55..74cb0ff42fed 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -298,6 +298,45 @@ static inline long compat_keyctl_dh_compute( #endif #endif +#ifdef CONFIG_ASYMMETRIC_KEY_TYPE +extern long keyctl_pkey_query(key_serial_t, + const char __user *, + struct keyctl_pkey_query __user *); + +extern long keyctl_pkey_verify(const struct keyctl_pkey_params __user *, + const char __user *, + const void __user *, const void __user *); + +extern long keyctl_pkey_e_d_s(int, + const struct keyctl_pkey_params __user *, + const char __user *, + const void __user *, void __user *); +#else +static inline long keyctl_pkey_query(key_serial_t id, + const char __user *_info, + struct keyctl_pkey_query __user *_res) +{ + return -EOPNOTSUPP; +} + +static inline long keyctl_pkey_verify(const struct keyctl_pkey_params __user *params, + const char __user *_info, + const void __user *_in, + const void __user *_in2) +{ + return -EOPNOTSUPP; +} + +static inline long keyctl_pkey_e_d_s(int op, + const struct keyctl_pkey_params __user *params, + const char __user *_info, + const void __user *_in, + void __user *_out) +{ + return -EOPNOTSUPP; +} +#endif + /* * Debugging key validation */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 1ffe60bb2845..18619690ce77 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1747,6 +1747,30 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, (const char __user *) arg3, (const char __user *) arg4); + case KEYCTL_PKEY_QUERY: + if (arg3 != 0) + return -EINVAL; + return keyctl_pkey_query((key_serial_t)arg2, + (const char __user *)arg4, + (struct keyctl_pkey_query *)arg5); + + case KEYCTL_PKEY_ENCRYPT: + case KEYCTL_PKEY_DECRYPT: + case KEYCTL_PKEY_SIGN: + return keyctl_pkey_e_d_s( + option, + (const struct keyctl_pkey_params __user *)arg2, + (const char __user *)arg3, + (const void __user *)arg4, + (void __user *)arg5); + + case KEYCTL_PKEY_VERIFY: + return keyctl_pkey_verify( + (const struct keyctl_pkey_params __user *)arg2, + (const char __user *)arg3, + (const void __user *)arg4, + (const void __user *)arg5); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c new file mode 100644 index 000000000000..783978842f13 --- /dev/null +++ b/security/keys/keyctl_pkey.c @@ -0,0 +1,323 @@ +/* Public-key operation keyctls + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static void keyctl_pkey_params_free(struct kernel_pkey_params *params) +{ + kfree(params->info); + key_put(params->key); +} + +enum { + Opt_err = -1, + Opt_enc, /* "enc=" eg. "enc=oaep" */ + Opt_hash, /* "hash=" eg. "hash=sha1" */ +}; + +static const match_table_t param_keys = { + { Opt_enc, "enc=%s" }, + { Opt_hash, "hash=%s" }, + { Opt_err, NULL } +}; + +/* + * Parse the information string which consists of key=val pairs. + */ +static int keyctl_pkey_params_parse(struct kernel_pkey_params *params) +{ + unsigned long token_mask = 0; + substring_t args[MAX_OPT_ARGS]; + char *c = params->info, *p, *q; + int token; + + while ((p = strsep(&c, " \t"))) { + if (*p == '\0' || *p == ' ' || *p == '\t') + continue; + token = match_token(p, param_keys, args); + if (__test_and_set_bit(token, &token_mask)) + return -EINVAL; + q = args[0].from; + if (!q[0]) + return -EINVAL; + + switch (token) { + case Opt_enc: + params->encoding = q; + break; + + case Opt_hash: + params->hash_algo = q; + break; + + default: + return -EINVAL; + } + } + + return 0; +} + +/* + * Interpret parameters. Callers must always call the free function + * on params, even if an error is returned. + */ +static int keyctl_pkey_params_get(key_serial_t id, + const char __user *_info, + struct kernel_pkey_params *params) +{ + key_ref_t key_ref; + void *p; + int ret; + + memset(params, 0, sizeof(*params)); + params->encoding = "raw"; + + p = strndup_user(_info, PAGE_SIZE); + if (IS_ERR(p)) + return PTR_ERR(p); + params->info = p; + + ret = keyctl_pkey_params_parse(params); + if (ret < 0) + return ret; + + key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + params->key = key_ref_to_ptr(key_ref); + + if (!params->key->type->asym_query) + return -EOPNOTSUPP; + + return 0; +} + +/* + * Get parameters from userspace. Callers must always call the free function + * on params, even if an error is returned. + */ +static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_params, + const char __user *_info, + int op, + struct kernel_pkey_params *params) +{ + struct keyctl_pkey_params uparams; + struct kernel_pkey_query info; + int ret; + + memset(params, 0, sizeof(*params)); + params->encoding = "raw"; + + if (copy_from_user(&uparams, _params, sizeof(uparams)) != 0) + return -EFAULT; + + ret = keyctl_pkey_params_get(uparams.key_id, _info, params); + if (ret < 0) + return ret; + + ret = params->key->type->asym_query(params, &info); + if (ret < 0) + return ret; + + switch (op) { + case KEYCTL_PKEY_ENCRYPT: + case KEYCTL_PKEY_DECRYPT: + if (uparams.in_len > info.max_enc_size || + uparams.out_len > info.max_dec_size) + return -EINVAL; + break; + case KEYCTL_PKEY_SIGN: + case KEYCTL_PKEY_VERIFY: + if (uparams.in_len > info.max_sig_size || + uparams.out_len > info.max_data_size) + return -EINVAL; + break; + default: + BUG(); + } + + params->in_len = uparams.in_len; + params->out_len = uparams.out_len; + return 0; +} + +/* + * Query information about an asymmetric key. + */ +long keyctl_pkey_query(key_serial_t id, + const char __user *_info, + struct keyctl_pkey_query __user *_res) +{ + struct kernel_pkey_params params; + struct kernel_pkey_query res; + long ret; + + memset(¶ms, 0, sizeof(params)); + + ret = keyctl_pkey_params_get(id, _info, ¶ms); + if (ret < 0) + goto error; + + ret = params.key->type->asym_query(¶ms, &res); + if (ret < 0) + goto error; + + ret = -EFAULT; + if (copy_to_user(_res, &res, sizeof(res)) == 0 && + clear_user(_res->__spare, sizeof(_res->__spare)) == 0) + ret = 0; + +error: + keyctl_pkey_params_free(¶ms); + return ret; +} + +/* + * Encrypt/decrypt/sign + * + * Encrypt data, decrypt data or sign data using a public key. + * + * _info is a string of supplementary information in key=val format. For + * instance, it might contain: + * + * "enc=pkcs1 hash=sha256" + * + * where enc= specifies the encoding and hash= selects the OID to go in that + * particular encoding if required. If enc= isn't supplied, it's assumed that + * the caller is supplying raw values. + * + * If successful, the amount of data written into the output buffer is + * returned. + */ +long keyctl_pkey_e_d_s(int op, + const struct keyctl_pkey_params __user *_params, + const char __user *_info, + const void __user *_in, + void __user *_out) +{ + struct kernel_pkey_params params; + void *in, *out; + long ret; + + ret = keyctl_pkey_params_get_2(_params, _info, op, ¶ms); + if (ret < 0) + goto error_params; + + ret = -EOPNOTSUPP; + if (!params.key->type->asym_eds_op) + goto error_params; + + switch (op) { + case KEYCTL_PKEY_ENCRYPT: + params.op = kernel_pkey_encrypt; + break; + case KEYCTL_PKEY_DECRYPT: + params.op = kernel_pkey_decrypt; + break; + case KEYCTL_PKEY_SIGN: + params.op = kernel_pkey_sign; + break; + default: + BUG(); + } + + in = memdup_user(_in, params.in_len); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto error_params; + } + + ret = -ENOMEM; + out = kmalloc(params.out_len, GFP_KERNEL); + if (!out) + goto error_in; + + ret = params.key->type->asym_eds_op(¶ms, in, out); + if (ret < 0) + goto error_out; + + if (copy_to_user(_out, out, ret) != 0) + ret = -EFAULT; + +error_out: + kfree(out); +error_in: + kfree(in); +error_params: + keyctl_pkey_params_free(¶ms); + return ret; +} + +/* + * Verify a signature. + * + * Verify a public key signature using the given key, or if not given, search + * for a matching key. + * + * _info is a string of supplementary information in key=val format. For + * instance, it might contain: + * + * "enc=pkcs1 hash=sha256" + * + * where enc= specifies the signature blob encoding and hash= selects the OID + * to go in that particular encoding. If enc= isn't supplied, it's assumed + * that the caller is supplying raw values. + * + * If successful, 0 is returned. + */ +long keyctl_pkey_verify(const struct keyctl_pkey_params __user *_params, + const char __user *_info, + const void __user *_in, + const void __user *_in2) +{ + struct kernel_pkey_params params; + void *in, *in2; + long ret; + + ret = keyctl_pkey_params_get_2(_params, _info, KEYCTL_PKEY_VERIFY, + ¶ms); + if (ret < 0) + goto error_params; + + ret = -EOPNOTSUPP; + if (!params.key->type->asym_verify_signature) + goto error_params; + + in = memdup_user(_in, params.in_len); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto error_params; + } + + in2 = memdup_user(_in2, params.in2_len); + if (IS_ERR(in2)) { + ret = PTR_ERR(in2); + goto error_in; + } + + params.op = kernel_pkey_verify; + ret = params.key->type->asym_verify_signature(¶ms, in, in2); + + kfree(in2); +error_in: + kfree(in); +error_params: + keyctl_pkey_params_free(¶ms); + return ret; +} -- cgit v1.2.3 From 4269fea768a11a447d8de620ce420f2214d4685c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 26 Oct 2018 11:14:28 +0200 Subject: Revert "netfilter: nft_numgen: add map lookups for numgen random operations" Laura found a better way to do this from userspace without requiring kernel infrastructure, revert this. Fixes: 978d8f9055c3 ("netfilter: nft_numgen: add map lookups for numgen random operations") Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nft_numgen.c | 127 ------------------------------- 2 files changed, 2 insertions(+), 129 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 579974b0bf0d..7de4f1bdaf06 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1635,8 +1635,8 @@ enum nft_ng_attributes { NFTA_NG_MODULUS, NFTA_NG_TYPE, NFTA_NG_OFFSET, - NFTA_NG_SET_NAME, - NFTA_NG_SET_ID, + NFTA_NG_SET_NAME, /* deprecated */ + NFTA_NG_SET_ID, /* deprecated */ __NFTA_NG_MAX }; #define NFTA_NG_MAX (__NFTA_NG_MAX - 1) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 649d1700ec5b..3cc1b3dc3c3c 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -24,7 +24,6 @@ struct nft_ng_inc { u32 modulus; atomic_t counter; u32 offset; - struct nft_set *map; }; static u32 nft_ng_inc_gen(struct nft_ng_inc *priv) @@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_inc_gen(priv); } -static void nft_ng_inc_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_inc_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, [NFTA_NG_OFFSET] = { .type = NLA_U32 }, - [NFTA_NG_SET_NAME] = { .type = NLA_STRING, - .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_NG_SET_ID] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_inc_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_inc_init(ctx, expr, tb); - - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_inc_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_INCREMENTAL, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; u32 offset; - struct nft_set *map; }; static u32 nft_ng_random_gen(struct nft_ng_random *priv) @@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_random_gen(priv); } -static void nft_ng_random_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_random_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static int nft_ng_random_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_random_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_random_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); @@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_random_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_random *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_RANDOM, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .dump = nft_ng_inc_dump, }; -static const struct nft_expr_ops nft_ng_inc_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), - .eval = nft_ng_inc_map_eval, - .init = nft_ng_inc_map_init, - .dump = nft_ng_inc_map_dump, -}; - static const struct nft_expr_ops nft_ng_random_ops = { .type = &nft_ng_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), @@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = { .dump = nft_ng_random_dump, }; -static const struct nft_expr_ops nft_ng_random_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), - .eval = nft_ng_random_map_eval, - .init = nft_ng_random_map_init, - .dump = nft_ng_random_map_dump, -}; - static const struct nft_expr_ops * nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { @@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) switch (type) { case NFT_NG_INCREMENTAL: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_inc_map_ops; return &nft_ng_inc_ops; case NFT_NG_RANDOM: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_random_map_ops; return &nft_ng_random_ops; } -- cgit v1.2.3 From 4f8f382e635707ddaddf8269a116e4f8cc8835c0 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 30 Oct 2018 22:24:04 -0700 Subject: perf tools: Don't clone maps from parent when synthesizing forks When synthesizing FORK events, we are trying to create thread objects for the already running tasks on the machine. Normally, for a kernel FORK event, we want to clone the parent's maps because that is what the kernel just did. But when synthesizing, this should not be done. If we do, we end up with overlapping maps as we process the sythesized MMAP2 events that get delivered shortly thereafter. Use the FORK event misc flags in an internal way to signal this situation, so we can elide the map clone when appropriate. Signed-off-by: David S. Miller Cc: Don Zickus Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/20181030.222404.2085088822877051075.davem@davemloft.net [ Added comment about flag use in machine__process_fork_event(), use ternary op in thread__clone_map_groups() as suggested by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 2 ++ tools/include/uapi/linux/perf_event.h | 2 ++ tools/perf/util/event.c | 1 + tools/perf/util/machine.c | 19 ++++++++++++++++++- tools/perf/util/thread.c | 13 +++++-------- tools/perf/util/thread.h | 2 +- 6 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f35eb72739c0..9de8780ac8d9 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f35eb72739c0..9de8780ac8d9 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -646,10 +646,12 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal) * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_FORK_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * These PERF_RECORD_MISC_* flags below are safely reused diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bc646185f8d9..e9c108a6b1c3 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8ee8ab39d8ac..8f36ce813bc5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. + */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2048d393ece6..3d9ed7d0e281 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) @@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - if (map_groups__clone(thread, parent->mg) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 36c09a9904e6..30e2b4c165fe 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -89,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); -- cgit v1.2.3 From dafb7f9aef2fd44991ff1691721ff765a23be27b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 2 Nov 2018 06:36:32 -0400 Subject: v4l2-controls: add a missing include As warned by "make headers_check", the definition for the linux-specific integer types is missing: ./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata") Reported-by: Linus Torvalds Reported-by: Stephen Rothwell Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 51b095898f4b..86a54916206f 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -47,6 +47,8 @@ * videodev2.h. */ +#include + #ifndef __LINUX_V4L2_CONTROLS_H #define __LINUX_V4L2_CONTROLS_H -- cgit v1.2.3 From fd82d61ba142f0b83463e47064bf5460aac57b6e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Nov 2018 13:59:45 +0800 Subject: sctp: fix strchange_flags name for Stream Change Event As defined in rfc6525#section-6.1.3, SCTP_STREAM_CHANGE_DENIED and SCTP_STREAM_CHANGE_FAILED should be used instead of SCTP_ASSOC_CHANGE_DENIED and SCTP_ASSOC_CHANGE_FAILED. To keep the compatibility, fix it by adding two macros. Fixes: b444153fb5a6 ("sctp: add support for generating add stream change event notification") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 34dd3d497f2c..680ecc3bf2a9 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -568,6 +568,8 @@ struct sctp_assoc_reset_event { #define SCTP_ASSOC_CHANGE_DENIED 0x0004 #define SCTP_ASSOC_CHANGE_FAILED 0x0008 +#define SCTP_STREAM_CHANGE_DENIED SCTP_ASSOC_CHANGE_DENIED +#define SCTP_STREAM_CHANGE_FAILED SCTP_ASSOC_CHANGE_FAILED struct sctp_stream_change_event { __u16 strchange_type; __u16 strchange_flags; -- cgit v1.2.3 From 12480e3b16982c4026de10dd8155823219cd6391 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Nov 2018 14:01:31 +0800 Subject: sctp: define SCTP_SS_DEFAULT for Stream schedulers According to rfc8260#section-4.3.2, SCTP_SS_DEFAULT is required to defined as SCTP_SS_FCFS or SCTP_SS_RR. SCTP_SS_FCFS is used for SCTP_SS_DEFAULT's value in this patch. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Jianwen Ji Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/outqueue.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 680ecc3bf2a9..c81feb373d3e 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1153,6 +1153,7 @@ struct sctp_add_streams { /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, + SCTP_SS_DEFAULT = SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR, SCTP_SS_MAX = SCTP_SS_RR diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 9cb854b05342..c37e1c2dec9d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - sctp_sched_set_sched(asoc, SCTP_SS_FCFS); + sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT); } /* Free the outqueue structure and any related pending chunks. -- cgit v1.2.3 From aba118389a6fb2ad7958de0f37b5869852bd38cf Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 1 Nov 2018 14:03:08 +0300 Subject: uapi: fix linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by via to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:250:2: error: unknown type name 'uint32_t' uint32_t reset_type; /usr/include/linux/kfd_ioctl.h:251:2: error: unknown type name 'uint32_t' uint32_t reset_cause; /usr/include/linux/kfd_ioctl.h:252:2: error: unknown type name 'uint32_t' uint32_t memory_lost; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t gpu_id; Fixes: 0c119abad7f0d ("drm/amd: Add kfd ioctl defines for hw_exception event") Cc: # v4.19 Signed-off-by: Dmitry V. Levin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f5ff8a76e208..dae897f38e59 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -255,10 +255,10 @@ struct kfd_hsa_memory_exception_data { /* hw exception data */ struct kfd_hsa_hw_exception_data { - uint32_t reset_type; - uint32_t reset_cause; - uint32_t memory_lost; - uint32_t gpu_id; + __u32 reset_type; + __u32 reset_cause; + __u32 memory_lost; + __u32 gpu_id; }; /* Event data */ -- cgit v1.2.3 From 8e7f91719db36440d63de37331367be9700ca0c7 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 1 Nov 2018 14:03:28 +0300 Subject: uapi: fix more linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by via to fix struct kfd_ioctl_get_queue_wave_state_args userspace compilation errors. Fixes: 5df099e8bc83f ("drm/amdkfd: Add wavefront context save state retrieval ioctl") Signed-off-by: Dmitry V. Levin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dae897f38e59..b01eb502d49c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -83,11 +83,11 @@ struct kfd_ioctl_set_cu_mask_args { }; struct kfd_ioctl_get_queue_wave_state_args { - uint64_t ctl_stack_address; /* to KFD */ - uint32_t ctl_stack_used_size; /* from KFD */ - uint32_t save_area_used_size; /* from KFD */ - uint32_t queue_id; /* to KFD */ - uint32_t pad; + __u64 ctl_stack_address; /* to KFD */ + __u32 ctl_stack_used_size; /* from KFD */ + __u32 save_area_used_size; /* from KFD */ + __u32 queue_id; /* to KFD */ + __u32 pad; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ -- cgit v1.2.3 From df18bfd35bbf7cb1a420b5beede1de29343793b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 07:09:07 -0400 Subject: media: v4l: fix uapi mpeg slice params definition We get a headers_check warning about the newly defined ioctl command structures: ./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include This is resolved by including linux/types.h, as suggested by the warning, but there is another problem: Three of the four structures have an odd number of __u8 headers, but are aligned to 32 bit in the v4l2_ctrl_mpeg2_slice_params, so we get an implicit padding byte for each one. To solve that, let's add explicit padding that can be set to zero and verified in the kernel. Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls.c | 5 +++++ include/uapi/linux/v4l2-controls.h | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 6e37950292cd..5f2b033a7a42 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1664,6 +1664,11 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx, p_mpeg2_slice_params->forward_ref_index >= VIDEO_MAX_FRAME) return -EINVAL; + if (p_mpeg2_slice_params->pad || + p_mpeg2_slice_params->picture.pad || + p_mpeg2_slice_params->sequence.pad) + return -EINVAL; + return 0; case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION: diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 86a54916206f..998983a6e6b7 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -47,11 +47,11 @@ * videodev2.h. */ -#include - #ifndef __LINUX_V4L2_CONTROLS_H #define __LINUX_V4L2_CONTROLS_H +#include + /* Control classes */ #define V4L2_CTRL_CLASS_USER 0x00980000 /* Old-style 'user' controls */ #define V4L2_CTRL_CLASS_MPEG 0x00990000 /* MPEG-compression controls */ @@ -1112,6 +1112,7 @@ struct v4l2_mpeg2_sequence { __u8 profile_and_level_indication; __u8 progressive_sequence; __u8 chroma_format; + __u8 pad; }; struct v4l2_mpeg2_picture { @@ -1130,6 +1131,7 @@ struct v4l2_mpeg2_picture { __u8 alternate_scan; __u8 repeat_first_field; __u8 progressive_frame; + __u8 pad; }; struct v4l2_ctrl_mpeg2_slice_params { @@ -1144,6 +1146,7 @@ struct v4l2_ctrl_mpeg2_slice_params { __u8 backward_ref_index; __u8 forward_ref_index; + __u8 pad; }; struct v4l2_ctrl_mpeg2_quantization { -- cgit v1.2.3 From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: x86/speculation: Add prctl() control for indirect branch speculation Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de --- Documentation/userspace-api/spec_ctrl.rst | 9 +++++ arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 +++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 5 +++ include/linux/sched.h | 9 +++++ include/uapi/linux/prctl.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 7 files changed, 93 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index 32f3d55c54b7..c4dbe6f7cdae 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -92,3 +92,12 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + +- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes + (Mitigate Spectre V2 style attacks against user processes) + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d4d35baf0430..2adbe7b047fa 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9cab538e10f1..74359fff87fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -566,6 +566,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -752,12 +754,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -790,11 +830,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -974,6 +1037,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -986,6 +1051,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index afbe2eb4a1c6..7d31192296a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -450,6 +450,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..d607db5fcc6a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void) #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) -- cgit v1.2.3 From b7df9ada9a7700dbcca1ba53d217c01e3d48179c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 1 Dec 2018 01:18:53 +0100 Subject: bpf: fix pointer offsets in context for 32 bit Currently, pointer offsets in three BPF context structures are broken in two scenarios: i) 32 bit compiled applications running on 64 bit kernels, and ii) LLVM compiled BPF programs running on 32 bit kernels. The latter is due to BPF target machine being strictly 64 bit. So in each of the cases the offsets will mismatch in verifier when checking / rewriting context access. Fix this by providing a helper macro __bpf_md_ptr() that will enforce padding up to 64 bit and proper alignment, and for context access a macro bpf_ctx_range_ptr() which will cover full 64 bit member range on 32 bit archs. For flow_keys, we additionally need to force the size check to sizeof(__u64) as with other pointer types. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Reported-by: David S. Miller Signed-off-by: Daniel Borkmann Acked-by: David S. Miller Tested-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 7 +++++++ include/uapi/linux/bpf.h | 17 ++++++++++++----- net/core/filter.c | 16 ++++++++-------- tools/include/uapi/linux/bpf.h | 17 ++++++++++++----- 4 files changed, 39 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 448dcc448f1f..795ff0b869bb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -449,6 +449,13 @@ struct sock_reuseport; offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 +#if BITS_PER_LONG == 64 +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#else +# define bpf_ctx_range_ptr(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 +#endif /* BITS_PER_LONG == 64 */ #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 852dc17ab47a..426b5c8a245b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2462,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); }; struct bpf_tunnel_key { @@ -2572,8 +2578,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2589,8 +2595,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) diff --git a/net/core/filter.c b/net/core/filter.c index 9a1327eb25fa..6ee605da990f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5435,8 +5435,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != size_default) return false; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): - if (size != sizeof(struct bpf_flow_keys *)) + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) return false; break; default: @@ -5464,7 +5464,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5489,7 +5489,7 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): @@ -5530,7 +5530,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -5756,7 +5756,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5958,7 +5958,7 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -6039,7 +6039,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; case bpf_ctx_range(struct __sk_buff, tc_classid): diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 852dc17ab47a..426b5c8a245b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2462,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); }; struct bpf_tunnel_key { @@ -2572,8 +2578,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2589,8 +2595,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) -- cgit v1.2.3 From f71c6143c2038df1cb43a4b9c90740d14f77467c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 30 Nov 2018 15:32:20 -0800 Subject: bpf: Support sk lookup in netns with id 0 David Ahern and Nicolas Dichtel report that the handling of the netns id 0 is incorrect for the BPF socket lookup helpers: rather than finding the netns with id 0, it is resolving to the current netns. This renders the netns_id 0 inaccessible. To fix this, adjust the API for the netns to treat all negative s32 values as a lookup in the current netns (including u64 values which when truncated to s32 become negative), while any values with a positive value in the signed 32-bit integer space would result in a lookup for a socket in the netns corresponding to that id. As before, if the netns with that ID does not exist, no socket will be found. Any netns outside of these ranges will fail to find a corresponding socket, as those values are reserved for future usage. Signed-off-by: Joe Stringer Acked-by: Nicolas Dichtel Acked-by: Joey Pabalinas Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 35 ++++++++++++-------- net/core/filter.c | 11 ++++--- tools/include/uapi/linux/bpf.h | 39 +++++++++++++++-------- tools/testing/selftests/bpf/bpf_helpers.h | 4 +-- tools/testing/selftests/bpf/test_sk_lookup_kern.c | 18 +++++------ 5 files changed, 63 insertions(+), 44 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 426b5c8a245b..cba518c57229 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2170,7 +2170,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2187,12 +2187,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2202,7 +2204,7 @@ union bpf_attr { * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2219,12 +2221,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2405,6 +2409,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/net/core/filter.c b/net/core/filter.c index 6ee605da990f..8d2c629501e2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *net; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; - if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + if (unlikely(family == AF_UNSPEC || flags || + !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (skb->dev) caller_net = dev_net(skb->dev); else caller_net = sock_net(skb->sk); - if (netns_id) { + if ((s32)netns_id < 0) { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } else { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; sk = sk_lookup(net, tuple, skb, family, proto); put_net(net); - } else { - net = caller_net; - sk = sk_lookup(net, tuple, skb, family, proto); } if (sk) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 426b5c8a245b..76b265c7d93e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2170,7 +2170,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2187,12 +2187,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2201,8 +2203,10 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, @@ -2219,12 +2223,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2233,6 +2239,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description @@ -2405,6 +2413,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 686e57ce40f4..efb6c13ab0de 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_udp; static int (*bpf_sk_release)(struct bpf_sock *sk) = diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c index b745bdc08c2b..e21cd736c196 100644 --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return TC_ACT_SHOT; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return 0; @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family = 0; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { bpf_sk_release(sk); family = sk->family; @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { sk += 1; bpf_sk_release(sk); @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; if (sk) bpf_sk_release(sk); @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); return 0; } @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); bpf_sk_release(sk); return 0; @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); return 0; } @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } SEC("fail_no_release_subcall") -- cgit v1.2.3 From d74286d2c25ad29dbf9e342955dd8dc31f21653b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 30 Nov 2018 15:32:21 -0800 Subject: bpf: Improve socket lookup reuseport documentation Improve the wording around socket lookup for reuseport sockets, and ensure that both bpf.h headers are in sync. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ++++ tools/include/uapi/linux/bpf.h | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cba518c57229..72c453a8bf50 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2203,6 +2203,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2237,6 +2239,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sk) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 76b265c7d93e..72c453a8bf50 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2203,8 +2203,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2239,8 +2239,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, *struct bpf_sock* - * return is from reuse->socks[] using hash of the packet. + * For sockets with reuseport option, the *struct bpf_sock* + * result is from reuse->socks[] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sk) * Description -- cgit v1.2.3 From 52ea899637c746984d657b508da6e3f2686adfca Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 5 Dec 2018 10:42:21 +1000 Subject: Input: add `REL_WHEEL_HI_RES` and `REL_HWHEEL_HI_RES` This event code represents scroll reports from high-resolution wheels and is modelled after the approach Windows uses. The value 120 is one detent (wheel click) of movement. Mice with higher-resolution scrolling can send fractions of 120 which must be accumulated in userspace. Userspace can either wait for a full 120 to accumulate or scroll by fractions of one logical scroll movement as the events come in. 120 was picked as magic number because it has a high number of integer fractions that can be used by high-resolution wheels. For more information see https://docs.microsoft.com/en-us/previous-versions/windows/hardware/design/dn613912(v=vs.85) These new axes obsolete REL_WHEEL and REL_HWHEEL. The legacy axes are emulated by the kernel but the most accurate (and most granular) data is available through the new axes. Signed-off-by: Peter Hutterer Acked-by: Dmitry Torokhov Verified-by: Harry Cutts Signed-off-by: Benjamin Tissoires --- Documentation/input/event-codes.rst | 21 ++++++++++++++++++++- include/uapi/linux/input-event-codes.h | 2 ++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst index a8c0873beb95..b24b5343f5eb 100644 --- a/Documentation/input/event-codes.rst +++ b/Documentation/input/event-codes.rst @@ -190,7 +190,26 @@ A few EV_REL codes have special meanings: * REL_WHEEL, REL_HWHEEL: - These codes are used for vertical and horizontal scroll wheels, - respectively. + respectively. The value is the number of detents moved on the wheel, the + physical size of which varies by device. For high-resolution wheels + this may be an approximation based on the high-resolution scroll events, + see REL_WHEEL_HI_RES. These event codes are legacy codes and + REL_WHEEL_HI_RES and REL_HWHEEL_HI_RES should be preferred where + available. + +* REL_WHEEL_HI_RES, REL_HWHEEL_HI_RES: + + - High-resolution scroll wheel data. The accumulated value 120 represents + movement by one detent. For devices that do not provide high-resolution + scrolling, the value is always a multiple of 120. For devices with + high-resolution scrolling, the value may be a fraction of 120. + + If a vertical scroll wheel supports high-resolution scrolling, this code + will be emitted in addition to REL_WHEEL or REL_HWHEEL. The REL_WHEEL + and REL_HWHEEL may be an approximation based on the high-resolution + scroll events. There is no guarantee that the high-resolution data + is a multiple of 120 at the time of an emulated REL_WHEEL or REL_HWHEEL + event. EV_ABS ------ diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 3eb5a4c3d60a..265ef2028660 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -716,6 +716,8 @@ * the situation described above. */ #define REL_RESERVED 0x0a +#define REL_WHEEL_HI_RES 0x0b +#define REL_HWHEEL_HI_RES 0x0c #define REL_MAX 0x0f #define REL_CNT (REL_MAX+1) -- cgit v1.2.3