diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 00:32:22 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 00:53:05 +0300 |
commit | 84c7d76b5ab6a52e1b3d8101b9f910c128dca396 (patch) | |
tree | c015aaa6f0cacdf36488eb3bcbde834892679a15 | |
parent | 87caef42200cd44f8b808ec2f8ac2257f3e0a8c1 (diff) | |
parent | 13909a0c88972c5ef5d13f44d1a8bf065a31bdf4 (diff) | |
download | linux-84c7d76b5ab6a52e1b3d8101b9f910c128dca396.tar.xz |
Merge tag 'v6.10-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Remove crypto stats interface
Algorithms:
- Add faster AES-XTS on modern x86_64 CPUs
- Forbid curves with order less than 224 bits in ecc (FIPS 186-5)
- Add ECDSA NIST P521
Drivers:
- Expose otp zone in atmel
- Add dh fallback for primes > 4K in qat
- Add interface for live migration in qat
- Use dma for aes requests in starfive
- Add full DMA support for stm32mpx in stm32
- Add Tegra Security Engine driver
Others:
- Introduce scope-based x509_certificate allocation"
* tag 'v6.10-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (123 commits)
crypto: atmel-sha204a - provide the otp content
crypto: atmel-sha204a - add reading from otp zone
crypto: atmel-i2c - rename read function
crypto: atmel-i2c - add missing arg description
crypto: iaa - Use kmemdup() instead of kzalloc() and memcpy()
crypto: sahara - use 'time_left' variable with wait_for_completion_timeout()
crypto: api - use 'time_left' variable with wait_for_completion_killable_timeout()
crypto: caam - i.MX8ULP donot have CAAM page0 access
crypto: caam - init-clk based on caam-page0-access
crypto: starfive - Use fallback for unaligned dma access
crypto: starfive - Do not free stack buffer
crypto: starfive - Skip unneeded fallback allocation
crypto: starfive - Skip dma setup for zeroed message
crypto: hisilicon/sec2 - fix for register offset
crypto: hisilicon/debugfs - mask the unnecessary info from the dump
crypto: qat - specify firmware files for 402xx
crypto: x86/aes-gcm - simplify GCM hash subkey derivation
crypto: x86/aes-gcm - delete unused GCM assembly code
crypto: x86/aes-xts - simplify loop in xts_crypt_slowpath()
hwrng: stm32 - repair clock handling
...
153 files changed, 10315 insertions, 3205 deletions
diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml new file mode 100644 index 000000000000..cb47ae2889b6 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Security Engine for AES algorithms + +description: + The Tegra Security Engine accelerates the following AES encryption/decryption + algorithms - AES-ECB, AES-CBC, AES-OFB, AES-XTS, AES-CTR, AES-GCM, AES-CCM, + AES-CMAC + +maintainers: + - Akhil R <akhilrajeev@nvidia.com> + +properties: + compatible: + const: nvidia,tegra234-se-aes + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + iommus: + maxItems: 1 + + dma-coherent: true + +required: + - compatible + - reg + - clocks + - iommus + +additionalProperties: false + +examples: + - | + #include <dt-bindings/memory/tegra234-mc.h> + #include <dt-bindings/clock/tegra234-clock.h> + + crypto@15820000 { + compatible = "nvidia,tegra234-se-aes"; + reg = <0x15820000 0x10000>; + clocks = <&bpmp TEGRA234_CLK_SE>; + iommus = <&smmu TEGRA234_SID_SES_SE1>; + dma-coherent; + }; +... diff --git a/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml new file mode 100644 index 000000000000..f57ef10645e2 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/nvidia,tegra234-se-hash.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Security Engine for HASH algorithms + +description: + The Tegra Security HASH Engine accelerates the following HASH functions - + SHA1, SHA224, SHA256, SHA384, SHA512, SHA3-224, SHA3-256, SHA3-384, SHA3-512 + HMAC(SHA224), HMAC(SHA256), HMAC(SHA384), HMAC(SHA512) + +maintainers: + - Akhil R <akhilrajeev@nvidia.com> + +properties: + compatible: + const: nvidia,tegra234-se-hash + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + iommus: + maxItems: 1 + + dma-coherent: true + +required: + - compatible + - reg + - clocks + - iommus + +additionalProperties: false + +examples: + - | + #include <dt-bindings/memory/tegra234-mc.h> + #include <dt-bindings/clock/tegra234-clock.h> + + crypto@15840000 { + compatible = "nvidia,tegra234-se-hash"; + reg = <0x15840000 0x10000>; + clocks = <&bpmp TEGRA234_CLK_SE>; + iommus = <&smmu TEGRA234_SID_SES_SE2>; + dma-coherent; + }; +... diff --git a/Documentation/devicetree/bindings/crypto/omap-sham.txt b/Documentation/devicetree/bindings/crypto/omap-sham.txt deleted file mode 100644 index ad9115569611..000000000000 --- a/Documentation/devicetree/bindings/crypto/omap-sham.txt +++ /dev/null @@ -1,28 +0,0 @@ -OMAP SoC SHA crypto Module - -Required properties: - -- compatible : Should contain entries for this and backward compatible - SHAM versions: - - "ti,omap2-sham" for OMAP2 & OMAP3. - - "ti,omap4-sham" for OMAP4 and AM33XX. - - "ti,omap5-sham" for OMAP5, DRA7 and AM43XX. -- ti,hwmods: Name of the hwmod associated with the SHAM module -- reg : Offset and length of the register set for the module -- interrupts : the interrupt-specifier for the SHAM module. - -Optional properties: -- dmas: DMA specifiers for the rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request name. Should be "rx" if a dma is present. - -Example: - /* AM335x */ - sham: sham@53100000 { - compatible = "ti,omap4-sham"; - ti,hwmods = "sham"; - reg = <0x53100000 0x200>; - interrupts = <109>; - dmas = <&edma 36>; - dma-names = "rx"; - }; diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml index e91bc7dc6ad3..0304f074cf08 100644 --- a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml +++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml @@ -15,6 +15,7 @@ properties: - enum: - qcom,sa8775p-inline-crypto-engine - qcom,sc7180-inline-crypto-engine + - qcom,sc7280-inline-crypto-engine - qcom,sm8450-inline-crypto-engine - qcom,sm8550-inline-crypto-engine - qcom,sm8650-inline-crypto-engine diff --git a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml index 71a2876bd6e4..7ccb6e1641d0 100644 --- a/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml +++ b/Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml @@ -12,7 +12,9 @@ maintainers: properties: compatible: - const: starfive,jh7110-crypto + enum: + - starfive,jh7110-crypto + - starfive,jh8100-crypto reg: maxItems: 1 @@ -28,7 +30,10 @@ properties: - const: ahb interrupts: - maxItems: 1 + minItems: 1 + items: + - description: SHA2 module irq + - description: SM3 module irq resets: maxItems: 1 @@ -54,6 +59,27 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + const: starfive,jh7110-crypto + + then: + properties: + interrupts: + maxItems: 1 + + - if: + properties: + compatible: + const: starfive,jh8100-crypto + + then: + properties: + interrupts: + minItems: 2 + examples: - | crypto: crypto@16000000 { diff --git a/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml new file mode 100644 index 000000000000..d69b50228009 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/ti,omap-sham.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: OMAP SoC SHA crypto Module + +maintainers: + - Animesh Agarwal <animeshagarwal28@gmail.com> + +properties: + compatible: + enum: + - ti,omap2-sham + - ti,omap4-sham + - ti,omap5-sham + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + dmas: + maxItems: 1 + + dma-names: + const: rx + + ti,hwmods: + description: Name of the hwmod associated with the SHAM module + $ref: /schemas/types.yaml#/definitions/string + enum: [sham] + +dependencies: + dmas: [dma-names] + +additionalProperties: false + +required: + - compatible + - ti,hwmods + - reg + - interrupts + +examples: + - | + sham@53100000 { + compatible = "ti,omap4-sham"; + ti,hwmods = "sham"; + reg = <0x53100000 0x200>; + interrupts = <109>; + dmas = <&edma 36>; + dma-names = "rx"; + }; diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst index de587cf9cbed..f4fba897d931 100644 --- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst +++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst @@ -179,7 +179,9 @@ has the old 'iax' device naming in place) :: # configure wq1.0 - accel-config config-wq --group-id=0 --mode=dedicated --type=kernel --name="iaa_crypto" --device_name="crypto" iax1/wq1.0 + accel-config config-wq --group-id=0 --mode=dedicated --type=kernel --priority=10 --name="iaa_crypto" --driver-name="crypto" iax1/wq1.0 + + accel-config config-engine iax1/engine1.0 --group-id=0 # enable IAA device iax1 @@ -321,33 +323,30 @@ driver will generate statistics which can be accessed in debugfs at:: # ls -al /sys/kernel/debug/iaa-crypto/ total 0 - drwxr-xr-x 2 root root 0 Mar 3 09:35 . - drwx------ 47 root root 0 Mar 3 09:35 .. - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_acomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_adecomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_comp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 max_decomp_delay_ns - -rw-r--r-- 1 root root 0 Mar 3 09:35 stats_reset - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_comp_bytes_out - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_comp_calls - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_decomp_bytes_in - -rw-r--r-- 1 root root 0 Mar 3 09:35 total_decomp_calls - -rw-r--r-- 1 root root 0 Mar 3 09:35 wq_stats - -Most of the above statisticss are self-explanatory. The wq_stats file -shows per-wq stats, a set for each iaa device and wq in addition to -some global stats:: + drwxr-xr-x 2 root root 0 Mar 3 07:55 . + drwx------ 53 root root 0 Mar 3 07:55 .. + -rw-r--r-- 1 root root 0 Mar 3 07:55 global_stats + -rw-r--r-- 1 root root 0 Mar 3 07:55 stats_reset + -rw-r--r-- 1 root root 0 Mar 3 07:55 wq_stats - # cat wq_stats +The global_stats file shows a set of global statistics collected since +the driver has been loaded or reset:: + + # cat global_stats global stats: - total_comp_calls: 100 - total_decomp_calls: 100 - total_comp_bytes_out: 22800 - total_decomp_bytes_in: 22800 + total_comp_calls: 4300 + total_decomp_calls: 4164 + total_sw_decomp_calls: 0 + total_comp_bytes_out: 5993989 + total_decomp_bytes_in: 5993989 total_completion_einval_errors: 0 total_completion_timeout_errors: 0 - total_completion_comp_buf_overflow_errors: 0 + total_completion_comp_buf_overflow_errors: 136 + +The wq_stats file shows per-wq stats, a set for each iaa device and wq +in addition to some global stats:: + # cat wq_stats iaa device: id: 1 n_wqs: 1 @@ -379,21 +378,36 @@ some global stats:: iaa device: id: 5 n_wqs: 1 - comp_calls: 100 - comp_bytes: 22800 - decomp_calls: 100 - decomp_bytes: 22800 + comp_calls: 1360 + comp_bytes: 1999776 + decomp_calls: 0 + decomp_bytes: 0 wqs: name: iaa_crypto - comp_calls: 100 - comp_bytes: 22800 - decomp_calls: 100 - decomp_bytes: 22800 + comp_calls: 1360 + comp_bytes: 1999776 + decomp_calls: 0 + decomp_bytes: 0 -Writing 0 to 'stats_reset' resets all the stats, including the + iaa device: + id: 7 + n_wqs: 1 + comp_calls: 2940 + comp_bytes: 3994213 + decomp_calls: 4164 + decomp_bytes: 5993989 + wqs: + name: iaa_crypto + comp_calls: 2940 + comp_bytes: 3994213 + decomp_calls: 4164 + decomp_bytes: 5993989 + ... + +Writing to 'stats_reset' resets all the stats, including the per-device and per-wq stats:: - # echo 0 > stats_reset + # echo 1 > stats_reset # cat wq_stats global stats: total_comp_calls: 0 @@ -536,12 +550,20 @@ The below script automatically does that:: echo "End Disable IAA" + echo "Reload iaa_crypto module" + + rmmod iaa_crypto + modprobe iaa_crypto + + echo "End Reload iaa_crypto module" + # # configure iaa wqs and devices # echo "Configure IAA" for ((i = 1; i < ${num_iaa} * 2; i += 2)); do - accel-config config-wq --group-id=0 --mode=dedicated --size=128 --priority=10 --type=kernel --name="iaa_crypto" --driver_name="crypto" iax${i}/wq${i} + accel-config config-wq --group-id=0 --mode=dedicated --wq-size=128 --priority=10 --type=kernel --name="iaa_crypto" --driver-name="crypto" iax${i}/wq${i}.0 + accel-config config-engine iax${i}/engine${i}.0 --group-id=0 done echo "End Configure IAA" @@ -552,10 +574,10 @@ The below script automatically does that:: echo "Enable IAA" for ((i = 1; i < ${num_iaa} * 2; i += 2)); do - echo enable iaa iaa${i} - accel-config enable-device iaa${i} - echo enable wq iaa${i}/wq${i}.0 - accel-config enable-wq iaa${i}/wq${i}.0 + echo enable iaa iax${i} + accel-config enable-device iax${i} + echo enable wq iax${i}/wq${i}.0 + accel-config enable-wq iax${i}/wq${i}.0 done echo "End Enable IAA" diff --git a/MAINTAINERS b/MAINTAINERS index bd7c38c1e000..c8d98580f821 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21764,6 +21764,11 @@ M: Prashant Gaikwad <pgaikwad@nvidia.com> S: Supported F: drivers/clk/tegra/ +TEGRA CRYPTO DRIVERS +M: Akhil R <akhilrajeev@nvidia.com> +S: Supported +F: drivers/crypto/tegra/* + TEGRA DMA DRIVERS M: Laxman Dewangan <ldewangan@nvidia.com> M: Jon Hunter <jonathanh@nvidia.com> diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 1dc5bbbfeed2..b262eaa9170c 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -25,33 +25,28 @@ .endm /* preload all round keys */ - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] + .macro load_round_keys, rk, nr, tmp + add \tmp, \rk, \nr, sxtw #4 + sub \tmp, \tmp, #160 + ld1 {v17.4s-v20.4s}, [\rk] + ld1 {v21.4s-v24.4s}, [\tmp], #64 + ld1 {v25.4s-v28.4s}, [\tmp], #64 + ld1 {v29.4s-v31.4s}, [\tmp] .endm /* prepare for encryption with key in rk[] */ .macro enc_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm /* prepare for encryption (again) but with new key in rk[] */ .macro enc_switch_key, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm /* prepare for decryption with key in rk[] */ .macro dec_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp + load_round_keys \rk, \rounds, \temp .endm .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 @@ -110,14 +105,13 @@ /* up to 5 interleaved blocks */ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ + tbz \rounds, #2, .L\@ /* 128 bits */ round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 -1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 + tbz \rounds, #1, .L\@ /* 192 bits */ + round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 +.L\@: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 .endr fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 9de7fbc797af..3a8961b6ea51 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -99,16 +99,16 @@ ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds -1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ +.La\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ movi v15.16b, #0x40 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - subs \i, \i, #1 + sub \i, \i, #1 ld1 {v15.4s}, [\rkp], #16 - beq 2222f + cbz \i, .Lb\@ mix_columns \in, \enc - b 1111b -2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + b .La\@ +.Lb\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .endm .macro encrypt_block, in, rounds, rk, rkp, i @@ -206,7 +206,7 @@ ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds -1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ +.La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ @@ -216,13 +216,13 @@ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ sub_bytes_4x \in0, \in1, \in2, \in3 - subs \i, \i, #1 + sub \i, \i, #1 ld1 {v15.4s}, [\rkp], #16 - beq 2222f + cbz \i, .Lb\@ mix_columns_2x \in0, \in1, \enc mix_columns_2x \in2, \in3, \enc - b 1111b -2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + b .La\@ +.Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f1fb01cd5a25..145342e46ea8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -760,7 +760,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_STATS=y CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d33f814f78b2..dc237896f99d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -745,7 +745,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_STATS=y CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 8ad41da301e5..59aedf32c4ea 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -25,6 +25,16 @@ config AS_GFNI help Supported by binutils >= 2.30 and LLVM integrated assembler +config AS_VAES + def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + +config AS_VPCLMULQDQ + def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler + config AS_WRUSS def_bool $(as-instr,wrussq %rax$(comma)(%rbx)) help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 9aa46093c91b..9c5ce5613738 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -48,7 +48,8 @@ chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o \ + aes_ctrby8_avx-x86_64.o aes-xts-avx-x86_64.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S new file mode 100644 index 000000000000..48f97b79f7a9 --- /dev/null +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -0,0 +1,845 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * AES-XTS for modern x86_64 CPUs + * + * Copyright 2024 Google LLC + * + * Author: Eric Biggers <ebiggers@google.com> + */ + +/* + * This file implements AES-XTS for modern x86_64 CPUs. To handle the + * complexities of coding for x86 SIMD, e.g. where every vector length needs + * different code, it uses a macro to generate several implementations that + * share similar source code but are targeted at different CPUs, listed below: + * + * AES-NI + AVX + * - 128-bit vectors (1 AES block per vector) + * - VEX-coded instructions + * - xmm0-xmm15 + * - This is for older CPUs that lack VAES but do have AVX. + * + * VAES + VPCLMULQDQ + AVX2 + * - 256-bit vectors (2 AES blocks per vector) + * - VEX-coded instructions + * - ymm0-ymm15 + * - This is for CPUs that have VAES but lack AVX512 or AVX10, + * e.g. Intel's Alder Lake and AMD's Zen 3. + * + * VAES + VPCLMULQDQ + AVX10/256 + BMI2 + * - 256-bit vectors (2 AES blocks per vector) + * - EVEX-coded instructions + * - ymm0-ymm31 + * - This is for CPUs that have AVX512 but where using zmm registers causes + * downclocking, and for CPUs that have AVX10/256 but not AVX10/512. + * - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256. + * To avoid confusion with 512-bit, we just write AVX10/256. + * + * VAES + VPCLMULQDQ + AVX10/512 + BMI2 + * - Same as the previous one, but upgrades to 512-bit vectors + * (4 AES blocks per vector) in zmm0-zmm31. + * - This is for CPUs that have good AVX512 or AVX10/512 support. + * + * This file doesn't have an implementation for AES-NI alone (without AVX), as + * the lack of VEX would make all the assembly code different. + * + * When we use VAES, we also use VPCLMULQDQ to parallelize the computation of + * the XTS tweaks. This avoids a bottleneck. Currently there don't seem to be + * any CPUs that support VAES but not VPCLMULQDQ. If that changes, we might + * need to start also providing an implementation using VAES alone. + * + * The AES-XTS implementations in this file support everything required by the + * crypto API, including support for arbitrary input lengths and multi-part + * processing. However, they are most heavily optimized for the common case of + * power-of-2 length inputs that are processed in a single part (disk sectors). + */ + +#include <linux/linkage.h> +#include <linux/cfi_types.h> + +.section .rodata +.p2align 4 +.Lgf_poly: + // The low 64 bits of this value represent the polynomial x^7 + x^2 + x + // + 1. It is the value that must be XOR'd into the low 64 bits of the + // tweak each time a 1 is carried out of the high 64 bits. + // + // The high 64 bits of this value is just the internal carry bit that + // exists when there's a carry out of the low 64 bits of the tweak. + .quad 0x87, 1 + + // This table contains constants for vpshufb and vpblendvb, used to + // handle variable byte shifts and blending during ciphertext stealing + // on CPUs that don't support AVX10-style masking. +.Lcts_permute_table: + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 +.text + +// Function parameters +.set KEY, %rdi // Initially points to crypto_aes_ctx, then is + // advanced to point to 7th-from-last round key +.set SRC, %rsi // Pointer to next source data +.set DST, %rdx // Pointer to next destination data +.set LEN, %ecx // Remaining length in bytes +.set LEN8, %cl +.set LEN64, %rcx +.set TWEAK, %r8 // Pointer to next tweak + +// %rax holds the AES key length in bytes. +.set KEYLEN, %eax +.set KEYLEN64, %rax + +// %r9-r11 are available as temporaries. + +.macro _define_Vi i +.if VL == 16 + .set V\i, %xmm\i +.elseif VL == 32 + .set V\i, %ymm\i +.elseif VL == 64 + .set V\i, %zmm\i +.else + .error "Unsupported Vector Length (VL)" +.endif +.endm + +.macro _define_aliases + // Define register aliases V0-V15, or V0-V31 if all 32 SIMD registers + // are available, that map to the xmm, ymm, or zmm registers according + // to the selected Vector Length (VL). + _define_Vi 0 + _define_Vi 1 + _define_Vi 2 + _define_Vi 3 + _define_Vi 4 + _define_Vi 5 + _define_Vi 6 + _define_Vi 7 + _define_Vi 8 + _define_Vi 9 + _define_Vi 10 + _define_Vi 11 + _define_Vi 12 + _define_Vi 13 + _define_Vi 14 + _define_Vi 15 +.if USE_AVX10 + _define_Vi 16 + _define_Vi 17 + _define_Vi 18 + _define_Vi 19 + _define_Vi 20 + _define_Vi 21 + _define_Vi 22 + _define_Vi 23 + _define_Vi 24 + _define_Vi 25 + _define_Vi 26 + _define_Vi 27 + _define_Vi 28 + _define_Vi 29 + _define_Vi 30 + _define_Vi 31 +.endif + + // V0-V3 hold the data blocks during the main loop, or temporary values + // otherwise. V4-V5 hold temporary values. + + // V6-V9 hold XTS tweaks. Each 128-bit lane holds one tweak. + .set TWEAK0_XMM, %xmm6 + .set TWEAK0, V6 + .set TWEAK1_XMM, %xmm7 + .set TWEAK1, V7 + .set TWEAK2, V8 + .set TWEAK3, V9 + + // V10-V13 are used for computing the next values of TWEAK[0-3]. + .set NEXT_TWEAK0, V10 + .set NEXT_TWEAK1, V11 + .set NEXT_TWEAK2, V12 + .set NEXT_TWEAK3, V13 + + // V14 holds the constant from .Lgf_poly, copied to all 128-bit lanes. + .set GF_POLY_XMM, %xmm14 + .set GF_POLY, V14 + + // V15 holds the key for AES "round 0", copied to all 128-bit lanes. + .set KEY0_XMM, %xmm15 + .set KEY0, V15 + + // If 32 SIMD registers are available, then V16-V29 hold the remaining + // AES round keys, copied to all 128-bit lanes. + // + // AES-128, AES-192, and AES-256 use different numbers of round keys. + // To allow handling all three variants efficiently, we align the round + // keys to the *end* of this register range. I.e., AES-128 uses + // KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14. + // (All also use KEY0 for the XOR-only "round" at the beginning.) +.if USE_AVX10 + .set KEY1_XMM, %xmm16 + .set KEY1, V16 + .set KEY2_XMM, %xmm17 + .set KEY2, V17 + .set KEY3_XMM, %xmm18 + .set KEY3, V18 + .set KEY4_XMM, %xmm19 + .set KEY4, V19 + .set KEY5_XMM, %xmm20 + .set KEY5, V20 + .set KEY6_XMM, %xmm21 + .set KEY6, V21 + .set KEY7_XMM, %xmm22 + .set KEY7, V22 + .set KEY8_XMM, %xmm23 + .set KEY8, V23 + .set KEY9_XMM, %xmm24 + .set KEY9, V24 + .set KEY10_XMM, %xmm25 + .set KEY10, V25 + .set KEY11_XMM, %xmm26 + .set KEY11, V26 + .set KEY12_XMM, %xmm27 + .set KEY12, V27 + .set KEY13_XMM, %xmm28 + .set KEY13, V28 + .set KEY14_XMM, %xmm29 + .set KEY14, V29 +.endif + // V30-V31 are currently unused. +.endm + +// Move a vector between memory and a register. +.macro _vmovdqu src, dst +.if VL < 64 + vmovdqu \src, \dst +.else + vmovdqu8 \src, \dst +.endif +.endm + +// Broadcast a 128-bit value into a vector. +.macro _vbroadcast128 src, dst +.if VL == 16 && !USE_AVX10 + vmovdqu \src, \dst +.elseif VL == 32 && !USE_AVX10 + vbroadcasti128 \src, \dst +.else + vbroadcasti32x4 \src, \dst +.endif +.endm + +// XOR two vectors together. +.macro _vpxor src1, src2, dst +.if USE_AVX10 + vpxord \src1, \src2, \dst +.else + vpxor \src1, \src2, \dst +.endif +.endm + +// XOR three vectors together. +.macro _xor3 src1, src2, src3_and_dst +.if USE_AVX10 + // vpternlogd with immediate 0x96 is a three-argument XOR. + vpternlogd $0x96, \src1, \src2, \src3_and_dst +.else + vpxor \src1, \src3_and_dst, \src3_and_dst + vpxor \src2, \src3_and_dst, \src3_and_dst +.endif +.endm + +// Given a 128-bit XTS tweak in the xmm register \src, compute the next tweak +// (by multiplying by the polynomial 'x') and write it to \dst. +.macro _next_tweak src, tmp, dst + vpshufd $0x13, \src, \tmp + vpaddq \src, \src, \dst + vpsrad $31, \tmp, \tmp + vpand GF_POLY_XMM, \tmp, \tmp + vpxor \tmp, \dst, \dst +.endm + +// Given the XTS tweak(s) in the vector \src, compute the next vector of +// tweak(s) (by multiplying by the polynomial 'x^(VL/16)') and write it to \dst. +// +// If VL > 16, then there are multiple tweaks, and we use vpclmulqdq to compute +// all tweaks in the vector in parallel. If VL=16, we just do the regular +// computation without vpclmulqdq, as it's the faster method for a single tweak. +.macro _next_tweakvec src, tmp1, tmp2, dst +.if VL == 16 + _next_tweak \src, \tmp1, \dst +.else + vpsrlq $64 - VL/16, \src, \tmp1 + vpclmulqdq $0x01, GF_POLY, \tmp1, \tmp2 + vpslldq $8, \tmp1, \tmp1 + vpsllq $VL/16, \src, \dst + _xor3 \tmp1, \tmp2, \dst +.endif +.endm + +// Given the first XTS tweak at (TWEAK), compute the first set of tweaks and +// store them in the vector registers TWEAK0-TWEAK3. Clobbers V0-V5. +.macro _compute_first_set_of_tweaks + vmovdqu (TWEAK), TWEAK0_XMM + _vbroadcast128 .Lgf_poly(%rip), GF_POLY +.if VL == 16 + // With VL=16, multiplying by x serially is fastest. + _next_tweak TWEAK0, %xmm0, TWEAK1 + _next_tweak TWEAK1, %xmm0, TWEAK2 + _next_tweak TWEAK2, %xmm0, TWEAK3 +.else +.if VL == 32 + // Compute the second block of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + vinserti128 $1, %xmm1, TWEAK0, TWEAK0 +.elseif VL == 64 + // Compute the remaining blocks of TWEAK0. + _next_tweak TWEAK0_XMM, %xmm0, %xmm1 + _next_tweak %xmm1, %xmm0, %xmm2 + _next_tweak %xmm2, %xmm0, %xmm3 + vinserti32x4 $1, %xmm1, TWEAK0, TWEAK0 + vinserti32x4 $2, %xmm2, TWEAK0, TWEAK0 + vinserti32x4 $3, %xmm3, TWEAK0, TWEAK0 +.endif + // Compute TWEAK[1-3] from TWEAK0. + vpsrlq $64 - 1*VL/16, TWEAK0, V0 + vpsrlq $64 - 2*VL/16, TWEAK0, V2 + vpsrlq $64 - 3*VL/16, TWEAK0, V4 + vpclmulqdq $0x01, GF_POLY, V0, V1 + vpclmulqdq $0x01, GF_POLY, V2, V3 + vpclmulqdq $0x01, GF_POLY, V4, V5 + vpslldq $8, V0, V0 + vpslldq $8, V2, V2 + vpslldq $8, V4, V4 + vpsllq $1*VL/16, TWEAK0, TWEAK1 + vpsllq $2*VL/16, TWEAK0, TWEAK2 + vpsllq $3*VL/16, TWEAK0, TWEAK3 +.if USE_AVX10 + vpternlogd $0x96, V0, V1, TWEAK1 + vpternlogd $0x96, V2, V3, TWEAK2 + vpternlogd $0x96, V4, V5, TWEAK3 +.else + vpxor V0, TWEAK1, TWEAK1 + vpxor V2, TWEAK2, TWEAK2 + vpxor V4, TWEAK3, TWEAK3 + vpxor V1, TWEAK1, TWEAK1 + vpxor V3, TWEAK2, TWEAK2 + vpxor V5, TWEAK3, TWEAK3 +.endif +.endif +.endm + +// Do one step in computing the next set of tweaks using the method of just +// multiplying by x repeatedly (the same method _next_tweak uses). +.macro _tweak_step_mulx i +.if \i == 0 + .set PREV_TWEAK, TWEAK3 + .set NEXT_TWEAK, NEXT_TWEAK0 +.elseif \i == 5 + .set PREV_TWEAK, NEXT_TWEAK0 + .set NEXT_TWEAK, NEXT_TWEAK1 +.elseif \i == 10 + .set PREV_TWEAK, NEXT_TWEAK1 + .set NEXT_TWEAK, NEXT_TWEAK2 +.elseif \i == 15 + .set PREV_TWEAK, NEXT_TWEAK2 + .set NEXT_TWEAK, NEXT_TWEAK3 +.endif +.if \i >= 0 && \i < 20 && \i % 5 == 0 + vpshufd $0x13, PREV_TWEAK, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 1 + vpaddq PREV_TWEAK, PREV_TWEAK, NEXT_TWEAK +.elseif \i >= 0 && \i < 20 && \i % 5 == 2 + vpsrad $31, V5, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 3 + vpand GF_POLY, V5, V5 +.elseif \i >= 0 && \i < 20 && \i % 5 == 4 + vpxor V5, NEXT_TWEAK, NEXT_TWEAK +.elseif \i == 1000 + vmovdqa NEXT_TWEAK0, TWEAK0 + vmovdqa NEXT_TWEAK1, TWEAK1 + vmovdqa NEXT_TWEAK2, TWEAK2 + vmovdqa NEXT_TWEAK3, TWEAK3 +.endif +.endm + +// Do one step in computing the next set of tweaks using the VPCLMULQDQ method +// (the same method _next_tweakvec uses for VL > 16). This means multiplying +// each tweak by x^(4*VL/16) independently. Since 4*VL/16 is a multiple of 8 +// when VL > 16 (which it is here), the needed shift amounts are byte-aligned, +// which allows the use of vpsrldq and vpslldq to do 128-bit wide shifts. +.macro _tweak_step_pclmul i +.if \i == 0 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK0, NEXT_TWEAK0 +.elseif \i == 2 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK1, NEXT_TWEAK1 +.elseif \i == 4 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK2, NEXT_TWEAK2 +.elseif \i == 6 + vpsrldq $(128 - 4*VL/16) / 8, TWEAK3, NEXT_TWEAK3 +.elseif \i == 8 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK0, NEXT_TWEAK0 +.elseif \i == 10 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK1, NEXT_TWEAK1 +.elseif \i == 12 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK2, NEXT_TWEAK2 +.elseif \i == 14 + vpclmulqdq $0x00, GF_POLY, NEXT_TWEAK3, NEXT_TWEAK3 +.elseif \i == 1000 + vpslldq $(4*VL/16) / 8, TWEAK0, TWEAK0 + vpslldq $(4*VL/16) / 8, TWEAK1, TWEAK1 + vpslldq $(4*VL/16) / 8, TWEAK2, TWEAK2 + vpslldq $(4*VL/16) / 8, TWEAK3, TWEAK3 + _vpxor NEXT_TWEAK0, TWEAK0, TWEAK0 + _vpxor NEXT_TWEAK1, TWEAK1, TWEAK1 + _vpxor NEXT_TWEAK2, TWEAK2, TWEAK2 + _vpxor NEXT_TWEAK3, TWEAK3, TWEAK3 +.endif +.endm + +// _tweak_step does one step of the computation of the next set of tweaks from +// TWEAK[0-3]. To complete all steps, this is invoked with increasing values of +// \i that include at least 0 through 19, then 1000 which signals the last step. +// +// This is used to interleave the computation of the next set of tweaks with the +// AES en/decryptions, which increases performance in some cases. +.macro _tweak_step i +.if VL == 16 + _tweak_step_mulx \i +.else + _tweak_step_pclmul \i +.endif +.endm + +.macro _setup_round_keys enc + + // Select either the encryption round keys or the decryption round keys. +.if \enc + .set OFFS, 0 +.else + .set OFFS, 240 +.endif + + // Load the round key for "round 0". + _vbroadcast128 OFFS(KEY), KEY0 + + // Increment KEY to make it so that 7*16(KEY) is the last round key. + // For AES-128, increment by 3*16, resulting in the 10 round keys (not + // counting the zero-th round key which was just loaded into KEY0) being + // -2*16(KEY) through 7*16(KEY). For AES-192, increment by 5*16 and use + // 12 round keys -4*16(KEY) through 7*16(KEY). For AES-256, increment + // by 7*16 and use 14 round keys -6*16(KEY) through 7*16(KEY). + // + // This rebasing provides two benefits. First, it makes the offset to + // any round key be in the range [-96, 112], fitting in a signed byte. + // This shortens VEX-encoded instructions that access the later round + // keys which otherwise would need 4-byte offsets. Second, it makes it + // easy to do AES-128 and AES-192 by skipping irrelevant rounds at the + // beginning. Skipping rounds at the end doesn't work as well because + // the last round needs different instructions. + // + // An alternative approach would be to roll up all the round loops. We + // don't do that because it isn't compatible with caching the round keys + // in registers which we do when possible (see below), and also because + // it seems unwise to rely *too* heavily on the CPU's branch predictor. + lea OFFS-16(KEY, KEYLEN64, 4), KEY + + // If all 32 SIMD registers are available, cache all the round keys. +.if USE_AVX10 + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + _vbroadcast128 -6*16(KEY), KEY1 + _vbroadcast128 -5*16(KEY), KEY2 +.Laes192\@: + _vbroadcast128 -4*16(KEY), KEY3 + _vbroadcast128 -3*16(KEY), KEY4 +.Laes128\@: + _vbroadcast128 -2*16(KEY), KEY5 + _vbroadcast128 -1*16(KEY), KEY6 + _vbroadcast128 0*16(KEY), KEY7 + _vbroadcast128 1*16(KEY), KEY8 + _vbroadcast128 2*16(KEY), KEY9 + _vbroadcast128 3*16(KEY), KEY10 + _vbroadcast128 4*16(KEY), KEY11 + _vbroadcast128 5*16(KEY), KEY12 + _vbroadcast128 6*16(KEY), KEY13 + _vbroadcast128 7*16(KEY), KEY14 +.endif +.endm + +// Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0) +// on the block(s) in \data using the round key(s) in \key. The register length +// determines the number of AES blocks en/decrypted. +.macro _vaes enc, last, key, data +.if \enc +.if \last + vaesenclast \key, \data, \data +.else + vaesenc \key, \data, \data +.endif +.else +.if \last + vaesdeclast \key, \data, \data +.else + vaesdec \key, \data, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the block(s) in \data, using the +// same key for all block(s). The round key is loaded from the appropriate +// register or memory location for round \i. May clobber V4. +.macro _vaes_1x enc, last, i, xmm_suffix, data +.if USE_AVX10 + _vaes \enc, \last, KEY\i\xmm_suffix, \data +.else +.ifnb \xmm_suffix + _vaes \enc, \last, (\i-7)*16(KEY), \data +.else + _vbroadcast128 (\i-7)*16(KEY), V4 + _vaes \enc, \last, V4, \data +.endif +.endif +.endm + +// Do a single round of AES en/decryption on the blocks in registers V0-V3, +// using the same key for all blocks. The round key is loaded from the +// appropriate register or memory location for round \i. In addition, does two +// steps of the computation of the next set of tweaks. May clobber V4. +.macro _vaes_4x enc, last, i +.if USE_AVX10 + _tweak_step (2*(\i-5)) + _vaes \enc, \last, KEY\i, V0 + _vaes \enc, \last, KEY\i, V1 + _tweak_step (2*(\i-5) + 1) + _vaes \enc, \last, KEY\i, V2 + _vaes \enc, \last, KEY\i, V3 +.else + _vbroadcast128 (\i-7)*16(KEY), V4 + _tweak_step (2*(\i-5)) + _vaes \enc, \last, V4, V0 + _vaes \enc, \last, V4, V1 + _tweak_step (2*(\i-5) + 1) + _vaes \enc, \last, V4, V2 + _vaes \enc, \last, V4, V3 +.endif +.endm + +// Do tweaked AES en/decryption (i.e., XOR with \tweak, then AES en/decrypt, +// then XOR with \tweak again) of the block(s) in \data. To process a single +// block, use xmm registers and set \xmm_suffix=_XMM. To process a vector of +// length VL, use V* registers and leave \xmm_suffix empty. May clobber V4. +.macro _aes_crypt enc, xmm_suffix, tweak, data + _xor3 KEY0\xmm_suffix, \tweak, \data + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + _vaes_1x \enc, 0, 1, \xmm_suffix, \data + _vaes_1x \enc, 0, 2, \xmm_suffix, \data +.Laes192\@: + _vaes_1x \enc, 0, 3, \xmm_suffix, \data + _vaes_1x \enc, 0, 4, \xmm_suffix, \data +.Laes128\@: + _vaes_1x \enc, 0, 5, \xmm_suffix, \data + _vaes_1x \enc, 0, 6, \xmm_suffix, \data + _vaes_1x \enc, 0, 7, \xmm_suffix, \data + _vaes_1x \enc, 0, 8, \xmm_suffix, \data + _vaes_1x \enc, 0, 9, \xmm_suffix, \data + _vaes_1x \enc, 0, 10, \xmm_suffix, \data + _vaes_1x \enc, 0, 11, \xmm_suffix, \data + _vaes_1x \enc, 0, 12, \xmm_suffix, \data + _vaes_1x \enc, 0, 13, \xmm_suffix, \data + _vaes_1x \enc, 1, 14, \xmm_suffix, \data + _vpxor \tweak, \data, \data +.endm + +.macro _aes_xts_crypt enc + _define_aliases + +.if !\enc + // When decrypting a message whose length isn't a multiple of the AES + // block length, exclude the last full block from the main loop by + // subtracting 16 from LEN. This is needed because ciphertext stealing + // decryption uses the last two tweaks in reverse order. We'll handle + // the last full block and the partial block specially at the end. + lea -16(LEN), %eax + test $15, LEN8 + cmovnz %eax, LEN +.endif + + // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256). + movl 480(KEY), KEYLEN + + // Setup the pointer to the round keys and cache as many as possible. + _setup_round_keys \enc + + // Compute the first set of tweaks TWEAK[0-3]. + _compute_first_set_of_tweaks + + sub $4*VL, LEN + jl .Lhandle_remainder\@ + +.Lmain_loop\@: + // This is the main loop, en/decrypting 4*VL bytes per iteration. + + // XOR each source block with its tweak and the zero-th round key. +.if USE_AVX10 + vmovdqu8 0*VL(SRC), V0 + vmovdqu8 1*VL(SRC), V1 + vmovdqu8 2*VL(SRC), V2 + vmovdqu8 3*VL(SRC), V3 + vpternlogd $0x96, TWEAK0, KEY0, V0 + vpternlogd $0x96, TWEAK1, KEY0, V1 + vpternlogd $0x96, TWEAK2, KEY0, V2 + vpternlogd $0x96, TWEAK3, KEY0, V3 +.else + vpxor 0*VL(SRC), KEY0, V0 + vpxor 1*VL(SRC), KEY0, V1 + vpxor 2*VL(SRC), KEY0, V2 + vpxor 3*VL(SRC), KEY0, V3 + vpxor TWEAK0, V0, V0 + vpxor TWEAK1, V1, V1 + vpxor TWEAK2, V2, V2 + vpxor TWEAK3, V3, V3 +.endif + cmp $24, KEYLEN + jl .Laes128\@ + je .Laes192\@ + // Do all the AES rounds on the data blocks, interleaved with + // the computation of the next set of tweaks. + _vaes_4x \enc, 0, 1 + _vaes_4x \enc, 0, 2 +.Laes192\@: + _vaes_4x \enc, 0, 3 + _vaes_4x \enc, 0, 4 +.Laes128\@: + _vaes_4x \enc, 0, 5 + _vaes_4x \enc, 0, 6 + _vaes_4x \enc, 0, 7 + _vaes_4x \enc, 0, 8 + _vaes_4x \enc, 0, 9 + _vaes_4x \enc, 0, 10 + _vaes_4x \enc, 0, 11 + _vaes_4x \enc, 0, 12 + _vaes_4x \enc, 0, 13 + _vaes_4x \enc, 1, 14 + + // XOR in the tweaks again. + _vpxor TWEAK0, V0, V0 + _vpxor TWEAK1, V1, V1 + _vpxor TWEAK2, V2, V2 + _vpxor TWEAK3, V3, V3 + + // Store the destination blocks. + _vmovdqu V0, 0*VL(DST) + _vmovdqu V1, 1*VL(DST) + _vmovdqu V2, 2*VL(DST) + _vmovdqu V3, 3*VL(DST) + + // Finish computing the next set of tweaks. + _tweak_step 1000 + + add $4*VL, SRC + add $4*VL, DST + sub $4*VL, LEN + jge .Lmain_loop\@ + + // Check for the uncommon case where the data length isn't a multiple of + // 4*VL. Handle it out-of-line in order to optimize for the common + // case. In the common case, just fall through to the ret. + test $4*VL-1, LEN8 + jnz .Lhandle_remainder\@ +.Ldone\@: + // Store the next tweak back to *TWEAK to support continuation calls. + vmovdqu TWEAK0_XMM, (TWEAK) +.if VL > 16 + vzeroupper +.endif + RET + +.Lhandle_remainder\@: + + // En/decrypt any remaining full blocks, one vector at a time. +.if VL > 16 + add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL. + jl .Lvec_at_a_time_done\@ +.Lvec_at_a_time\@: + _vmovdqu (SRC), V0 + _aes_crypt \enc, , TWEAK0, V0 + _vmovdqu V0, (DST) + _next_tweakvec TWEAK0, V0, V1, TWEAK0 + add $VL, SRC + add $VL, DST + sub $VL, LEN + jge .Lvec_at_a_time\@ +.Lvec_at_a_time_done\@: + add $VL-16, LEN // Undo extra sub of VL, then sub 16. +.else + add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16. +.endif + + // En/decrypt any remaining full blocks, one at a time. + jl .Lblock_at_a_time_done\@ +.Lblock_at_a_time\@: + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 + vmovdqu %xmm0, (DST) + _next_tweak TWEAK0_XMM, %xmm0, TWEAK0_XMM + add $16, SRC + add $16, DST + sub $16, LEN + jge .Lblock_at_a_time\@ +.Lblock_at_a_time_done\@: + add $16, LEN // Undo the extra sub of 16. + // Now 0 <= LEN <= 15. If LEN is zero, we're done. + jz .Ldone\@ + + // Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN. + // Do ciphertext stealing to process the last 16 + LEN bytes. + +.if \enc + // If encrypting, the main loop already encrypted the last full block to + // create the CTS intermediate ciphertext. Prepare for the rest of CTS + // by rewinding the pointers and loading the intermediate ciphertext. + sub $16, SRC + sub $16, DST + vmovdqu (DST), %xmm0 +.else + // If decrypting, the main loop didn't decrypt the last full block + // because CTS decryption uses the last two tweaks in reverse order. + // Do it now by advancing the tweak and decrypting the last full block. + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM + vmovdqu (SRC), %xmm0 + _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 +.endif + +.if USE_AVX10 + // Create a mask that has the first LEN bits set. + mov $-1, %r9d + bzhi LEN, %r9d, %r9d + kmovd %r9d, %k1 + + // Swap the first LEN bytes of the en/decryption of the last full block + // with the partial block. Note that to support in-place en/decryption, + // the load from the src partial block must happen before the store to + // the dst partial block. + vmovdqa %xmm0, %xmm1 + vmovdqu8 16(SRC), %xmm0{%k1} + vmovdqu8 %xmm1, 16(DST){%k1} +.else + lea .Lcts_permute_table(%rip), %r9 + + // Load the src partial block, left-aligned. Note that to support + // in-place en/decryption, this must happen before the store to the dst + // partial block. + vmovdqu (SRC, LEN64, 1), %xmm1 + + // Shift the first LEN bytes of the en/decryption of the last full block + // to the end of a register, then store it to DST+LEN. This stores the + // dst partial block. It also writes to the second part of the dst last + // full block, but that part is overwritten later. + vpshufb (%r9, LEN64, 1), %xmm0, %xmm2 + vmovdqu %xmm2, (DST, LEN64, 1) + + // Make xmm3 contain [16-LEN,16-LEN+1,...,14,15,0x80,0x80,...]. + sub LEN64, %r9 + vmovdqu 32(%r9), %xmm3 + + // Shift the src partial block to the beginning of its register. + vpshufb %xmm3, %xmm1, %xmm1 + + // Do a blend to generate the src partial block followed by the second + // part of the en/decryption of the last full block. + vpblendvb %xmm3, %xmm0, %xmm1, %xmm0 +.endif + // En/decrypt again and store the last full block. + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 + vmovdqu %xmm0, (DST) + jmp .Ldone\@ +.endm + +// void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, +// u8 iv[AES_BLOCK_SIZE]); +SYM_TYPED_FUNC_START(aes_xts_encrypt_iv) + vmovdqu (%rsi), %xmm0 + vpxor (%rdi), %xmm0, %xmm0 + movl 480(%rdi), %eax // AES key length + lea -16(%rdi, %rax, 4), %rdi + cmp $24, %eax + jl .Lencrypt_iv_aes128 + je .Lencrypt_iv_aes192 + vaesenc -6*16(%rdi), %xmm0, %xmm0 + vaesenc -5*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes192: + vaesenc -4*16(%rdi), %xmm0, %xmm0 + vaesenc -3*16(%rdi), %xmm0, %xmm0 +.Lencrypt_iv_aes128: + vaesenc -2*16(%rdi), %xmm0, %xmm0 + vaesenc -1*16(%rdi), %xmm0, %xmm0 + vaesenc 0*16(%rdi), %xmm0, %xmm0 + vaesenc 1*16(%rdi), %xmm0, %xmm0 + vaesenc 2*16(%rdi), %xmm0, %xmm0 + vaesenc 3*16(%rdi), %xmm0, %xmm0 + vaesenc 4*16(%rdi), %xmm0, %xmm0 + vaesenc 5*16(%rdi), %xmm0, %xmm0 + vaesenc 6*16(%rdi), %xmm0, %xmm0 + vaesenclast 7*16(%rdi), %xmm0, %xmm0 + vmovdqu %xmm0, (%rsi) + RET +SYM_FUNC_END(aes_xts_encrypt_iv) + +// Below are the actual AES-XTS encryption and decryption functions, +// instantiated from the above macro. They all have the following prototype: +// +// void (*xts_asm_func)(const struct crypto_aes_ctx *key, +// const u8 *src, u8 *dst, unsigned int len, +// u8 tweak[AES_BLOCK_SIZE]); +// +// |key| is the data key. |tweak| contains the next tweak; the encryption of +// the original IV with the tweak key was already done. This function supports +// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and +// |len| must be a multiple of 16 except on the last call. If |len| is a +// multiple of 16, then this function updates |tweak| to contain the next tweak. + +.set VL, 16 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_aesni_avx) +SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_aesni_avx) + +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +.set VL, 32 +.set USE_AVX10, 0 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx2) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx2) + +.set VL, 32 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256) + +.set VL, 64 +.set USE_AVX10, 1 +SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512) + _aes_xts_crypt 1 +SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512) +SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512) + _aes_xts_crypt 0 +SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512) +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 411d8c83e88a..39066b57a70e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -83,9 +83,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff .text - -#define STACK_OFFSET 8*3 - #define AadHash 16*0 #define AadLen 16*1 #define InLen (16*1)+8 @@ -116,11 +113,6 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg4 rcx #define arg5 r8 #define arg6 r9 -#define arg7 STACK_OFFSET+8(%rsp) -#define arg8 STACK_OFFSET+16(%rsp) -#define arg9 STACK_OFFSET+24(%rsp) -#define arg10 STACK_OFFSET+32(%rsp) -#define arg11 STACK_OFFSET+40(%rsp) #define keysize 2*15*16(%arg1) #endif @@ -1507,184 +1499,6 @@ _esb_loop_\@: MOVADQ (%r10),\TMP1 aesenclast \TMP1,\XMM0 .endm -/***************************************************************************** -* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* struct gcm_context_data *data -* // Context data -* u8 *out, // Plaintext output. Encrypt in-place is allowed. -* const u8 *in, // Ciphertext input -* u64 plaintext_len, // Length of data in bytes for decryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the -* // given authentication tag and only return the plaintext if they match. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 -* // (most likely), 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. we are using the first -* set of 11 keys in the data structure void *aes_ctx -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -* -*****************************************************************************/ -SYM_FUNC_START(aesni_gcm_dec) - FUNC_SAVE - - GCM_INIT %arg6, arg7, arg8, arg9 - GCM_ENC_DEC dec - GCM_COMPLETE arg10, arg11 - FUNC_RESTORE - RET -SYM_FUNC_END(aesni_gcm_dec) - - -/***************************************************************************** -* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* struct gcm_context_data *data -* // Context data -* u8 *out, // Ciphertext output. Encrypt in-place is allowed. -* const u8 *in, // Plaintext input -* u64 plaintext_len, // Length of data in bytes for encryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), -* // 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. we are using the -* first set of 11 keys in the data structure void *aes_ctx -* -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -***************************************************************************/ -SYM_FUNC_START(aesni_gcm_enc) - FUNC_SAVE - - GCM_INIT %arg6, arg7, arg8, arg9 - GCM_ENC_DEC enc - - GCM_COMPLETE arg10, arg11 - FUNC_RESTORE - RET -SYM_FUNC_END(aesni_gcm_enc) /***************************************************************************** * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. @@ -1820,8 +1634,8 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b) SYM_FUNC_END(_key_expansion_256b) /* - * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - * unsigned int key_len) + * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) */ SYM_FUNC_START(aesni_set_key) FRAME_BEGIN @@ -1926,7 +1740,6 @@ SYM_FUNC_START(aesni_set_key) sub $0x10, UKEYP cmp TKEYP, KEYP jb .Ldec_key_loop - xor AREG, AREG #ifndef __x86_64__ popl KEYP #endif @@ -2826,28 +2639,24 @@ SYM_FUNC_END(aesni_ctr_enc) .previous /* - * _aesni_gf128mul_x_ble: internal ABI - * Multiply in GF(2^128) for XTS IVs + * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs * input: * IV: current IV * GF128MUL_MASK == mask with 0x87 and 0x01 * output: * IV: next IV * changed: - * CTR: == temporary value + * KEY: == temporary value */ -#define _aesni_gf128mul_x_ble() \ - pshufd $0x13, IV, KEY; \ - paddq IV, IV; \ - psrad $31, KEY; \ - pand GF128MUL_MASK, KEY; \ - pxor KEY, IV; +.macro _aesni_gf128mul_x_ble + pshufd $0x13, IV, KEY + paddq IV, IV + psrad $31, KEY + pand GF128MUL_MASK, KEY + pxor KEY, IV +.endm -/* - * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) - */ -SYM_FUNC_START(aesni_xts_encrypt) +.macro _aesni_xts_crypt enc FRAME_BEGIN #ifndef __x86_64__ pushl IVP @@ -2866,35 +2675,46 @@ SYM_FUNC_START(aesni_xts_encrypt) movups (IVP), IV mov 480(KEYP), KLEN +.if !\enc + add $240, KEYP -.Lxts_enc_loop4: + test $15, LEN + jz .Lxts_loop4\@ + sub $16, LEN +.endif + +.Lxts_loop4\@: sub $64, LEN - jl .Lxts_enc_1x + jl .Lxts_1x\@ movdqa IV, STATE1 movdqu 0x00(INP), IN pxor IN, STATE1 movdqu IV, 0x00(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE2 movdqu 0x10(INP), IN pxor IN, STATE2 movdqu IV, 0x10(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE3 movdqu 0x20(INP), IN pxor IN, STATE3 movdqu IV, 0x20(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble movdqa IV, STATE4 movdqu 0x30(INP), IN pxor IN, STATE4 movdqu IV, 0x30(OUTP) +.if \enc call _aesni_enc4 +.else + call _aesni_dec4 +.endif movdqu 0x00(OUTP), IN pxor IN, STATE1 @@ -2912,17 +2732,17 @@ SYM_FUNC_START(aesni_xts_encrypt) pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble add $64, INP add $64, OUTP test LEN, LEN - jnz .Lxts_enc_loop4 + jnz .Lxts_loop4\@ -.Lxts_enc_ret_iv: +.Lxts_ret_iv\@: movups IV, (IVP) -.Lxts_enc_ret: +.Lxts_ret\@: #ifndef __x86_64__ popl KLEN popl KEYP @@ -2932,201 +2752,60 @@ SYM_FUNC_START(aesni_xts_encrypt) FRAME_END RET -.Lxts_enc_1x: +.Lxts_1x\@: add $64, LEN - jz .Lxts_enc_ret_iv + jz .Lxts_ret_iv\@ +.if \enc sub $16, LEN - jl .Lxts_enc_cts4 + jl .Lxts_cts4\@ +.endif -.Lxts_enc_loop1: +.Lxts_loop1\@: movdqu (INP), STATE +.if \enc pxor IV, STATE call _aesni_enc1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_enc_out - +.else add $16, INP sub $16, LEN - jl .Lxts_enc_cts1 - - movdqu STATE, (OUTP) - add $16, OUTP - jmp .Lxts_enc_loop1 - -.Lxts_enc_out: - movdqu STATE, (OUTP) - jmp .Lxts_enc_ret_iv - -.Lxts_enc_cts4: - movdqa STATE4, STATE - sub $16, OUTP - -.Lxts_enc_cts1: -#ifndef __x86_64__ - lea .Lcts_permute_table, T1 -#else - lea .Lcts_permute_table(%rip), T1 -#endif - add LEN, INP /* rewind input pointer */ - add $16, LEN /* # bytes in final block */ - movups (INP), IN1 - - mov T1, IVP - add $32, IVP - add LEN, T1 - sub LEN, IVP - add OUTP, LEN - - movups (T1), %xmm4 - movaps STATE, IN2 - pshufb %xmm4, STATE - movups STATE, (LEN) - - movups (IVP), %xmm0 - pshufb %xmm0, IN1 - pblendvb IN2, IN1 - movaps IN1, STATE - + jl .Lxts_cts1\@ pxor IV, STATE - call _aesni_enc1 + call _aesni_dec1 +.endif pxor IV, STATE + _aesni_gf128mul_x_ble - movups STATE, (OUTP) - jmp .Lxts_enc_ret -SYM_FUNC_END(aesni_xts_encrypt) - -/* - * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, unsigned int len, le128 *iv) - */ -SYM_FUNC_START(aesni_xts_decrypt) - FRAME_BEGIN -#ifndef __x86_64__ - pushl IVP - pushl LEN - pushl KEYP - pushl KLEN - movl (FRAME_OFFSET+20)(%esp), KEYP # ctx - movl (FRAME_OFFSET+24)(%esp), OUTP # dst - movl (FRAME_OFFSET+28)(%esp), INP # src - movl (FRAME_OFFSET+32)(%esp), LEN # len - movl (FRAME_OFFSET+36)(%esp), IVP # iv - movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK -#else - movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK -#endif - movups (IVP), IV - - mov 480(KEYP), KLEN - add $240, KEYP - - test $15, LEN - jz .Lxts_dec_loop4 - sub $16, LEN - -.Lxts_dec_loop4: - sub $64, LEN - jl .Lxts_dec_1x - - movdqa IV, STATE1 - movdqu 0x00(INP), IN - pxor IN, STATE1 - movdqu IV, 0x00(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x10(INP), IN - pxor IN, STATE2 - movdqu IV, 0x10(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x20(INP), IN - pxor IN, STATE3 - movdqu IV, 0x20(OUTP) - - _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x30(INP), IN - pxor IN, STATE4 - movdqu IV, 0x30(OUTP) - - call _aesni_dec4 - - movdqu 0x00(OUTP), IN - pxor IN, STATE1 - movdqu STATE1, 0x00(OUTP) - - movdqu 0x10(OUTP), IN - pxor IN, STATE2 - movdqu STATE2, 0x10(OUTP) - - movdqu 0x20(OUTP), IN - pxor IN, STATE3 - movdqu STATE3, 0x20(OUTP) - - movdqu 0x30(OUTP), IN - pxor IN, STATE4 - movdqu STATE4, 0x30(OUTP) - - _aesni_gf128mul_x_ble() - - add $64, INP - add $64, OUTP test LEN, LEN - jnz .Lxts_dec_loop4 - -.Lxts_dec_ret_iv: - movups IV, (IVP) - -.Lxts_dec_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN - popl IVP -#endif - FRAME_END - RET - -.Lxts_dec_1x: - add $64, LEN - jz .Lxts_dec_ret_iv - -.Lxts_dec_loop1: - movdqu (INP), STATE + jz .Lxts_out\@ +.if \enc add $16, INP sub $16, LEN - jl .Lxts_dec_cts1 - - pxor IV, STATE - call _aesni_dec1 - pxor IV, STATE - _aesni_gf128mul_x_ble() - - test LEN, LEN - jz .Lxts_dec_out + jl .Lxts_cts1\@ +.endif movdqu STATE, (OUTP) add $16, OUTP - jmp .Lxts_dec_loop1 + jmp .Lxts_loop1\@ -.Lxts_dec_out: +.Lxts_out\@: movdqu STATE, (OUTP) - jmp .Lxts_dec_ret_iv + jmp .Lxts_ret_iv\@ -.Lxts_dec_cts1: +.if \enc +.Lxts_cts4\@: + movdqa STATE4, STATE + sub $16, OUTP +.Lxts_cts1\@: +.else +.Lxts_cts1\@: movdqa IV, STATE4 - _aesni_gf128mul_x_ble() + _aesni_gf128mul_x_ble pxor IV, STATE call _aesni_dec1 pxor IV, STATE - +.endif #ifndef __x86_64__ lea .Lcts_permute_table, T1 #else @@ -3152,10 +2831,32 @@ SYM_FUNC_START(aesni_xts_decrypt) pblendvb IN2, IN1 movaps IN1, STATE +.if \enc + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE +.else pxor STATE4, STATE call _aesni_dec1 pxor STATE4, STATE +.endif movups STATE, (OUTP) - jmp .Lxts_dec_ret -SYM_FUNC_END(aesni_xts_decrypt) + jmp .Lxts_ret\@ +.endm + +/* + * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_enc) + _aesni_xts_crypt 1 +SYM_FUNC_END(aesni_xts_enc) + +/* + * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_dec) + _aesni_xts_crypt 0 +SYM_FUNC_END(aesni_xts_dec) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index b1d90c25975a..5b25d2a58aeb 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -40,7 +40,6 @@ #define AESNI_ALIGN 16 #define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN))) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1)) -#define RFC4106_HASH_SUBKEY_SIZE 16 #define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) #define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) #define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) @@ -87,8 +86,8 @@ static inline void *aes_align_addr(void *addr) return PTR_ALIGN(addr, AESNI_ALIGN); } -asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - unsigned int key_len); +asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -107,11 +106,11 @@ asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 -asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); #ifdef CONFIG_X86_64 @@ -233,19 +232,17 @@ static int aes_set_key_common(struct crypto_aes_ctx *ctx, { int err; - if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) - return -EINVAL; - if (!crypto_simd_usable()) - err = aes_expandkey(ctx, in_key, key_len); - else { - kernel_fpu_begin(); - err = aesni_set_key(ctx, in_key, key_len); - kernel_fpu_end(); - } + return aes_expandkey(ctx, in_key, key_len); - return err; + err = aes_check_keylen(key_len); + if (err) + return err; + + kernel_fpu_begin(); + aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); + return 0; } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, @@ -592,23 +589,12 @@ static int xctr_crypt(struct skcipher_request *req) return err; } -static int -rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +static int aes_gcm_derive_hash_subkey(const struct crypto_aes_ctx *aes_key, + u8 hash_subkey[AES_BLOCK_SIZE]) { - struct crypto_aes_ctx ctx; - int ret; - - ret = aes_expandkey(&ctx, key, key_len); - if (ret) - return ret; - - /* Clear the data in the hash sub key container to zero.*/ - /* We want to cipher all zeros to create the hash sub key. */ - memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - - aes_encrypt(&ctx, hash_subkey, hash_subkey); + static const u8 zeroes[AES_BLOCK_SIZE]; - memzero_explicit(&ctx, sizeof(ctx)); + aes_encrypt(aes_key, hash_subkey, zeroes); return 0; } @@ -626,7 +612,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: - rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); + aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, + ctx->hash_subkey); } /* This is the Integrity Check Value (aka the authentication tag) length and can @@ -877,7 +864,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif -static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, +static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); @@ -898,108 +885,149 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen); } -static int xts_crypt(struct skcipher_request *req, bool encrypt) +typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); +typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]); + +/* This handles cases where the source and/or destination span pages. */ +static noinline int +xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; struct skcipher_request subreq; struct skcipher_walk walk; + struct scatterlist *src, *dst; int err; - if (req->cryptlen < AES_BLOCK_SIZE) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - - if (unlikely(tail > 0 && walk.nbytes < walk.total)) { - int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; - - skcipher_walk_abort(&walk); - + /* + * If the message length isn't divisible by the AES block size, then + * separate off the last full block and the partial block. This ensures + * that they are processed in the same call to the assembly function, + * which is required for ciphertext stealing. + */ + if (tail) { skcipher_request_set_tfm(&subreq, tfm); skcipher_request_set_callback(&subreq, skcipher_request_flags(req), NULL, NULL); skcipher_request_set_crypt(&subreq, req->src, req->dst, - blocks * AES_BLOCK_SIZE, req->iv); + req->cryptlen - tail - AES_BLOCK_SIZE, + req->iv); req = &subreq; + } - err = skcipher_walk_virt(&walk, req, false); - if (!walk.nbytes) - return err; - } else { - tail = 0; + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + kernel_fpu_begin(); + (*crypt_func)(&ctx->crypt_ctx, + walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv); + kernel_fpu_end(); + err = skcipher_walk_done(&walk, + walk.nbytes & (AES_BLOCK_SIZE - 1)); } - kernel_fpu_begin(); + if (err || !tail) + return err; - /* calculate first value of T */ - aesni_enc(&ctx->tweak_ctx, walk.iv, walk.iv); + /* Do ciphertext stealing with the last full block and partial block. */ - while (walk.nbytes > 0) { - int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes &= ~(AES_BLOCK_SIZE - 1); - - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - nbytes, walk.iv); - kernel_fpu_end(); + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); - if (walk.nbytes > 0) - kernel_fpu_begin(); - } + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; - if (unlikely(tail > 0 && !err)) { - struct scatterlist sg_src[2], sg_dst[2]; - struct scatterlist *src, *dst; + kernel_fpu_begin(); + (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv); + kernel_fpu_end(); - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + return skcipher_walk_done(&walk, 0); +} - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, - req->iv); +/* __always_inline to avoid indirect call in fastpath */ +static __always_inline int +xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, + xts_crypt_func crypt_func) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); + const unsigned int cryptlen = req->cryptlen; + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; - err = skcipher_walk_virt(&walk, &subreq, false); - if (err) - return err; + if (unlikely(cryptlen < AES_BLOCK_SIZE)) + return -EINVAL; - kernel_fpu_begin(); - if (encrypt) - aesni_xts_encrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); - else - aesni_xts_decrypt(&ctx->crypt_ctx, - walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes, walk.iv); - kernel_fpu_end(); + kernel_fpu_begin(); + (*encrypt_iv)(&ctx->tweak_ctx, req->iv); - err = skcipher_walk_done(&walk, 0); + /* + * In practice, virtually all XTS plaintexts and ciphertexts are either + * 512 or 4096 bytes, aligned such that they don't span page boundaries. + * To optimize the performance of these cases, and also any other case + * where no page boundary is spanned, the below fast-path handles + * single-page sources and destinations as efficiently as possible. + */ + if (likely(src->length >= cryptlen && dst->length >= cryptlen && + src->offset + cryptlen <= PAGE_SIZE && + dst->offset + cryptlen <= PAGE_SIZE)) { + struct page *src_page = sg_page(src); + struct page *dst_page = sg_page(dst); + void *src_virt = kmap_local_page(src_page) + src->offset; + void *dst_virt = kmap_local_page(dst_page) + dst->offset; + + (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, + req->iv); + kunmap_local(dst_virt); + kunmap_local(src_virt); + kernel_fpu_end(); + return 0; } - return err; + kernel_fpu_end(); + return xts_crypt_slowpath(req, crypt_func); +} + +static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]) +{ + aesni_enc(tweak_key, iv, iv); +} + +static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]) +{ + aesni_xts_enc(key, dst, src, len, tweak); } -static int xts_encrypt(struct skcipher_request *req) +static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, + const u8 *src, u8 *dst, unsigned int len, + u8 tweak[AES_BLOCK_SIZE]) { - return xts_crypt(req, true); + aesni_xts_dec(key, dst, src, len, tweak); } -static int xts_decrypt(struct skcipher_request *req) +static int xts_encrypt_aesni(struct skcipher_request *req) { - return xts_crypt(req, false); + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt); +} + +static int xts_decrypt_aesni(struct skcipher_request *req) +{ + return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt); } static struct crypto_alg aesni_cipher_alg = { @@ -1103,9 +1131,9 @@ static struct skcipher_alg aesni_skciphers[] = { .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .walksize = 2 * AES_BLOCK_SIZE, - .setkey = xts_aesni_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .setkey = xts_setkey_aesni, + .encrypt = xts_encrypt_aesni, + .decrypt = xts_decrypt_aesni, } }; @@ -1137,7 +1165,149 @@ static struct skcipher_alg aesni_xctr = { }; static struct simd_skcipher_alg *aesni_simd_xctr; -#endif /* CONFIG_X86_64 */ + +asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, + u8 iv[AES_BLOCK_SIZE]); + +#define DEFINE_XTS_ALG(suffix, driver_name, priority) \ + \ +asmlinkage void \ +aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ +asmlinkage void \ +aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ + u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ + \ +static int xts_encrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \ +} \ + \ +static int xts_decrypt_##suffix(struct skcipher_request *req) \ +{ \ + return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ +} \ + \ +static struct skcipher_alg aes_xts_alg_##suffix = { \ + .base = { \ + .cra_name = "__xts(aes)", \ + .cra_driver_name = "__" driver_name, \ + .cra_priority = priority, \ + .cra_flags = CRYPTO_ALG_INTERNAL, \ + .cra_blocksize = AES_BLOCK_SIZE, \ + .cra_ctxsize = XTS_AES_CTX_SIZE, \ + .cra_module = THIS_MODULE, \ + }, \ + .min_keysize = 2 * AES_MIN_KEY_SIZE, \ + .max_keysize = 2 * AES_MAX_KEY_SIZE, \ + .ivsize = AES_BLOCK_SIZE, \ + .walksize = 2 * AES_BLOCK_SIZE, \ + .setkey = xts_setkey_aesni, \ + .encrypt = xts_encrypt_##suffix, \ + .decrypt = xts_decrypt_##suffix, \ +}; \ + \ +static struct simd_skcipher_alg *aes_xts_simdalg_##suffix + +DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) +DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); +DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); +DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); +#endif + +/* + * This is a list of CPU models that are known to suffer from downclocking when + * zmm registers (512-bit vectors) are used. On these CPUs, the AES-XTS + * implementation with zmm registers won't be used by default. An + * implementation with ymm registers (256-bit vectors) will be used instead. + */ +static const struct x86_cpu_id zmm_exclusion_list[] = { + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE }, + /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ + /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */ + {}, +}; + +static int __init register_xts_algs(void) +{ + int err; + + if (!boot_cpu_has(X86_FEATURE_AVX)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); + if (err) + return err; +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_VAES) || + !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || + !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + return 0; + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); + if (err) + return err; + + if (!boot_cpu_has(X86_FEATURE_AVX512BW) || + !boot_cpu_has(X86_FEATURE_AVX512VL) || + !boot_cpu_has(X86_FEATURE_BMI2) || + !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + return 0; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); + if (err) + return err; + + if (x86_match_cpu(zmm_exclusion_list)) + aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; + + err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); + if (err) + return err; +#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ + return 0; +} + +static void unregister_xts_algs(void) +{ + if (aes_xts_simdalg_aesni_avx) + simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, + &aes_xts_simdalg_aesni_avx); +#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) + if (aes_xts_simdalg_vaes_avx2) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, + &aes_xts_simdalg_vaes_avx2); + if (aes_xts_simdalg_vaes_avx10_256) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, + &aes_xts_simdalg_vaes_avx10_256); + if (aes_xts_simdalg_vaes_avx10_512) + simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, + &aes_xts_simdalg_vaes_avx10_512); +#endif +} +#else /* CONFIG_X86_64 */ +static int __init register_xts_algs(void) +{ + return 0; +} + +static void unregister_xts_algs(void) +{ +} +#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_X86_64 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, @@ -1146,7 +1316,8 @@ static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: - rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); + aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded, + ctx->hash_subkey); } static int generic_gcmaes_encrypt(struct aead_request *req) @@ -1276,13 +1447,21 @@ static int __init aesni_init(void) goto unregister_aeads; #endif /* CONFIG_X86_64 */ + err = register_xts_algs(); + if (err) + goto unregister_xts; + return 0; +unregister_xts: + unregister_xts_algs(); #ifdef CONFIG_X86_64 + if (aesni_simd_xctr) + simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); unregister_aeads: +#endif /* CONFIG_X86_64 */ simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), aesni_simd_aeads); -#endif /* CONFIG_X86_64 */ unregister_skciphers: simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), @@ -1303,6 +1482,7 @@ static void __exit aesni_exit(void) if (boot_cpu_has(X86_FEATURE_AVX)) simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr); #endif /* CONFIG_X86_64 */ + unregister_xts_algs(); } late_initcall(aesni_init); diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index ef73a3ab8726..791386d9a83a 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -154,5 +154,6 @@ SYM_TYPED_FUNC_START(nh_avx2) vpaddq T1, T0, T0 vpaddq T4, T0, T0 vmovdqu T0, (HASH) + vzeroupper RET SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9918212faf91..0ffb072be956 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -716,6 +716,7 @@ SYM_TYPED_FUNC_START(sha256_transform_rorx) popq %r13 popq %r12 popq %rbx + vzeroupper RET SYM_FUNC_END(sha256_transform_rorx) diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 537b6dcd7ed8..d515a55a3bc1 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -62,20 +62,41 @@ #define SHA256CONSTANTS %rax -#define MSG %xmm0 +#define MSG %xmm0 /* sha256rnds2 implicit operand */ #define STATE0 %xmm1 #define STATE1 %xmm2 -#define MSGTMP0 %xmm3 -#define MSGTMP1 %xmm4 -#define MSGTMP2 %xmm5 -#define MSGTMP3 %xmm6 -#define MSGTMP4 %xmm7 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define TMP %xmm7 #define SHUF_MASK %xmm8 #define ABEF_SAVE %xmm9 #define CDGH_SAVE %xmm10 +.macro do_4rounds i, m0, m1, m2, m3 +.if \i < 16 + movdqu \i*4(DATA_PTR), \m0 + pshufb SHUF_MASK, \m0 +.endif + movdqa (\i-32)*4(SHA256CONSTANTS), MSG + paddd \m0, MSG + sha256rnds2 STATE0, STATE1 +.if \i >= 12 && \i < 60 + movdqa \m0, TMP + palignr $4, \m3, TMP + paddd TMP, \m1 + sha256msg2 \m0, \m1 +.endif + punpckhqdq MSG, MSG + sha256rnds2 STATE1, STATE0 +.if \i >= 4 && \i < 52 + sha256msg1 \m0, \m3 +.endif +.endm + /* * Intel SHA Extensions optimized implementation of a SHA-256 update function * @@ -86,9 +107,6 @@ * store partial blocks. All message padding and hash value initialization must * be done outside the update function. * - * The indented lines in the loop are instructions related to rounds processing. - * The non-indented lines are instructions related to the message schedule. - * * void sha256_ni_transform(uint32_t *digest, const void *data, uint32_t numBlocks); * digest : pointer to digest @@ -108,202 +126,29 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) * Need to reorder these appropriately * DCBA, HGFE -> ABEF, CDGH */ - movdqu 0*16(DIGEST_PTR), STATE0 - movdqu 1*16(DIGEST_PTR), STATE1 + movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */ + movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */ - pshufd $0xB1, STATE0, STATE0 /* CDAB */ - pshufd $0x1B, STATE1, STATE1 /* EFGH */ - movdqa STATE0, MSGTMP4 - palignr $8, STATE1, STATE0 /* ABEF */ - pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + movdqa STATE0, TMP + punpcklqdq STATE1, STATE0 /* FEBA */ + punpckhqdq TMP, STATE1 /* DCHG */ + pshufd $0x1B, STATE0, STATE0 /* ABEF */ + pshufd $0xB1, STATE1, STATE1 /* CDGH */ movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK - lea K256(%rip), SHA256CONSTANTS + lea K256+32*4(%rip), SHA256CONSTANTS .Lloop0: /* Save hash values for addition after rounds */ movdqa STATE0, ABEF_SAVE movdqa STATE1, CDGH_SAVE - /* Rounds 0-3 */ - movdqu 0*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP0 - paddd 0*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 4-7 */ - movdqu 1*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP1 - paddd 1*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 8-11 */ - movdqu 2*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP2 - paddd 2*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 12-15 */ - movdqu 3*16(DATA_PTR), MSG - pshufb SHUF_MASK, MSG - movdqa MSG, MSGTMP3 - paddd 3*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 16-19 */ - movdqa MSGTMP0, MSG - paddd 4*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 20-23 */ - movdqa MSGTMP1, MSG - paddd 5*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 24-27 */ - movdqa MSGTMP2, MSG - paddd 6*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 28-31 */ - movdqa MSGTMP3, MSG - paddd 7*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 32-35 */ - movdqa MSGTMP0, MSG - paddd 8*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 36-39 */ - movdqa MSGTMP1, MSG - paddd 9*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 - - /* Rounds 40-43 */ - movdqa MSGTMP2, MSG - paddd 10*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 - - /* Rounds 44-47 */ - movdqa MSGTMP3, MSG - paddd 11*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP3, MSGTMP4 - palignr $4, MSGTMP2, MSGTMP4 - paddd MSGTMP4, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 - - /* Rounds 48-51 */ - movdqa MSGTMP0, MSG - paddd 12*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP0, MSGTMP4 - palignr $4, MSGTMP3, MSGTMP4 - paddd MSGTMP4, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 - - /* Rounds 52-55 */ - movdqa MSGTMP1, MSG - paddd 13*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP1, MSGTMP4 - palignr $4, MSGTMP0, MSGTMP4 - paddd MSGTMP4, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 56-59 */ - movdqa MSGTMP2, MSG - paddd 14*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - movdqa MSGTMP2, MSGTMP4 - palignr $4, MSGTMP1, MSGTMP4 - paddd MSGTMP4, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 - - /* Rounds 60-63 */ - movdqa MSGTMP3, MSG - paddd 15*16(SHA256CONSTANTS), MSG - sha256rnds2 STATE0, STATE1 - pshufd $0x0E, MSG, MSG - sha256rnds2 STATE1, STATE0 +.irp i, 0, 16, 32, 48 + do_4rounds (\i + 0), MSG0, MSG1, MSG2, MSG3 + do_4rounds (\i + 4), MSG1, MSG2, MSG3, MSG0 + do_4rounds (\i + 8), MSG2, MSG3, MSG0, MSG1 + do_4rounds (\i + 12), MSG3, MSG0, MSG1, MSG2 +.endr /* Add current hash values with previously saved */ paddd ABEF_SAVE, STATE0 @@ -315,14 +160,14 @@ SYM_TYPED_FUNC_START(sha256_ni_transform) jne .Lloop0 /* Write hash values back in the correct order */ - pshufd $0x1B, STATE0, STATE0 /* FEBA */ - pshufd $0xB1, STATE1, STATE1 /* DCHG */ - movdqa STATE0, MSGTMP4 - pblendw $0xF0, STATE1, STATE0 /* DCBA */ - palignr $8, MSGTMP4, STATE1 /* HGFE */ - - movdqu STATE0, 0*16(DIGEST_PTR) - movdqu STATE1, 1*16(DIGEST_PTR) + movdqa STATE0, TMP + punpcklqdq STATE1, STATE0 /* GHEF */ + punpckhqdq TMP, STATE1 /* ABCD */ + pshufd $0xB1, STATE0, STATE0 /* HGFE */ + pshufd $0x1B, STATE1, STATE1 /* DCBA */ + + movdqu STATE1, 0*16(DIGEST_PTR) + movdqu STATE0, 1*16(DIGEST_PTR) .Ldone_hash: diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index f08496cd6870..24973f42c43f 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -680,6 +680,7 @@ SYM_TYPED_FUNC_START(sha512_transform_rorx) pop %r12 pop %rbx + vzeroupper RET SYM_FUNC_END(sha512_transform_rorx) diff --git a/crypto/Kconfig b/crypto/Kconfig index 2903ce19f15c..5688d42a59c2 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1456,26 +1456,6 @@ config CRYPTO_USER_API_ENABLE_OBSOLETE already been phased out from internal use by the kernel, and are only useful for userspace clients that still rely on them. -config CRYPTO_STATS - bool "Crypto usage statistics" - depends on CRYPTO_USER - help - Enable the gathering of crypto stats. - - Enabling this option reduces the performance of the crypto API. It - should only be enabled when there is actually a use case for it. - - This collects data sizes, numbers of requests, and numbers - of errors processed by: - - AEAD ciphers (encrypt, decrypt) - - asymmetric key ciphers (encrypt, decrypt, verify, sign) - - symmetric key ciphers (encrypt, decrypt) - - compression algorithms (compress, decompress) - - hash algorithms (hash) - - key-agreement protocol primitives (setsecret, generate - public key, compute shared secret) - - RNG (generate, seed) - endmenu config CRYPTO_HASH_INFO diff --git a/crypto/Makefile b/crypto/Makefile index 408f0a1f9ab9..de9a3312a2c8 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -69,8 +69,6 @@ cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_USER) += crypto_user.o -crypto_user-y := crypto_user_base.o -crypto_user-$(CONFIG_CRYPTO_STATS) += crypto_user_stat.o obj-$(CONFIG_CRYPTO_CMAC) += cmac.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_VMAC) += vmac.o diff --git a/crypto/acompress.c b/crypto/acompress.c index 1c682810a484..6fdf0ff9f3c0 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -93,32 +93,6 @@ static unsigned int crypto_acomp_extsize(struct crypto_alg *alg) return extsize; } -static inline int __crypto_acomp_report_stat(struct sk_buff *skb, - struct crypto_alg *alg) -{ - struct comp_alg_common *calg = __crypto_comp_alg_common(alg); - struct crypto_istat_compress *istat = comp_get_stat(calg); - struct crypto_stat_compress racomp; - - memset(&racomp, 0, sizeof(racomp)); - - strscpy(racomp.type, "acomp", sizeof(racomp.type)); - racomp.stat_compress_cnt = atomic64_read(&istat->compress_cnt); - racomp.stat_compress_tlen = atomic64_read(&istat->compress_tlen); - racomp.stat_decompress_cnt = atomic64_read(&istat->decompress_cnt); - racomp.stat_decompress_tlen = atomic64_read(&istat->decompress_tlen); - racomp.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp); -} - -#ifdef CONFIG_CRYPTO_STATS -int crypto_acomp_report_stat(struct sk_buff *skb, struct crypto_alg *alg) -{ - return __crypto_acomp_report_stat(skb, alg); -} -#endif - static const struct crypto_type crypto_acomp_type = { .extsize = crypto_acomp_extsize, .init_tfm = crypto_acomp_init_tfm, @@ -128,9 +102,6 @@ static const struct crypto_type crypto_acomp_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_acomp_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_acomp_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK, .type = CRYPTO_ALG_TYPE_ACOMPRESS, @@ -184,13 +155,9 @@ EXPORT_SYMBOL_GPL(acomp_request_free); void comp_prepare_alg(struct comp_alg_common *alg) { - struct crypto_istat_compress *istat = comp_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } int crypto_register_acomp(struct acomp_alg *alg) diff --git a/crypto/aead.c b/crypto/aead.c index 54906633566a..cade532413bf 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -20,15 +20,6 @@ #include "internal.h" -static inline struct crypto_istat_aead *aead_get_stat(struct aead_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { @@ -45,8 +36,7 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } @@ -90,62 +80,28 @@ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) } EXPORT_SYMBOL_GPL(crypto_aead_setauthsize); -static inline int crypto_aead_errstat(struct crypto_istat_aead *istat, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&istat->err_cnt); - - return err; -} - int crypto_aead_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct crypto_istat_aead *istat; - int ret; - - istat = aead_get_stat(alg); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else - ret = alg->encrypt(req); + return -ENOKEY; - return crypto_aead_errstat(istat, ret); + return crypto_aead_alg(aead)->encrypt(req); } EXPORT_SYMBOL_GPL(crypto_aead_encrypt); int crypto_aead_decrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct aead_alg *alg = crypto_aead_alg(aead); - struct crypto_istat_aead *istat; - int ret; - - istat = aead_get_stat(alg); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (req->cryptlen < crypto_aead_authsize(aead)) - ret = -EINVAL; - else - ret = alg->decrypt(req); + return -ENOKEY; + + if (req->cryptlen < crypto_aead_authsize(aead)) + return -EINVAL; - return crypto_aead_errstat(istat, ret); + return crypto_aead_alg(aead)->decrypt(req); } EXPORT_SYMBOL_GPL(crypto_aead_decrypt); @@ -215,26 +171,6 @@ static void crypto_aead_free_instance(struct crypto_instance *inst) aead->free(aead); } -static int __maybe_unused crypto_aead_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct aead_alg *aead = container_of(alg, struct aead_alg, base); - struct crypto_istat_aead *istat = aead_get_stat(aead); - struct crypto_stat_aead raead; - - memset(&raead, 0, sizeof(raead)); - - strscpy(raead.type, "aead", sizeof(raead.type)); - - raead.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - raead.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - raead.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - raead.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - raead.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead); -} - static const struct crypto_type crypto_aead_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_aead_init_tfm, @@ -245,9 +181,6 @@ static const struct crypto_type crypto_aead_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_aead_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_aead_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_AEAD, @@ -277,7 +210,6 @@ EXPORT_SYMBOL_GPL(crypto_has_aead); static int aead_prepare_alg(struct aead_alg *alg) { - struct crypto_istat_aead *istat = aead_get_stat(alg); struct crypto_alg *base = &alg->base; if (max3(alg->maxauthsize, alg->ivsize, alg->chunksize) > @@ -291,9 +223,6 @@ static int aead_prepare_alg(struct aead_alg *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AEAD; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/ahash.c b/crypto/ahash.c index 0ac83f7f701d..bcd9de009a91 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -27,22 +27,6 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e -static inline struct crypto_istat_hash *ahash_get_stat(struct ahash_alg *alg) -{ - return hash_get_stat(&alg->halg); -} - -static inline int crypto_ahash_errstat(struct ahash_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&ahash_get_stat(alg)->err_cnt); - - return err; -} - /* * For an ahash tfm that is using an shash algorithm (instead of an ahash * algorithm), this returns the underlying shash tfm. @@ -344,75 +328,47 @@ static void ahash_restore_req(struct ahash_request *req, int err) int crypto_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return shash_ahash_update(req, ahash_request_ctx(req)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_add(req->nbytes, &ahash_get_stat(alg)->hash_tlen); - return crypto_ahash_errstat(alg, alg->update(req)); + return crypto_ahash_alg(tfm)->update(req); } EXPORT_SYMBOL_GPL(crypto_ahash_update); int crypto_ahash_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return crypto_shash_final(ahash_request_ctx(req), req->result); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&ahash_get_stat(alg)->hash_cnt); - return crypto_ahash_errstat(alg, alg->final(req)); + return crypto_ahash_alg(tfm)->final(req); } EXPORT_SYMBOL_GPL(crypto_ahash_final); int crypto_ahash_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; if (likely(tfm->using_shash)) return shash_ahash_finup(req, ahash_request_ctx(req)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = ahash_get_stat(alg); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(req->nbytes, &istat->hash_tlen); - } - return crypto_ahash_errstat(alg, alg->finup(req)); + return crypto_ahash_alg(tfm)->finup(req); } EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg; - int err; if (likely(tfm->using_shash)) return shash_ahash_digest(req, prepare_shash_desc(req, tfm)); - alg = crypto_ahash_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = ahash_get_stat(alg); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(req->nbytes, &istat->hash_tlen); - } - if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - err = -ENOKEY; - else - err = alg->digest(req); + return -ENOKEY; - return crypto_ahash_errstat(alg, err); + return crypto_ahash_alg(tfm)->digest(req); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); @@ -571,12 +527,6 @@ static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) __crypto_hash_alg_common(alg)->digestsize); } -static int __maybe_unused crypto_ahash_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - return crypto_hash_report_stat(skb, alg, "ahash"); -} - static const struct crypto_type crypto_ahash_type = { .extsize = crypto_ahash_extsize, .init_tfm = crypto_ahash_init_tfm, @@ -587,9 +537,6 @@ static const struct crypto_type crypto_ahash_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_ahash_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_ahash_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, .type = CRYPTO_ALG_TYPE_AHASH, diff --git a/crypto/akcipher.c b/crypto/akcipher.c index 52813f0b19e4..e0ff5f4dda6d 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -70,30 +70,6 @@ static void crypto_akcipher_free_instance(struct crypto_instance *inst) akcipher->free(akcipher); } -static int __maybe_unused crypto_akcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct akcipher_alg *akcipher = __crypto_akcipher_alg(alg); - struct crypto_istat_akcipher *istat; - struct crypto_stat_akcipher rakcipher; - - istat = akcipher_get_stat(akcipher); - - memset(&rakcipher, 0, sizeof(rakcipher)); - - strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); - rakcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rakcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rakcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rakcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rakcipher.stat_sign_cnt = atomic64_read(&istat->sign_cnt); - rakcipher.stat_verify_cnt = atomic64_read(&istat->verify_cnt); - rakcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, - sizeof(rakcipher), &rakcipher); -} - static const struct crypto_type crypto_akcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_akcipher_init_tfm, @@ -104,9 +80,6 @@ static const struct crypto_type crypto_akcipher_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_akcipher_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_akcipher_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, .type = CRYPTO_ALG_TYPE_AKCIPHER, @@ -131,15 +104,11 @@ EXPORT_SYMBOL_GPL(crypto_alloc_akcipher); static void akcipher_prepare_alg(struct akcipher_alg *alg) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_type = &crypto_akcipher_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_AKCIPHER; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } static int akcipher_default_op(struct akcipher_request *req) diff --git a/crypto/algboss.c b/crypto/algboss.c index 0de1e6697949..1aa5f306998a 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -138,9 +138,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) goto err_free_param; } - if (!i) - goto err_free_param; - param->tb[i + 1] = NULL; param->type.attr.rta_len = sizeof(param->type); diff --git a/crypto/api.c b/crypto/api.c index 7f402107f0cc..6aa5a3b4ed5e 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -202,18 +202,18 @@ static void crypto_start_test(struct crypto_larval *larval) static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) { struct crypto_larval *larval = (void *)alg; - long timeout; + long time_left; if (!crypto_boot_test_finished()) crypto_start_test(larval); - timeout = wait_for_completion_killable_timeout( + time_left = wait_for_completion_killable_timeout( &larval->completion, 60 * HZ); alg = larval->adult; - if (timeout < 0) + if (time_left < 0) alg = ERR_PTR(-EINTR); - else if (!timeout) + else if (!time_left) alg = ERR_PTR(-ETIMEDOUT); else if (!alg) alg = ERR_PTR(-ENOENT); diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index e314fd57e6f8..3474fb34ded9 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -234,6 +234,7 @@ static int software_key_query(const struct kernel_pkey_params *params, info->key_size = len * 8; if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { + int slen = len; /* * ECDSA key sizes are much smaller than RSA, and thus could * operate on (hashed) inputs that are larger than key size. @@ -247,8 +248,19 @@ static int software_key_query(const struct kernel_pkey_params *params, * Verify takes ECDSA-Sig (described in RFC 5480) as input, * which is actually 2 'key_size'-bit integers encoded in * ASN.1. Account for the ASN.1 encoding overhead here. + * + * NIST P192/256/384 may prepend a '0' to a coordinate to + * indicate a positive integer. NIST P521 never needs it. */ - info->max_sig_size = 2 * (len + 3) + 2; + if (strcmp(pkey->pkey_algo, "ecdsa-nist-p521") != 0) + slen += 1; + /* Length of encoding the x & y coordinates */ + slen = 2 * (slen + 2); + /* + * If coordinate encoding takes at least 128 bytes then an + * additional byte for length encoding is needed. + */ + info->max_sig_size = 1 + (slen >= 128) + 1 + slen; } else { info->max_data_size = len; info->max_sig_size = len; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index bb0bffa271b5..25cc4273472f 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -60,24 +60,23 @@ EXPORT_SYMBOL_GPL(x509_free_certificate); */ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) { - struct x509_certificate *cert; - struct x509_parse_context *ctx; + struct x509_certificate *cert __free(x509_free_certificate); + struct x509_parse_context *ctx __free(kfree) = NULL; struct asymmetric_key_id *kid; long ret; - ret = -ENOMEM; cert = kzalloc(sizeof(struct x509_certificate), GFP_KERNEL); if (!cert) - goto error_no_cert; + return ERR_PTR(-ENOMEM); cert->pub = kzalloc(sizeof(struct public_key), GFP_KERNEL); if (!cert->pub) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); cert->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!cert->sig) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); ctx = kzalloc(sizeof(struct x509_parse_context), GFP_KERNEL); if (!ctx) - goto error_no_ctx; + return ERR_PTR(-ENOMEM); ctx->cert = cert; ctx->data = (unsigned long)data; @@ -85,7 +84,7 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) /* Attempt to decode the certificate */ ret = asn1_ber_decoder(&x509_decoder, ctx, data, datalen); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); /* Decode the AuthorityKeyIdentifier */ if (ctx->raw_akid) { @@ -95,20 +94,19 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) ctx->raw_akid, ctx->raw_akid_size); if (ret < 0) { pr_warn("Couldn't decode AuthKeyIdentifier\n"); - goto error_decode; + return ERR_PTR(ret); } } - ret = -ENOMEM; cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL); if (!cert->pub->key) - goto error_decode; + return ERR_PTR(-ENOMEM); cert->pub->keylen = ctx->key_size; cert->pub->params = kmemdup(ctx->params, ctx->params_size, GFP_KERNEL); if (!cert->pub->params) - goto error_decode; + return ERR_PTR(-ENOMEM); cert->pub->paramlen = ctx->params_size; cert->pub->algo = ctx->key_algo; @@ -116,33 +114,23 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen) /* Grab the signature bits */ ret = x509_get_sig_params(cert); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); /* Generate cert issuer + serial number key ID */ kid = asymmetric_key_generate_id(cert->raw_serial, cert->raw_serial_size, cert->raw_issuer, cert->raw_issuer_size); - if (IS_ERR(kid)) { - ret = PTR_ERR(kid); - goto error_decode; - } + if (IS_ERR(kid)) + return ERR_CAST(kid); cert->id = kid; /* Detect self-signed certificates */ ret = x509_check_for_self_signed(cert); if (ret < 0) - goto error_decode; + return ERR_PTR(ret); - kfree(ctx); - return cert; - -error_decode: - kfree(ctx); -error_no_ctx: - x509_free_certificate(cert); -error_no_cert: - return ERR_PTR(ret); + return_ptr(cert); } EXPORT_SYMBOL_GPL(x509_cert_parse); @@ -546,6 +534,9 @@ int x509_extract_key_data(void *context, size_t hdrlen, case OID_id_ansip384r1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p384"; break; + case OID_id_ansip521r1: + ctx->cert->pub->pkey_algo = "ecdsa-nist-p521"; + break; default: return -ENOPKG; } diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index 97a886cbe01c..0688c222806b 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -5,6 +5,7 @@ * Written by David Howells (dhowells@redhat.com) */ +#include <linux/cleanup.h> #include <linux/time.h> #include <crypto/public_key.h> #include <keys/asymmetric-type.h> @@ -44,6 +45,8 @@ struct x509_certificate { * x509_cert_parser.c */ extern void x509_free_certificate(struct x509_certificate *cert); +DEFINE_FREE(x509_free_certificate, struct x509_certificate *, + if (!IS_ERR(_T)) x509_free_certificate(_T)) extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen); extern int x509_decode_time(time64_t *_t, size_t hdrlen, unsigned char tag, diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 6a4f00be22fc..00ac7159fba2 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -161,12 +161,11 @@ not_self_signed: */ static int x509_key_preparse(struct key_preparsed_payload *prep) { - struct asymmetric_key_ids *kids; - struct x509_certificate *cert; + struct x509_certificate *cert __free(x509_free_certificate); + struct asymmetric_key_ids *kids __free(kfree) = NULL; + char *p, *desc __free(kfree) = NULL; const char *q; size_t srlen, sulen; - char *desc = NULL, *p; - int ret; cert = x509_cert_parse(prep->data, prep->datalen); if (IS_ERR(cert)) @@ -188,9 +187,8 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) } /* Don't permit addition of blacklisted keys */ - ret = -EKEYREJECTED; if (cert->blacklisted) - goto error_free_cert; + return -EKEYREJECTED; /* Propose a description */ sulen = strlen(cert->subject); @@ -202,10 +200,9 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) q = cert->raw_serial; } - ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); if (!desc) - goto error_free_cert; + return -ENOMEM; p = memcpy(desc, cert->subject, sulen); p += sulen; *p++ = ':'; @@ -215,16 +212,14 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) kids = kmalloc(sizeof(struct asymmetric_key_ids), GFP_KERNEL); if (!kids) - goto error_free_desc; + return -ENOMEM; kids->id[0] = cert->id; kids->id[1] = cert->skid; kids->id[2] = asymmetric_key_generate_id(cert->raw_subject, cert->raw_subject_size, "", 0); - if (IS_ERR(kids->id[2])) { - ret = PTR_ERR(kids->id[2]); - goto error_free_kids; - } + if (IS_ERR(kids->id[2])) + return PTR_ERR(kids->id[2]); /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); @@ -242,15 +237,7 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) cert->sig = NULL; desc = NULL; kids = NULL; - ret = 0; - -error_free_kids: - kfree(kids); -error_free_desc: - kfree(desc); -error_free_cert: - x509_free_certificate(cert); - return ret; + return 0; } static struct asymmetric_key_parser x509_key_parser = { diff --git a/crypto/cipher.c b/crypto/cipher.c index 47c77a3e5978..40cae908788e 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -34,8 +34,7 @@ static int setkey_unaligned(struct crypto_cipher *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); ret = cia->cia_setkey(crypto_cipher_tfm(tfm), alignbuffer, keylen); - memset(alignbuffer, 0, keylen); - kfree(buffer); + kfree_sensitive(buffer); return ret; } diff --git a/crypto/compress.h b/crypto/compress.h index 19f65516d699..c3cedfb5e606 100644 --- a/crypto/compress.h +++ b/crypto/compress.h @@ -13,14 +13,11 @@ struct acomp_req; struct comp_alg_common; -struct sk_buff; int crypto_init_scomp_ops_async(struct crypto_tfm *tfm); struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req); void crypto_acomp_scomp_free_ctx(struct acomp_req *req); -int crypto_acomp_report_stat(struct sk_buff *skb, struct crypto_alg *alg); - void comp_prepare_alg(struct comp_alg_common *alg); #endif /* _LOCAL_CRYPTO_COMPRESS_H */ diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user.c index 3fa20f12989f..6c571834e86a 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user.c @@ -18,7 +18,6 @@ #include <crypto/internal/rng.h> #include <crypto/akcipher.h> #include <crypto/kpp.h> -#include <crypto/internal/cryptouser.h> #include "internal.h" @@ -33,7 +32,7 @@ struct crypto_dump_info { u16 nlmsg_flags; }; -struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) +static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) { struct crypto_alg *q, *alg = NULL; @@ -387,6 +386,13 @@ static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh, return crypto_del_default_rng(); } +static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, + struct nlattr **attrs) +{ + /* No longer supported */ + return -ENOTSUPP; +} + #define MSGSIZE(type) sizeof(struct type) static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c deleted file mode 100644 index d4f3d39b5137..000000000000 --- a/crypto/crypto_user_stat.c +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto user configuration API. - * - * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com> - * - */ - -#include <crypto/algapi.h> -#include <crypto/internal/cryptouser.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <net/netlink.h> -#include <net/sock.h> - -#define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x)) - -struct crypto_dump_info { - struct sk_buff *in_skb; - struct sk_buff *out_skb; - u32 nlmsg_seq; - u16 nlmsg_flags; -}; - -static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_stat_cipher rcipher; - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - -static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) -{ - struct crypto_stat_compress rcomp; - - memset(&rcomp, 0, sizeof(rcomp)); - - strscpy(rcomp.type, "compression", sizeof(rcomp.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp); -} - -static int crypto_reportstat_one(struct crypto_alg *alg, - struct crypto_user_alg *ualg, - struct sk_buff *skb) -{ - memset(ualg, 0, sizeof(*ualg)); - - strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strscpy(ualg->cru_driver_name, alg->cra_driver_name, - sizeof(ualg->cru_driver_name)); - strscpy(ualg->cru_module_name, module_name(alg->cra_module), - sizeof(ualg->cru_module_name)); - - ualg->cru_type = 0; - ualg->cru_mask = 0; - ualg->cru_flags = alg->cra_flags; - ualg->cru_refcnt = refcount_read(&alg->cra_refcnt); - - if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority)) - goto nla_put_failure; - if (alg->cra_flags & CRYPTO_ALG_LARVAL) { - struct crypto_stat_larval rl; - - memset(&rl, 0, sizeof(rl)); - strscpy(rl.type, "larval", sizeof(rl.type)); - if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(rl), &rl)) - goto nla_put_failure; - goto out; - } - - if (alg->cra_type && alg->cra_type->report_stat) { - if (alg->cra_type->report_stat(skb, alg)) - goto nla_put_failure; - goto out; - } - - switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) { - case CRYPTO_ALG_TYPE_CIPHER: - if (crypto_report_cipher(skb, alg)) - goto nla_put_failure; - break; - case CRYPTO_ALG_TYPE_COMPRESS: - if (crypto_report_comp(skb, alg)) - goto nla_put_failure; - break; - default: - pr_err("ERROR: Unhandled alg %d in %s\n", - alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL), - __func__); - } - -out: - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int crypto_reportstat_alg(struct crypto_alg *alg, - struct crypto_dump_info *info) -{ - struct sk_buff *in_skb = info->in_skb; - struct sk_buff *skb = info->out_skb; - struct nlmsghdr *nlh; - struct crypto_user_alg *ualg; - int err = 0; - - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, - CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags); - if (!nlh) { - err = -EMSGSIZE; - goto out; - } - - ualg = nlmsg_data(nlh); - - err = crypto_reportstat_one(alg, ualg, skb); - if (err) { - nlmsg_cancel(skb, nlh); - goto out; - } - - nlmsg_end(skb, nlh); - -out: - return err; -} - -int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, - struct nlattr **attrs) -{ - struct net *net = sock_net(in_skb->sk); - struct crypto_user_alg *p = nlmsg_data(in_nlh); - struct crypto_alg *alg; - struct sk_buff *skb; - struct crypto_dump_info info; - int err; - - if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) - return -EINVAL; - - alg = crypto_alg_match(p, 0); - if (!alg) - return -ENOENT; - - err = -ENOMEM; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!skb) - goto drop_alg; - - info.in_skb = in_skb; - info.out_skb = skb; - info.nlmsg_seq = in_nlh->nlmsg_seq; - info.nlmsg_flags = 0; - - err = crypto_reportstat_alg(alg, &info); - -drop_alg: - crypto_mod_put(alg); - - if (err) { - kfree_skb(skb); - return err; - } - - return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid); -} - -MODULE_LICENSE("GPL"); diff --git a/crypto/ecc.c b/crypto/ecc.c index f53fb4d6af99..c1d2e884be1e 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -60,6 +60,8 @@ const struct ecc_curve *ecc_get_curve(unsigned int curve_id) return &nist_p256; case ECC_CURVE_NIST_P384: return &nist_p384; + case ECC_CURVE_NIST_P521: + return &nist_p521; default: return NULL; } @@ -689,7 +691,7 @@ static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod, static void vli_mmod_fast_192(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { - const unsigned int ndigits = 3; + const unsigned int ndigits = ECC_CURVE_NIST_P192_DIGITS; int carry; vli_set(result, product, ndigits); @@ -717,7 +719,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { int carry; - const unsigned int ndigits = 4; + const unsigned int ndigits = ECC_CURVE_NIST_P256_DIGITS; /* t */ vli_set(result, product, ndigits); @@ -800,7 +802,7 @@ static void vli_mmod_fast_384(u64 *result, const u64 *product, const u64 *curve_prime, u64 *tmp) { int carry; - const unsigned int ndigits = 6; + const unsigned int ndigits = ECC_CURVE_NIST_P384_DIGITS; /* t */ vli_set(result, product, ndigits); @@ -902,6 +904,28 @@ static void vli_mmod_fast_384(u64 *result, const u64 *product, #undef AND64H #undef AND64L +/* + * Computes result = product % curve_prime + * from "Recommendations for Discrete Logarithm-Based Cryptography: + * Elliptic Curve Domain Parameters" section G.1.4 + */ +static void vli_mmod_fast_521(u64 *result, const u64 *product, + const u64 *curve_prime, u64 *tmp) +{ + const unsigned int ndigits = ECC_CURVE_NIST_P521_DIGITS; + size_t i; + + /* Initialize result with lowest 521 bits from product */ + vli_set(result, product, ndigits); + result[8] &= 0x1ff; + + for (i = 0; i < ndigits; i++) + tmp[i] = (product[8 + i] >> 9) | (product[9 + i] << 55); + tmp[8] &= 0x1ff; + + vli_mod_add(result, result, tmp, curve_prime, ndigits); +} + /* Computes result = product % curve_prime for different curve_primes. * * Note that curve_primes are distinguished just by heuristic check and @@ -932,15 +956,18 @@ static bool vli_mmod_fast(u64 *result, u64 *product, } switch (ndigits) { - case 3: + case ECC_CURVE_NIST_P192_DIGITS: vli_mmod_fast_192(result, product, curve_prime, tmp); break; - case 4: + case ECC_CURVE_NIST_P256_DIGITS: vli_mmod_fast_256(result, product, curve_prime, tmp); break; - case 6: + case ECC_CURVE_NIST_P384_DIGITS: vli_mmod_fast_384(result, product, curve_prime, tmp); break; + case ECC_CURVE_NIST_P521_DIGITS: + vli_mmod_fast_521(result, product, curve_prime, tmp); + break; default: pr_err_ratelimited("ecc: unsupported digits size!\n"); return false; @@ -1295,7 +1322,10 @@ static void ecc_point_mult(struct ecc_point *result, carry = vli_add(sk[0], scalar, curve->n, ndigits); vli_add(sk[1], sk[0], curve->n, ndigits); scalar = sk[!carry]; - num_bits = sizeof(u64) * ndigits * 8 + 1; + if (curve->nbits == 521) /* NIST P521 */ + num_bits = curve->nbits + 2; + else + num_bits = sizeof(u64) * ndigits * 8 + 1; vli_set(rx[1], point->x, ndigits); vli_set(ry[1], point->y, ndigits); @@ -1416,6 +1446,12 @@ void ecc_point_mult_shamir(const struct ecc_point *result, } EXPORT_SYMBOL(ecc_point_mult_shamir); +/* + * This function performs checks equivalent to Appendix A.4.2 of FIPS 186-5. + * Whereas A.4.2 results in an integer in the interval [1, n-1], this function + * ensures that the integer is in the range of [2, n-3]. We are slightly + * stricter because of the currently used scalar multiplication algorithm. + */ static int __ecc_is_key_valid(const struct ecc_curve *curve, const u64 *private_key, unsigned int ndigits) { @@ -1455,31 +1491,29 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, EXPORT_SYMBOL(ecc_is_key_valid); /* - * ECC private keys are generated using the method of extra random bits, - * equivalent to that described in FIPS 186-4, Appendix B.4.1. - * - * d = (c mod(n–1)) + 1 where c is a string of random bits, 64 bits longer - * than requested - * 0 <= c mod(n-1) <= n-2 and implies that - * 1 <= d <= n-1 + * ECC private keys are generated using the method of rejection sampling, + * equivalent to that described in FIPS 186-5, Appendix A.2.2. * * This method generates a private key uniformly distributed in the range - * [1, n-1]. + * [2, n-3]. */ -int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) +int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, + u64 *private_key) { const struct ecc_curve *curve = ecc_get_curve(curve_id); - u64 priv[ECC_MAX_DIGITS]; unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; unsigned int nbits = vli_num_bits(curve->n, ndigits); int err; - /* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */ - if (nbits < 160 || ndigits > ARRAY_SIZE(priv)) + /* + * Step 1 & 2: check that N is included in Table 1 of FIPS 186-5, + * section 6.1.1. + */ + if (nbits < 224) return -EINVAL; /* - * FIPS 186-4 recommends that the private key should be obtained from a + * FIPS 186-5 recommends that the private key should be obtained from a * RBG with a security strength equal to or greater than the security * strength associated with N. * @@ -1492,17 +1526,17 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) if (crypto_get_default_rng()) return -EFAULT; - err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes); + /* Step 3: obtain N returned_bits from the DRBG. */ + err = crypto_rng_get_bytes(crypto_default_rng, + (u8 *)private_key, nbytes); crypto_put_default_rng(); if (err) return err; - /* Make sure the private key is in the valid range. */ - if (__ecc_is_key_valid(curve, priv, ndigits)) + /* Step 4: make sure the private key is in the valid range. */ + if (__ecc_is_key_valid(curve, private_key, ndigits)) return -EINVAL; - ecc_swap_digits(priv, privkey, ndigits); - return 0; } EXPORT_SYMBOL(ecc_gen_privkey); @@ -1512,23 +1546,20 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits, { int ret = 0; struct ecc_point *pk; - u64 priv[ECC_MAX_DIGITS]; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) { + if (!private_key) { ret = -EINVAL; goto out; } - ecc_swap_digits(private_key, priv, ndigits); - pk = ecc_alloc_point(ndigits); if (!pk) { ret = -ENOMEM; goto out; } - ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits); + ecc_point_mult(pk, &curve->g, private_key, NULL, curve, ndigits); /* SP800-56A rev 3 5.6.2.1.3 key check */ if (ecc_is_pubkey_valid_full(curve, pk)) { @@ -1612,13 +1643,11 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, { int ret = 0; struct ecc_point *product, *pk; - u64 priv[ECC_MAX_DIGITS]; u64 rand_z[ECC_MAX_DIGITS]; unsigned int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key || !public_key || !curve || - ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) { + if (!private_key || !public_key || ndigits > ARRAY_SIZE(rand_z)) { ret = -EINVAL; goto out; } @@ -1639,15 +1668,13 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, if (ret) goto err_alloc_product; - ecc_swap_digits(private_key, priv, ndigits); - product = ecc_alloc_point(ndigits); if (!product) { ret = -ENOMEM; goto err_alloc_product; } - ecc_point_mult(product, pk, priv, rand_z, curve, ndigits); + ecc_point_mult(product, pk, private_key, rand_z, curve, ndigits); if (ecc_point_is_zero(product)) { ret = -EFAULT; @@ -1657,7 +1684,6 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits, ecc_swap_digits(product->x, secret, ndigits); err_validity: - memzero_explicit(priv, sizeof(priv)); memzero_explicit(rand_z, sizeof(rand_z)); ecc_free_point(product); err_alloc_product: diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h index 9719934c9428..0ecade7d02f5 100644 --- a/crypto/ecc_curve_defs.h +++ b/crypto/ecc_curve_defs.h @@ -17,6 +17,7 @@ static u64 nist_p192_b[] = { 0xFEB8DEECC146B9B1ull, 0x0FA7E9AB72243049ull, 0x64210519E59C80E7ull }; static struct ecc_curve nist_p192 = { .name = "nist_192", + .nbits = 192, .g = { .x = nist_p192_g_x, .y = nist_p192_g_y, @@ -43,6 +44,7 @@ static u64 nist_p256_b[] = { 0x3BCE3C3E27D2604Bull, 0x651D06B0CC53B0F6ull, 0xB3EBBD55769886BCull, 0x5AC635D8AA3A93E7ull }; static struct ecc_curve nist_p256 = { .name = "nist_256", + .nbits = 256, .g = { .x = nist_p256_g_x, .y = nist_p256_g_y, @@ -75,6 +77,7 @@ static u64 nist_p384_b[] = { 0x2a85c8edd3ec2aefull, 0xc656398d8a2ed19dull, 0x988e056be3f82d19ull, 0xb3312fa7e23ee7e4ull }; static struct ecc_curve nist_p384 = { .name = "nist_384", + .nbits = 384, .g = { .x = nist_p384_g_x, .y = nist_p384_g_y, @@ -86,6 +89,51 @@ static struct ecc_curve nist_p384 = { .b = nist_p384_b }; +/* NIST P-521 */ +static u64 nist_p521_g_x[] = { 0xf97e7e31c2e5bd66ull, 0x3348b3c1856a429bull, + 0xfe1dc127a2ffa8deull, 0xa14b5e77efe75928ull, + 0xf828af606b4d3dbaull, 0x9c648139053fb521ull, + 0x9e3ecb662395b442ull, 0x858e06b70404e9cdull, + 0xc6ull }; +static u64 nist_p521_g_y[] = { 0x88be94769fd16650ull, 0x353c7086a272c240ull, + 0xc550b9013fad0761ull, 0x97ee72995ef42640ull, + 0x17afbd17273e662cull, 0x98f54449579b4468ull, + 0x5c8a5fb42c7d1bd9ull, 0x39296a789a3bc004ull, + 0x118ull }; +static u64 nist_p521_p[] = { 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_n[] = { 0xbb6fb71e91386409ull, 0x3bb5c9b8899c47aeull, + 0x7fcc0148f709a5d0ull, 0x51868783bf2f966bull, + 0xfffffffffffffffaull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_a[] = { 0xfffffffffffffffcull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0xffffffffffffffffull, 0xffffffffffffffffull, + 0x1ffull }; +static u64 nist_p521_b[] = { 0xef451fd46b503f00ull, 0x3573df883d2c34f1ull, + 0x1652c0bd3bb1bf07ull, 0x56193951ec7e937bull, + 0xb8b489918ef109e1ull, 0xa2da725b99b315f3ull, + 0x929a21a0b68540eeull, 0x953eb9618e1c9a1full, + 0x051ull }; +static struct ecc_curve nist_p521 = { + .name = "nist_521", + .nbits = 521, + .g = { + .x = nist_p521_g_x, + .y = nist_p521_g_y, + .ndigits = 9, + }, + .p = nist_p521_p, + .n = nist_p521_n, + .a = nist_p521_a, + .b = nist_p521_b +}; + /* curve25519 */ static u64 curve25519_g_x[] = { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }; @@ -95,6 +143,7 @@ static u64 curve25519_a[] = { 0x000000000001DB41, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }; static const struct ecc_curve ecc_25519 = { .name = "curve25519", + .nbits = 255, .g = { .x = curve25519_g_x, .ndigits = 4, diff --git a/crypto/ecdh.c b/crypto/ecdh.c index 80afee3234fb..72cfd1590156 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -28,23 +28,28 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, { struct ecdh_ctx *ctx = ecdh_get_ctx(tfm); struct ecdh params; + int ret = 0; if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0 || params.key_size > sizeof(u64) * ctx->ndigits) return -EINVAL; + memset(ctx->private_key, 0, sizeof(ctx->private_key)); + if (!params.key || !params.key_size) return ecc_gen_privkey(ctx->curve_id, ctx->ndigits, ctx->private_key); - memcpy(ctx->private_key, params.key, params.key_size); + ecc_digits_from_bytes(params.key, params.key_size, + ctx->private_key, ctx->ndigits); if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits, ctx->private_key, params.key_size) < 0) { memzero_explicit(ctx->private_key, params.key_size); - return -EINVAL; + ret = -EINVAL; } - return 0; + + return ret; } static int ecdh_compute_value(struct kpp_request *req) diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index fbd76498aba8..258fffbf623d 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -35,8 +35,8 @@ struct ecdsa_signature_ctx { static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, const void *value, size_t vlen, unsigned int ndigits) { - size_t keylen = ndigits * sizeof(u64); - ssize_t diff = vlen - keylen; + size_t bufsize = ndigits * sizeof(u64); + ssize_t diff = vlen - bufsize; const char *d = value; u8 rs[ECC_MAX_BYTES]; @@ -58,7 +58,7 @@ static int ecdsa_get_signature_rs(u64 *dest, size_t hdrlen, unsigned char tag, if (diff) return -EINVAL; } - if (-diff >= keylen) + if (-diff >= bufsize) return -EINVAL; if (diff) { @@ -122,7 +122,7 @@ static int _ecdsa_verify(struct ecc_ctx *ctx, const u64 *hash, const u64 *r, con /* res.x = res.x mod n (if res.x > order) */ if (unlikely(vli_cmp(res.x, curve->n, ndigits) == 1)) - /* faster alternative for NIST p384, p256 & p192 */ + /* faster alternative for NIST p521, p384, p256 & p192 */ vli_sub(res.x, res.x, curve->n, ndigits); if (!vli_cmp(res.x, r, ndigits)) @@ -138,7 +138,7 @@ static int ecdsa_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); - size_t keylen = ctx->curve->g.ndigits * sizeof(u64); + size_t bufsize = ctx->curve->g.ndigits * sizeof(u64); struct ecdsa_signature_ctx sig_ctx = { .curve = ctx->curve, }; @@ -165,14 +165,14 @@ static int ecdsa_verify(struct akcipher_request *req) goto error; /* if the hash is shorter then we will add leading zeros to fit to ndigits */ - diff = keylen - req->dst_len; + diff = bufsize - req->dst_len; if (diff >= 0) { if (diff) memset(rawhash, 0, diff); memcpy(&rawhash[diff], buffer + req->src_len, req->dst_len); } else if (diff < 0) { /* given hash is longer, we take the left-most bytes */ - memcpy(&rawhash, buffer + req->src_len, keylen); + memcpy(&rawhash, buffer + req->src_len, bufsize); } ecc_swap_digits((u64 *)rawhash, hash, ctx->curve->g.ndigits); @@ -222,28 +222,32 @@ static int ecdsa_ecc_ctx_reset(struct ecc_ctx *ctx) static int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); + unsigned int digitlen, ndigits; const unsigned char *d = key; - const u64 *digits = (const u64 *)&d[1]; - unsigned int ndigits; int ret; ret = ecdsa_ecc_ctx_reset(ctx); if (ret < 0) return ret; - if (keylen < 1 || (((keylen - 1) >> 1) % sizeof(u64)) != 0) + if (keylen < 1 || ((keylen - 1) & 1) != 0) return -EINVAL; /* we only accept uncompressed format indicated by '4' */ if (d[0] != 4) return -EINVAL; keylen--; - ndigits = (keylen >> 1) / sizeof(u64); + digitlen = keylen >> 1; + + ndigits = DIV_ROUND_UP(digitlen, sizeof(u64)); if (ndigits != ctx->curve->g.ndigits) return -EINVAL; - ecc_swap_digits(digits, ctx->pub_key.x, ndigits); - ecc_swap_digits(&digits[ndigits], ctx->pub_key.y, ndigits); + d++; + + ecc_digits_from_bytes(d, digitlen, ctx->pub_key.x, ndigits); + ecc_digits_from_bytes(&d[digitlen], digitlen, ctx->pub_key.y, ndigits); + ret = ecc_is_pubkey_valid_full(ctx->curve, &ctx->pub_key); ctx->pub_key_set = ret == 0; @@ -262,9 +266,31 @@ static unsigned int ecdsa_max_size(struct crypto_akcipher *tfm) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); - return ctx->pub_key.ndigits << ECC_DIGITS_TO_BYTES_SHIFT; + return DIV_ROUND_UP(ctx->curve->nbits, 8); +} + +static int ecdsa_nist_p521_init_tfm(struct crypto_akcipher *tfm) +{ + struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); + + return ecdsa_ecc_ctx_init(ctx, ECC_CURVE_NIST_P521); } +static struct akcipher_alg ecdsa_nist_p521 = { + .verify = ecdsa_verify, + .set_pub_key = ecdsa_set_pub_key, + .max_size = ecdsa_max_size, + .init = ecdsa_nist_p521_init_tfm, + .exit = ecdsa_exit_tfm, + .base = { + .cra_name = "ecdsa-nist-p521", + .cra_driver_name = "ecdsa-nist-p521-generic", + .cra_priority = 100, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct ecc_ctx), + }, +}; + static int ecdsa_nist_p384_init_tfm(struct crypto_akcipher *tfm) { struct ecc_ctx *ctx = akcipher_tfm_ctx(tfm); @@ -348,8 +374,15 @@ static int __init ecdsa_init(void) if (ret) goto nist_p384_error; + ret = crypto_register_akcipher(&ecdsa_nist_p521); + if (ret) + goto nist_p521_error; + return 0; +nist_p521_error: + crypto_unregister_akcipher(&ecdsa_nist_p384); + nist_p384_error: crypto_unregister_akcipher(&ecdsa_nist_p256); @@ -365,6 +398,7 @@ static void __exit ecdsa_exit(void) crypto_unregister_akcipher(&ecdsa_nist_p192); crypto_unregister_akcipher(&ecdsa_nist_p256); crypto_unregister_akcipher(&ecdsa_nist_p384); + crypto_unregister_akcipher(&ecdsa_nist_p521); } subsys_initcall(ecdsa_init); @@ -373,4 +407,8 @@ module_exit(ecdsa_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefan Berger <stefanb@linux.ibm.com>"); MODULE_DESCRIPTION("ECDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p192"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p256"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p384"); +MODULE_ALIAS_CRYPTO("ecdsa-nist-p521"); MODULE_ALIAS_CRYPTO("ecdsa-generic"); diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index f3c6b5e15e75..3811f3805b5d 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -294,4 +294,5 @@ module_exit(ecrdsa_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vitaly Chikunov <vt@altlinux.org>"); MODULE_DESCRIPTION("EC-RDSA generic algorithm"); +MODULE_ALIAS_CRYPTO("ecrdsa"); MODULE_ALIAS_CRYPTO("ecrdsa-generic"); diff --git a/crypto/ecrdsa_defs.h b/crypto/ecrdsa_defs.h index 0056335b9d03..1c2c2449e331 100644 --- a/crypto/ecrdsa_defs.h +++ b/crypto/ecrdsa_defs.h @@ -47,6 +47,7 @@ static u64 cp256a_b[] = { static struct ecc_curve gost_cp256a = { .name = "cp256a", + .nbits = 256, .g = { .x = cp256a_g_x, .y = cp256a_g_y, @@ -80,6 +81,7 @@ static u64 cp256b_b[] = { static struct ecc_curve gost_cp256b = { .name = "cp256b", + .nbits = 256, .g = { .x = cp256b_g_x, .y = cp256b_g_y, @@ -117,6 +119,7 @@ static u64 cp256c_b[] = { static struct ecc_curve gost_cp256c = { .name = "cp256c", + .nbits = 256, .g = { .x = cp256c_g_x, .y = cp256c_g_y, @@ -166,6 +169,7 @@ static u64 tc512a_b[] = { static struct ecc_curve gost_tc512a = { .name = "tc512a", + .nbits = 512, .g = { .x = tc512a_g_x, .y = tc512a_g_y, @@ -211,6 +215,7 @@ static u64 tc512b_b[] = { static struct ecc_curve gost_tc512b = { .name = "tc512b", + .nbits = 512, .g = { .x = tc512b_g_x, .y = tc512b_g_y, diff --git a/crypto/fips.c b/crypto/fips.c index 92fd506abb21..8a784018ebfc 100644 --- a/crypto/fips.c +++ b/crypto/fips.c @@ -63,7 +63,6 @@ static struct ctl_table crypto_sysctl_table[] = { .mode = 0444, .proc_handler = proc_dostring }, - {} }; static struct ctl_table_header *crypto_sysctls; diff --git a/crypto/hash.h b/crypto/hash.h index 93f6ba0df263..cf9aee07f77d 100644 --- a/crypto/hash.h +++ b/crypto/hash.h @@ -8,39 +8,9 @@ #define _LOCAL_CRYPTO_HASH_H #include <crypto/internal/hash.h> -#include <linux/cryptouser.h> #include "internal.h" -static inline struct crypto_istat_hash *hash_get_stat( - struct hash_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_hash_report_stat(struct sk_buff *skb, - struct crypto_alg *alg, - const char *type) -{ - struct hash_alg_common *halg = __crypto_hash_alg_common(alg); - struct crypto_istat_hash *istat = hash_get_stat(halg); - struct crypto_stat_hash rhash; - - memset(&rhash, 0, sizeof(rhash)); - - strscpy(rhash.type, type, sizeof(rhash.type)); - - rhash.stat_hash_cnt = atomic64_read(&istat->hash_cnt); - rhash.stat_hash_tlen = atomic64_read(&istat->hash_tlen); - rhash.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash); -} - extern const struct crypto_type crypto_shash_type; int hash_prepare_alg(struct hash_alg_common *alg); diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 76edbf8af0ac..c24d4ff2b4a8 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -61,8 +61,7 @@ void *jent_kvzalloc(unsigned int len) void jent_kvzfree(void *ptr, unsigned int len) { - memzero_explicit(ptr, len); - kvfree(ptr); + kvfree_sensitive(ptr, len); } void *jent_zalloc(unsigned int len) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 26a9048bc893..d7056de8c0d7 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -157,8 +157,8 @@ struct rand_data { /* * See the SP 800-90B comment #10b for the corrected cutoff for the SP 800-90B * APT. - * http://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf - * In in the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). + * https://www.untruth.org/~josh/sp80090b/UL%20SP800-90B-final%20comments%20v1.9%2020191212.pdf + * In the syntax of R, this is C = 2 + qbinom(1 − 2^(−30), 511, 2^(-1/osr)). * (The original formula wasn't correct because the first symbol must * necessarily have been observed, so there is no chance of observing 0 of these * symbols.) diff --git a/crypto/kpp.c b/crypto/kpp.c index 33d44e59387f..ecc63a1a948d 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -66,29 +66,6 @@ static void crypto_kpp_free_instance(struct crypto_instance *inst) kpp->free(kpp); } -static int __maybe_unused crypto_kpp_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct kpp_alg *kpp = __crypto_kpp_alg(alg); - struct crypto_istat_kpp *istat; - struct crypto_stat_kpp rkpp; - - istat = kpp_get_stat(kpp); - - memset(&rkpp, 0, sizeof(rkpp)); - - strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); - - rkpp.stat_setsecret_cnt = atomic64_read(&istat->setsecret_cnt); - rkpp.stat_generate_public_key_cnt = - atomic64_read(&istat->generate_public_key_cnt); - rkpp.stat_compute_shared_secret_cnt = - atomic64_read(&istat->compute_shared_secret_cnt); - rkpp.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp); -} - static const struct crypto_type crypto_kpp_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_kpp_init_tfm, @@ -99,9 +76,6 @@ static const struct crypto_type crypto_kpp_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_kpp_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_kpp_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_KPP, @@ -131,15 +105,11 @@ EXPORT_SYMBOL_GPL(crypto_has_kpp); static void kpp_prepare_alg(struct kpp_alg *alg) { - struct crypto_istat_kpp *istat = kpp_get_stat(alg); struct crypto_alg *base = &alg->base; base->cra_type = &crypto_kpp_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_KPP; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); } int crypto_register_kpp(struct kpp_alg *alg) diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c index 0f1bd7dcde24..cdb4897c63e6 100644 --- a/crypto/lskcipher.c +++ b/crypto/lskcipher.c @@ -29,25 +29,6 @@ static inline struct lskcipher_alg *__crypto_lskcipher_alg( return container_of(alg, struct lskcipher_alg, co.base); } -static inline struct crypto_istat_cipher *lskcipher_get_stat( - struct lskcipher_alg *alg) -{ - return skcipher_get_stat_common(&alg->co); -} - -static inline int crypto_lskcipher_errstat(struct lskcipher_alg *alg, int err) -{ - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err) - atomic64_inc(&istat->err_cnt); - - return err; -} - static int lskcipher_setkey_unaligned(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen) { @@ -147,20 +128,13 @@ static int crypto_lskcipher_crypt(struct crypto_lskcipher *tfm, const u8 *src, u32 flags)) { unsigned long alignmask = crypto_lskcipher_alignmask(tfm); - struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - int ret; if (((unsigned long)src | (unsigned long)dst | (unsigned long)iv) & - alignmask) { - ret = crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, - crypt); - goto out; - } + alignmask) + return crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, + crypt); - ret = crypt(tfm, src, dst, len, iv, CRYPTO_LSKCIPHER_FLAG_FINAL); - -out: - return crypto_lskcipher_errstat(alg, ret); + return crypt(tfm, src, dst, len, iv, CRYPTO_LSKCIPHER_FLAG_FINAL); } int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, @@ -168,13 +142,6 @@ int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(len, &istat->encrypt_tlen); - } - return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->encrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_encrypt); @@ -184,13 +151,6 @@ int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, { struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(len, &istat->decrypt_tlen); - } - return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->decrypt); } EXPORT_SYMBOL_GPL(crypto_lskcipher_decrypt); @@ -320,28 +280,6 @@ static int __maybe_unused crypto_lskcipher_report( sizeof(rblkcipher), &rblkcipher); } -static int __maybe_unused crypto_lskcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); - struct crypto_istat_cipher *istat; - struct crypto_stat_cipher rcipher; - - istat = lskcipher_get_stat(skcipher); - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - static const struct crypto_type crypto_lskcipher_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_lskcipher_init_tfm, @@ -352,9 +290,6 @@ static const struct crypto_type crypto_lskcipher_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_lskcipher_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_lskcipher_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_LSKCIPHER, diff --git a/crypto/rng.c b/crypto/rng.c index 279dffdebf59..9d8804e46422 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -30,30 +30,24 @@ static int crypto_default_rng_refcnt; int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { - struct rng_alg *alg = crypto_rng_alg(tfm); u8 *buf = NULL; int err; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&rng_get_stat(alg)->seed_cnt); - if (!seed && slen) { buf = kmalloc(slen, GFP_KERNEL); - err = -ENOMEM; if (!buf) - goto out; + return -ENOMEM; err = get_random_bytes_wait(buf, slen); if (err) - goto free_buf; + goto out; seed = buf; } - err = alg->seed(tfm, seed, slen); -free_buf: - kfree_sensitive(buf); + err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); out: - return crypto_rng_errstat(alg, err); + kfree_sensitive(buf); + return err; } EXPORT_SYMBOL_GPL(crypto_rng_reset); @@ -91,27 +85,6 @@ static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "seedsize : %u\n", seedsize(alg)); } -static int __maybe_unused crypto_rng_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct rng_alg *rng = __crypto_rng_alg(alg); - struct crypto_istat_rng *istat; - struct crypto_stat_rng rrng; - - istat = rng_get_stat(rng); - - memset(&rrng, 0, sizeof(rrng)); - - strscpy(rrng.type, "rng", sizeof(rrng.type)); - - rrng.stat_generate_cnt = atomic64_read(&istat->generate_cnt); - rrng.stat_generate_tlen = atomic64_read(&istat->generate_tlen); - rrng.stat_seed_cnt = atomic64_read(&istat->seed_cnt); - rrng.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng); -} - static const struct crypto_type crypto_rng_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_rng_init_tfm, @@ -121,9 +94,6 @@ static const struct crypto_type crypto_rng_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_rng_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_rng_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_RNG, @@ -199,7 +169,6 @@ EXPORT_SYMBOL_GPL(crypto_del_default_rng); int crypto_register_rng(struct rng_alg *alg) { - struct crypto_istat_rng *istat = rng_get_stat(alg); struct crypto_alg *base = &alg->base; if (alg->seedsize > PAGE_SIZE / 8) @@ -209,9 +178,6 @@ int crypto_register_rng(struct rng_alg *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_RNG; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_rng); diff --git a/crypto/scompress.c b/crypto/scompress.c index 60bbb7ea4060..1cef6bb06a81 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -271,9 +271,6 @@ static const struct crypto_type crypto_scomp_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_scomp_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_acomp_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SCOMPRESS, diff --git a/crypto/shash.c b/crypto/shash.c index c3f7f6a25280..301ab42bf849 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -16,18 +16,6 @@ #include "hash.h" -static inline struct crypto_istat_hash *shash_get_stat(struct shash_alg *alg) -{ - return hash_get_stat(&alg->halg); -} - -static inline int crypto_shash_errstat(struct shash_alg *alg, int err) -{ - if (IS_ENABLED(CONFIG_CRYPTO_STATS) && err) - atomic64_inc(&shash_get_stat(alg)->err_cnt); - return err; -} - int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -61,29 +49,13 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey); int crypto_shash_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct shash_alg *shash = crypto_shash_alg(desc->tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_add(len, &shash_get_stat(shash)->hash_tlen); - - err = shash->update(desc, data, len); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->update(desc, data, len); } EXPORT_SYMBOL_GPL(crypto_shash_update); int crypto_shash_final(struct shash_desc *desc, u8 *out) { - struct shash_alg *shash = crypto_shash_alg(desc->tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&shash_get_stat(shash)->hash_cnt); - - err = shash->final(desc, out); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->final(desc, out); } EXPORT_SYMBOL_GPL(crypto_shash_final); @@ -99,20 +71,7 @@ static int shash_default_finup(struct shash_desc *desc, const u8 *data, int crypto_shash_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = shash_get_stat(shash); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(len, &istat->hash_tlen); - } - - err = shash->finup(desc, data, len, out); - - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_finup); @@ -129,22 +88,11 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct crypto_shash *tfm = desc->tfm; - struct shash_alg *shash = crypto_shash_alg(tfm); - int err; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_hash *istat = shash_get_stat(shash); - - atomic64_inc(&istat->hash_cnt); - atomic64_add(len, &istat->hash_tlen); - } if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - err = -ENOKEY; - else - err = shash->digest(desc, data, len, out); + return -ENOKEY; - return crypto_shash_errstat(shash, err); + return crypto_shash_alg(tfm)->digest(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_digest); @@ -265,12 +213,6 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "digestsize : %u\n", salg->digestsize); } -static int __maybe_unused crypto_shash_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - return crypto_hash_report_stat(skb, alg, "shash"); -} - const struct crypto_type crypto_shash_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_shash_init_tfm, @@ -281,9 +223,6 @@ const struct crypto_type crypto_shash_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_shash_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_shash_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SHASH, @@ -350,7 +289,6 @@ EXPORT_SYMBOL_GPL(crypto_clone_shash); int hash_prepare_alg(struct hash_alg_common *alg) { - struct crypto_istat_hash *istat = hash_get_stat(alg); struct crypto_alg *base = &alg->base; if (alg->digestsize > HASH_MAX_DIGESTSIZE) @@ -362,9 +300,6 @@ int hash_prepare_alg(struct hash_alg_common *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/sig.c b/crypto/sig.c index 224c47019297..7645bedf3a1f 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -45,16 +45,6 @@ static int __maybe_unused crypto_sig_report(struct sk_buff *skb, return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(rsig), &rsig); } -static int __maybe_unused crypto_sig_report_stat(struct sk_buff *skb, - struct crypto_alg *alg) -{ - struct crypto_stat_akcipher rsig = {}; - - strscpy(rsig.type, "sig", sizeof(rsig.type)); - - return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER, sizeof(rsig), &rsig); -} - static const struct crypto_type crypto_sig_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_sig_init_tfm, @@ -64,9 +54,6 @@ static const struct crypto_type crypto_sig_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_sig_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_sig_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_SIG_MASK, .type = CRYPTO_ALG_TYPE_SIG, diff --git a/crypto/skcipher.c b/crypto/skcipher.c index bc70e159d27d..ceed7f33a67b 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -89,25 +89,6 @@ static inline struct skcipher_alg *__crypto_skcipher_alg( return container_of(alg, struct skcipher_alg, base); } -static inline struct crypto_istat_cipher *skcipher_get_stat( - struct skcipher_alg *alg) -{ - return skcipher_get_stat_common(&alg->co); -} - -static inline int crypto_skcipher_errstat(struct skcipher_alg *alg, int err) -{ - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&istat->err_cnt); - - return err; -} - static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) { u8 *addr; @@ -654,23 +635,12 @@ int crypto_skcipher_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - int ret; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->cryptlen, &istat->encrypt_tlen); - } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (alg->co.base.cra_type != &crypto_skcipher_type) - ret = crypto_lskcipher_encrypt_sg(req); - else - ret = alg->encrypt(req); - - return crypto_skcipher_errstat(alg, ret); + return -ENOKEY; + if (alg->co.base.cra_type != &crypto_skcipher_type) + return crypto_lskcipher_encrypt_sg(req); + return alg->encrypt(req); } EXPORT_SYMBOL_GPL(crypto_skcipher_encrypt); @@ -678,23 +648,12 @@ int crypto_skcipher_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - int ret; - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(req->cryptlen, &istat->decrypt_tlen); - } if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - ret = -ENOKEY; - else if (alg->co.base.cra_type != &crypto_skcipher_type) - ret = crypto_lskcipher_decrypt_sg(req); - else - ret = alg->decrypt(req); - - return crypto_skcipher_errstat(alg, ret); + return -ENOKEY; + if (alg->co.base.cra_type != &crypto_skcipher_type) + return crypto_lskcipher_decrypt_sg(req); + return alg->decrypt(req); } EXPORT_SYMBOL_GPL(crypto_skcipher_decrypt); @@ -846,28 +805,6 @@ static int __maybe_unused crypto_skcipher_report( sizeof(rblkcipher), &rblkcipher); } -static int __maybe_unused crypto_skcipher_report_stat( - struct sk_buff *skb, struct crypto_alg *alg) -{ - struct skcipher_alg *skcipher = __crypto_skcipher_alg(alg); - struct crypto_istat_cipher *istat; - struct crypto_stat_cipher rcipher; - - istat = skcipher_get_stat(skcipher); - - memset(&rcipher, 0, sizeof(rcipher)); - - strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); - - rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); - rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); - rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); - rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); - rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); - - return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); -} - static const struct crypto_type crypto_skcipher_type = { .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, @@ -878,9 +815,6 @@ static const struct crypto_type crypto_skcipher_type = { #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_skcipher_report, #endif -#ifdef CONFIG_CRYPTO_STATS - .report_stat = crypto_skcipher_report_stat, -#endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_SKCIPHER_MASK, .type = CRYPTO_ALG_TYPE_SKCIPHER, @@ -935,7 +869,6 @@ EXPORT_SYMBOL_GPL(crypto_has_skcipher); int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) { - struct crypto_istat_cipher *istat = skcipher_get_stat_common(alg); struct crypto_alg *base = &alg->base; if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || @@ -948,9 +881,6 @@ int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - memset(istat, 0, sizeof(*istat)); - return 0; } diff --git a/crypto/skcipher.h b/crypto/skcipher.h index 16c9484360da..703651367dd8 100644 --- a/crypto/skcipher.h +++ b/crypto/skcipher.h @@ -10,16 +10,6 @@ #include <crypto/internal/skcipher.h> #include "internal.h" -static inline struct crypto_istat_cipher *skcipher_get_stat_common( - struct skcipher_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - int crypto_lskcipher_encrypt_sg(struct skcipher_request *req); int crypto_lskcipher_decrypt_sg(struct skcipher_request *req); int crypto_init_lskcipher_ops_sg(struct crypto_tfm *tfm); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 3dddd288ca02..00f5a6cf341a 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5098,6 +5098,13 @@ static const struct alg_test_desc alg_test_descs[] = { .akcipher = __VECS(ecdsa_nist_p384_tv_template) } }, { + .alg = "ecdsa-nist-p521", + .test = alg_test_akcipher, + .fips_allowed = 1, + .suite = { + .akcipher = __VECS(ecdsa_nist_p521_tv_template) + } + }, { .alg = "ecrdsa", .test = alg_test_akcipher, .suite = { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 12e1c892f366..5350cfd9d325 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1071,6 +1071,152 @@ static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = { }, }; +static const struct akcipher_testvec ecdsa_nist_p521_tv_template[] = { + { + .key = /* secp521r1(sha224) */ + "\x04\x01\x4f\x43\x18\xb6\xa9\xc9\x5d\x68\xd3\xa9\x42\xf8\x98\xc0" + "\xd2\xd1\xa9\x50\x3b\xe8\xc4\x40\xe6\x11\x78\x88\x4b\xbd\x76\xa7" + "\x9a\xe0\xdd\x31\xa4\x67\x78\x45\x33\x9e\x8c\xd1\xc7\x44\xac\x61" + "\x68\xc8\x04\xe7\x5c\x79\xb1\xf1\x41\x0c\x71\xc0\x53\xa8\xbc\xfb" + "\xf5\xca\xd4\x01\x40\xfd\xa3\x45\xda\x08\xe0\xb4\xcb\x28\x3b\x0a" + "\x02\x35\x5f\x02\x9f\x3f\xcd\xef\x08\x22\x40\x97\x74\x65\xb7\x76" + "\x85\xc7\xc0\x5c\xfb\x81\xe1\xa5\xde\x0c\x4e\x8b\x12\x31\xb6\x47" + "\xed\x37\x0f\x99\x3f\x26\xba\xa3\x8e\xff\x79\x34\x7c\x3a\xfe\x1f" + "\x3b\x83\x82\x2f\x14", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\xa2\x3a\x6a\x8c\x7b\x3c\xf2\x51\xf8\xbe\x5f\x4f\x3b\x15\x05\xc4" + "\xb5\xbc\x19\xe7\x21\x85\xe9\x23\x06\x33\x62\xfb", + .m_size = 28, + .algo = OID_id_ecdsa_with_sha224, + .c = + "\x30\x81\x86\x02\x41\x01\xd6\x43\xe7\xff\x42\xb2\xba\x74\x35\xf6" + "\xdc\x6d\x02\x7b\x22\xac\xe2\xef\x07\x92\xee\x60\x94\x06\xf8\x3f" + "\x59\x0f\x74\xf0\x3f\xd8\x18\xc6\x37\x8a\xcb\xa7\xd8\x7d\x98\x85" + "\x29\x88\xff\x0b\x94\x94\x6c\xa6\x9b\x89\x8b\x1e\xfd\x09\x46\x6b" + "\xc7\xaf\x7a\xb9\x19\x0a\x02\x41\x3a\x26\x0d\x55\xcd\x23\x1e\x7d" + "\xa0\x5e\xf9\x88\xf3\xd2\x32\x90\x57\x0f\xf8\x65\x97\x6b\x09\x4d" + "\x22\x26\x0b\x5f\x49\x32\x6b\x91\x99\x30\x90\x0f\x1c\x8f\x78\xd3" + "\x9f\x0e\x64\xcc\xc4\xe8\x43\xd9\x0e\x1c\xad\x22\xda\x82\x00\x35" + "\xa3\x50\xb1\xa5\x98\x92\x2a\xa5\x52", + .c_size = 137, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha256) */ + "\x04\x01\x05\x3a\x6b\x3b\x5a\x0f\xa7\xb9\xb7\x32\x53\x4e\xe2\xae" + "\x0a\x52\xc5\xda\xdd\x5a\x79\x1c\x30\x2d\x33\x07\x79\xd5\x70\x14" + "\x61\x0c\xec\x26\x4d\xd8\x35\x57\x04\x1d\x88\x33\x4d\xce\x05\x36" + "\xa5\xaf\x56\x84\xfa\x0b\x9e\xff\x7b\x30\x4b\x92\x1d\x06\xf8\x81" + "\x24\x1e\x51\x00\x09\x21\x51\xf7\x46\x0a\x77\xdb\xb5\x0c\xe7\x9c" + "\xff\x27\x3c\x02\x71\xd7\x85\x36\xf1\xaa\x11\x59\xd8\xb8\xdc\x09" + "\xdc\x6d\x5a\x6f\x63\x07\x6c\xe1\xe5\x4d\x6e\x0f\x6e\xfb\x7c\x05" + "\x8a\xe9\x53\xa8\xcf\xce\x43\x0e\x82\x20\x86\xbc\x88\x9c\xb7\xe3" + "\xe6\x77\x1e\x1f\x8a", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\xcc\x97\x73\x0c\x73\xa2\x53\x2b\xfa\xd7\x83\x1d\x0c\x72\x1b\x39" + "\x80\x71\x8d\xdd\xc5\x9b\xff\x55\x32\x98\x25\xa2\x58\x2e\xb7\x73", + .m_size = 32, + .algo = OID_id_ecdsa_with_sha256, + .c = + "\x30\x81\x88\x02\x42\x00\xcd\xa5\x5f\x57\x52\x27\x78\x3a\xb5\x06" + "\x0f\xfd\x83\xfc\x0e\xd9\xce\x50\x9f\x7d\x1f\xca\x8b\xa8\x2d\x56" + "\x3c\xf6\xf0\xd8\xe1\xb7\x5d\x95\x35\x6f\x02\x0e\xaf\xe1\x4c\xae" + "\xce\x54\x76\x9a\xc2\x8f\xb8\x38\x1f\x46\x0b\x04\x64\x34\x79\xde" + "\x7e\xd7\x59\x10\xe9\xd9\xd5\x02\x42\x01\xcf\x50\x85\x38\xf9\x15" + "\x83\x18\x04\x6b\x35\xae\x65\xb5\x99\x12\x0a\xa9\x79\x24\xb9\x37" + "\x35\xdd\xa0\xe0\x87\x2c\x44\x4b\x5a\xee\xaf\xfa\x10\xdd\x9b\xfb" + "\x36\x1a\x31\x03\x42\x02\x5f\x50\xf0\xa2\x0d\x1c\x57\x56\x8f\x12" + "\xb7\x1d\x91\x55\x38\xb6\xf6\x34\x65\xc7\xbd", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha384) */ + "\x04\x00\x2e\xd6\x21\x04\x75\xc3\xdc\x7d\xff\x0e\xf3\x70\x25\x2b" + "\xad\x72\xfc\x5a\x91\xf1\xd5\x9c\x64\xf3\x1f\x47\x11\x10\x62\x33" + "\xfd\x2e\xe8\x32\xca\x9e\x6f\x0a\x4c\x5b\x35\x9a\x46\xc5\xe7\xd4" + "\x38\xda\xb2\xf0\xf4\x87\xf3\x86\xf4\xea\x70\xad\x1e\xd4\x78\x8c" + "\x36\x18\x17\x00\xa2\xa0\x34\x1b\x2e\x6a\xdf\x06\xd6\x99\x2d\x47" + "\x50\x92\x1a\x8a\x72\x9c\x23\x44\xfa\xa7\xa9\xed\xa6\xef\x26\x14" + "\xb3\x9d\xfe\x5e\xa3\x8c\xd8\x29\xf8\xdf\xad\xa6\xab\xfc\xdd\x46" + "\x22\x6e\xd7\x35\xc7\x23\xb7\x13\xae\xb6\x34\xff\xd7\x80\xe5\x39" + "\xb3\x3b\x5b\x1b\x94", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\x36\x98\xd6\x82\xfa\xad\xed\x3c\xb9\x40\xb6\x4d\x9e\xb7\x04\x26" + "\xad\x72\x34\x44\xd2\x81\xb4\x9b\xbe\x01\x04\x7a\xd8\x50\xf8\x59" + "\xba\xad\x23\x85\x6b\x59\xbe\xfb\xf6\x86\xd4\x67\xa8\x43\x28\x76", + .m_size = 48, + .algo = OID_id_ecdsa_with_sha384, + .c = + "\x30\x81\x88\x02\x42\x00\x93\x96\x76\x3c\x27\xea\xaa\x9c\x26\xec" + "\x51\xdc\xe8\x35\x5e\xae\x16\xf2\x4b\x64\x98\xf7\xec\xda\xc7\x7e" + "\x42\x71\x86\x57\x2d\xf1\x7d\xe4\xdf\x9b\x7d\x9e\x47\xca\x33\x32" + "\x76\x06\xd0\xf9\xc0\xe4\xe6\x84\x59\xfd\x1a\xc4\x40\xdd\x43\xb8" + "\x6a\xdd\xfb\xe6\x63\x4e\x28\x02\x42\x00\xff\xc3\x6a\x87\x6e\xb5" + "\x13\x1f\x20\x55\xce\x37\x97\xc9\x05\x51\xe5\xe4\x3c\xbc\x93\x65" + "\x57\x1c\x30\xda\xa7\xcd\x26\x28\x76\x3b\x52\xdf\xc4\xc0\xdb\x54" + "\xdb\x8a\x0d\x6a\xc3\xf3\x7a\xd1\xfa\xe7\xa7\xe5\x5a\x94\x56\xcf" + "\x8f\xb4\x22\xc6\x4f\xab\x2b\x62\xc1\x42\xb1", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, + { + .key = /* secp521r1(sha512) */ + "\x04\x00\xc7\x65\xee\x0b\x86\x7d\x8f\x02\xf1\x74\x5b\xb0\x4c\x3f" + "\xa6\x35\x60\x9f\x55\x23\x11\xcc\xdf\xb8\x42\x99\xee\x6c\x96\x6a" + "\x27\xa2\x56\xb2\x2b\x03\xad\x0f\xe7\x97\xde\x09\x5d\xb4\xc5\x5f" + "\xbd\x87\x37\xbf\x5a\x16\x35\x56\x08\xfd\x6f\x06\x1a\x1c\x84\xee" + "\xc3\x64\xb3\x00\x9e\xbd\x6e\x60\x76\xee\x69\xfd\x3a\xb8\xcd\x7e" + "\x91\x68\x53\x57\x44\x13\x2e\x77\x09\x2a\xbe\x48\xbd\x91\xd8\xf6" + "\x21\x16\x53\x99\xd5\xf0\x40\xad\xa6\xf8\x58\x26\xb6\x9a\xf8\x77" + "\xfe\x3a\x05\x1a\xdb\xa9\x0f\xc0\x6c\x76\x30\x8c\xd8\xde\x44\xae" + "\xd0\x17\xdf\x49\x6a", + .key_len = 133, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x23", + .param_len = 18, + .m = + "\x5c\xa6\xbc\x79\xb8\xa0\x1e\x11\x83\xf7\xe9\x05\xdf\xba\xf7\x69" + "\x97\x22\x32\xe4\x94\x7c\x65\xbd\x74\xc6\x9a\x8b\xbd\x0d\xdc\xed" + "\xf5\x9c\xeb\xe1\xc5\x68\x40\xf2\xc7\x04\xde\x9e\x0d\x76\xc5\xa3" + "\xf9\x3c\x6c\x98\x08\x31\xbd\x39\xe8\x42\x7f\x80\x39\x6f\xfe\x68", + .m_size = 64, + .algo = OID_id_ecdsa_with_sha512, + .c = + "\x30\x81\x88\x02\x42\x01\x5c\x71\x86\x96\xac\x21\x33\x7e\x4e\xaa" + "\x86\xec\xa8\x05\x03\x52\x56\x63\x0e\x02\xcc\x94\xa9\x05\xb9\xfb" + "\x62\x1e\x42\x03\x6c\x74\x8a\x1f\x12\x3e\xb7\x7e\x51\xff\x7f\x27" + "\x93\xe8\x6c\x49\x7d\x28\xfc\x80\xa6\x13\xfc\xb6\x90\xf7\xbb\x28" + "\xb5\x04\xb0\xb6\x33\x1c\x7e\x02\x42\x01\x70\x43\x52\x1d\xe3\xc6" + "\xbd\x5a\x40\x95\x35\x89\x4f\x41\x5f\x9e\x19\x88\x05\x3e\x43\x39" + "\x01\xbd\xb7\x7a\x76\x37\x51\x47\x49\x98\x12\x71\xd0\xe9\xca\xa7" + "\xc0\xcb\xaa\x00\x55\xbb\x6a\xb4\x73\x00\xd2\x72\x74\x13\x63\x39" + "\xa6\xe5\x25\x46\x1e\x77\x44\x78\xe0\xd1\x04", + .c_size = 139, + .public_key_vec = true, + .siggen_sigver_test = true, + }, +}; + /* * EC-RDSA test vectors are generated by gost-engine. */ diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a3bbdd6e60fc..f5c71a617a99 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -382,7 +382,7 @@ static ssize_t rng_current_show(struct device *dev, if (IS_ERR(rng)) return PTR_ERR(rng); - ret = snprintf(buf, PAGE_SIZE, "%s\n", rng ? rng->name : "none"); + ret = sysfs_emit(buf, "%s\n", rng ? rng->name : "none"); put_rng(rng); return ret; diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 07ec000e4cd7..94ee18a1120a 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -131,7 +131,7 @@ static void mxc_rnga_cleanup(struct hwrng *rng) __raw_writel(ctrl & ~RNGA_CONTROL_GO, mxc_rng->mem + RNGA_CONTROL); } -static int __init mxc_rnga_probe(struct platform_device *pdev) +static int mxc_rnga_probe(struct platform_device *pdev) { int err; struct mxc_rng *mxc_rng; @@ -176,7 +176,7 @@ err_ioremap: return err; } -static void __exit mxc_rnga_remove(struct platform_device *pdev) +static void mxc_rnga_remove(struct platform_device *pdev) { struct mxc_rng *mxc_rng = platform_get_drvdata(pdev); @@ -197,10 +197,11 @@ static struct platform_driver mxc_rnga_driver = { .name = "mxc_rnga", .of_match_table = mxc_rnga_of_match, }, - .remove_new = __exit_p(mxc_rnga_remove), + .probe = mxc_rnga_probe, + .remove_new = mxc_rnga_remove, }; -module_platform_driver_probe(mxc_rnga_driver, mxc_rnga_probe); +module_platform_driver(mxc_rnga_driver); MODULE_AUTHOR("Freescale Semiconductor, Inc."); MODULE_DESCRIPTION("H/W RNGA driver for i.MX"); diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 379bc245c520..0e903d6e22e3 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -220,7 +220,8 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) if (err && i > RNG_NB_RECOVER_TRIES) { dev_err((struct device *)priv->rng.priv, "Couldn't recover from seed error\n"); - return -ENOTRECOVERABLE; + retval = -ENOTRECOVERABLE; + goto exit_rpm; } continue; @@ -238,7 +239,8 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) if (err && i > RNG_NB_RECOVER_TRIES) { dev_err((struct device *)priv->rng.priv, "Couldn't recover from seed error"); - return -ENOTRECOVERABLE; + retval = -ENOTRECOVERABLE; + goto exit_rpm; } continue; @@ -250,6 +252,7 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) max -= sizeof(u32); } +exit_rpm: pm_runtime_mark_last_busy((struct device *) priv->rng.priv); pm_runtime_put_sync_autosuspend((struct device *) priv->rng.priv); @@ -353,13 +356,15 @@ static int stm32_rng_init(struct hwrng *rng) err = readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, reg, reg & RNG_SR_DRDY, 10, 100000); - if (err | (reg & ~RNG_SR_DRDY)) { + if (err || (reg & ~RNG_SR_DRDY)) { clk_disable_unprepare(priv->clk); dev_err((struct device *)priv->rng.priv, "%s: timeout:%x SR: %x!\n", __func__, err, reg); return -EINVAL; } + clk_disable_unprepare(priv->clk); + return 0; } @@ -384,6 +389,11 @@ static int __maybe_unused stm32_rng_runtime_suspend(struct device *dev) static int __maybe_unused stm32_rng_suspend(struct device *dev) { struct stm32_rng_private *priv = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(priv->clk); + if (err) + return err; if (priv->data->has_cond_reset) { priv->pm_conf.nscr = readl_relaxed(priv->base + RNG_NSCR); @@ -465,6 +475,8 @@ static int __maybe_unused stm32_rng_resume(struct device *dev) writel_relaxed(reg, priv->base + RNG_CR); } + clk_disable_unprepare(priv->clk); + return 0; } diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index bfe18ebc2a98..94f23c6fc93b 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -644,6 +644,14 @@ config CRYPTO_DEV_ROCKCHIP_DEBUG This will create /sys/kernel/debug/rk3288_crypto/stats for displaying the number of requests per algorithm and other internal stats. +config CRYPTO_DEV_TEGRA + tristate "Enable Tegra Security Engine" + depends on TEGRA_HOST1X + select CRYPTO_ENGINE + + help + Select this to enable Tegra Security Engine which accelerates various + AES encryption/decryption and HASH algorithms. config CRYPTO_DEV_ZYNQMP_AES tristate "Support for Xilinx ZynqMP AES hw accelerator" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 95331bc6456b..ad4ccef67d12 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ #obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 83a9093eff25..a895e4289efa 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -51,7 +51,7 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd) *__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len))); } -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd) { cmd->word_addr = COMMAND; cmd->opcode = OPCODE_READ; @@ -68,7 +68,31 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) cmd->msecs = MAX_EXEC_TIME_READ; cmd->rxsize = READ_RSP_SIZE; } -EXPORT_SYMBOL(atmel_i2c_init_read_cmd); +EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd); + +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr) +{ + if (addr < 0 || addr > OTP_ZONE_SIZE) + return -1; + + cmd->word_addr = COMMAND; + cmd->opcode = OPCODE_READ; + /* + * Read the word from OTP zone that may contain e.g. serial + * numbers or similar if persistently pre-initialized and locked + */ + cmd->param1 = OTP_ZONE; + cmd->param2 = cpu_to_le16(addr); + cmd->count = READ_COUNT; + + atmel_i2c_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_READ; + cmd->rxsize = READ_RSP_SIZE; + + return 0; +} +EXPORT_SYMBOL(atmel_i2c_init_read_otp_cmd); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd) { @@ -301,7 +325,7 @@ static int device_sanity_check(struct i2c_client *client) if (!cmd) return -ENOMEM; - atmel_i2c_init_read_cmd(cmd); + atmel_i2c_init_read_config_cmd(cmd); ret = atmel_i2c_send_receive(client, cmd); if (ret) diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index c0bd429ee2c7..72f04c15682f 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -64,6 +64,10 @@ struct atmel_i2c_cmd { /* Definitions for eeprom organization */ #define CONFIGURATION_ZONE 0 +#define OTP_ZONE 1 + +/* Definitions for eeprom zone sizes */ +#define OTP_ZONE_SIZE 64 /* Definitions for Indexes common to all commands */ #define RSP_DATA_IDX 1 /* buffer index of data in response */ @@ -124,6 +128,7 @@ struct atmel_ecc_driver_data { * @wake_token : wake token array of zeros * @wake_token_sz : size in bytes of the wake_token * @tfm_count : number of active crypto transformations on i2c client + * @hwrng : hold the hardware generated rng * * Reads and writes from/to the i2c client are sequential. The first byte * transmitted to the device is treated as the byte size. Any attempt to send @@ -177,7 +182,8 @@ void atmel_i2c_flush_queue(void); int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd); -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd); +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd); +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd); void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid); int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd, diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index c77f482d2a97..24ffdf505023 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -91,6 +91,62 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, return max; } +static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) +{ + struct atmel_i2c_cmd cmd; + int ret = -1; + + if (atmel_i2c_init_read_otp_cmd(&cmd, addr) < 0) { + dev_err(&client->dev, "failed, invalid otp address %04X\n", + addr); + return ret; + } + + ret = atmel_i2c_send_receive(client, &cmd); + + if (cmd.data[0] == 0xff) { + dev_err(&client->dev, "failed, device not ready\n"); + return -ret; + } + + memcpy(otp, cmd.data+1, 4); + + return ret; +} + +static ssize_t otp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 addr; + u8 otp[OTP_ZONE_SIZE]; + char *str = buf; + struct i2c_client *client = to_i2c_client(dev); + int i; + + for (addr = 0; addr < OTP_ZONE_SIZE/4; addr++) { + if (atmel_sha204a_otp_read(client, addr, otp + addr * 4) < 0) { + dev_err(dev, "failed to read otp zone\n"); + break; + } + } + + for (i = 0; i < addr*2; i++) + str += sprintf(str, "%02X", otp[i]); + str += sprintf(str, "\n"); + return str - buf; +} +static DEVICE_ATTR_RO(otp); + +static struct attribute *atmel_sha204a_attrs[] = { + &dev_attr_otp.attr, + NULL +}; + +static const struct attribute_group atmel_sha204a_groups = { + .name = "atsha204a", + .attrs = atmel_sha204a_attrs, +}; + static int atmel_sha204a_probe(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv; @@ -111,6 +167,16 @@ static int atmel_sha204a_probe(struct i2c_client *client) if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); + /* otp read out */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups); + if (ret) { + dev_err(&client->dev, "failed to register sysfs entry\n"); + return ret; + } + return ret; } @@ -123,6 +189,8 @@ static void atmel_sha204a_remove(struct i2c_client *client) return; } + sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups); + kfree((void *)i2c_priv->hwrng.priv); } diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c index 07989bb8c220..3fdc64b5a65e 100644 --- a/drivers/crypto/bcm/spu2.c +++ b/drivers/crypto/bcm/spu2.c @@ -495,7 +495,7 @@ static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len, if (hash_iv_len) { packet_log(" Hash IV Length %u bytes\n", hash_iv_len); packet_dump(" hash IV: ", ptr, hash_iv_len); - ptr += ciph_key_len; + ptr += hash_iv_len; } if (ciph_iv_len) { diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index bdf367f3f679..bd418dea586d 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -512,6 +512,7 @@ static const struct of_device_id caam_match[] = { MODULE_DEVICE_TABLE(of, caam_match); struct caam_imx_data { + bool page0_access; const struct clk_bulk_data *clks; int num_clks; }; @@ -524,6 +525,7 @@ static const struct clk_bulk_data caam_imx6_clks[] = { }; static const struct caam_imx_data caam_imx6_data = { + .page0_access = true, .clks = caam_imx6_clks, .num_clks = ARRAY_SIZE(caam_imx6_clks), }; @@ -534,6 +536,7 @@ static const struct clk_bulk_data caam_imx7_clks[] = { }; static const struct caam_imx_data caam_imx7_data = { + .page0_access = true, .clks = caam_imx7_clks, .num_clks = ARRAY_SIZE(caam_imx7_clks), }; @@ -545,6 +548,7 @@ static const struct clk_bulk_data caam_imx6ul_clks[] = { }; static const struct caam_imx_data caam_imx6ul_data = { + .page0_access = true, .clks = caam_imx6ul_clks, .num_clks = ARRAY_SIZE(caam_imx6ul_clks), }; @@ -554,15 +558,19 @@ static const struct clk_bulk_data caam_vf610_clks[] = { }; static const struct caam_imx_data caam_vf610_data = { + .page0_access = true, .clks = caam_vf610_clks, .num_clks = ARRAY_SIZE(caam_vf610_clks), }; +static const struct caam_imx_data caam_imx8ulp_data; + static const struct soc_device_attribute caam_imx_soc_table[] = { { .soc_id = "i.MX6UL", .data = &caam_imx6ul_data }, { .soc_id = "i.MX6*", .data = &caam_imx6_data }, { .soc_id = "i.MX7*", .data = &caam_imx7_data }, { .soc_id = "i.MX8M*", .data = &caam_imx7_data }, + { .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data }, { .soc_id = "VF*", .data = &caam_vf610_data }, { .family = "Freescale i.MX" }, { /* sentinel */ } @@ -860,6 +868,7 @@ static int caam_probe(struct platform_device *pdev) int pg_size; int BLOCK_OFFSET = 0; bool reg_access = true; + const struct caam_imx_data *imx_soc_data; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -894,12 +903,20 @@ static int caam_probe(struct platform_device *pdev) return -EINVAL; } + imx_soc_data = imx_soc_match->data; + reg_access = reg_access && imx_soc_data->page0_access; + /* + * CAAM clocks cannot be controlled from kernel. + */ + if (!imx_soc_data->num_clks) + goto iomap_ctrl; + ret = init_clocks(dev, imx_soc_match->data); if (ret) return ret; } - +iomap_ctrl: /* Get configuration properties from device tree */ /* First, get register page */ ctrl = devm_of_iomap(dev, nprop, 0, NULL); diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 473301237760..ff6ceb4feee0 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -39,44 +39,38 @@ static const struct sp_dev_vdata dev_vdata[] = { }, }; -#ifdef CONFIG_ACPI static const struct acpi_device_id sp_acpi_match[] = { { "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(acpi, sp_acpi_match); -#endif -#ifdef CONFIG_OF static const struct of_device_id sp_of_match[] = { { .compatible = "amd,ccp-seattle-v1a", .data = (const void *)&dev_vdata[0] }, { }, }; MODULE_DEVICE_TABLE(of, sp_of_match); -#endif static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev) { -#ifdef CONFIG_OF const struct of_device_id *match; match = of_match_node(sp_of_match, pdev->dev.of_node); if (match && match->data) return (struct sp_dev_vdata *)match->data; -#endif + return NULL; } static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev) { -#ifdef CONFIG_ACPI const struct acpi_device_id *match; match = acpi_match_device(sp_acpi_match, &pdev->dev); if (match && match->driver_data) return (struct sp_dev_vdata *)match->driver_data; -#endif + return NULL; } @@ -212,12 +206,8 @@ static int sp_platform_resume(struct platform_device *pdev) static struct platform_driver sp_platform_driver = { .driver = { .name = "ccp", -#ifdef CONFIG_ACPI .acpi_match_table = sp_acpi_match, -#endif -#ifdef CONFIG_OF .of_match_table = sp_of_match, -#endif }, .probe = sp_platform_probe, .remove_new = sp_platform_remove, diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index cd67fa348ca7..1b9b7bccdeff 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -13,6 +13,7 @@ #define QM_DFX_COMMON_LEN 0xC3 #define QM_DFX_REGS_LEN 4UL #define QM_DBG_TMP_BUF_LEN 22 +#define QM_XQC_ADDR_MASK GENMASK(31, 0) #define CURRENT_FUN_MASK GENMASK(5, 0) #define CURRENT_Q_MASK GENMASK(31, 16) #define QM_SQE_ADDR_MASK GENMASK(7, 0) @@ -167,7 +168,6 @@ static void dump_show(struct hisi_qm *qm, void *info, static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc_curr; struct qm_sqc sqc; u32 qp_id; int ret; @@ -183,6 +183,8 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); if (!ret) { + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &sqc, sizeof(struct qm_sqc), name); return 0; @@ -190,9 +192,10 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; - - dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC"); + memcpy(&sqc, qm->sqc + qp_id * sizeof(struct qm_sqc), sizeof(struct qm_sqc)); + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &sqc, sizeof(struct qm_sqc), "SOFT SQC"); } up_read(&qm->qps_lock); @@ -202,7 +205,6 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc_curr; struct qm_cqc cqc; u32 qp_id; int ret; @@ -218,6 +220,8 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); if (!ret) { + cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &cqc, sizeof(struct qm_cqc), name); return 0; @@ -225,9 +229,10 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; - - dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC"); + memcpy(&cqc, qm->cqc + qp_id * sizeof(struct qm_cqc), sizeof(struct qm_cqc)); + cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &cqc, sizeof(struct qm_cqc), "SOFT CQC"); } up_read(&qm->qps_lock); @@ -263,6 +268,10 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) if (ret) return ret; + aeqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + aeqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, xeqc, size, name); return ret; @@ -310,27 +319,26 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) { - u16 sq_depth = qm->qp_array->cq_depth; - void *sqe, *sqe_curr; + u16 sq_depth = qm->qp_array->sq_depth; struct hisi_qp *qp; u32 qp_id, sqe_id; + void *sqe; int ret; ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); if (ret) return ret; - sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); + sqe = kzalloc(qm->sqe_size, GFP_KERNEL); if (!sqe) return -ENOMEM; qp = &qm->qp_array[qp_id]; - memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); - sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); - memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, + memcpy(sqe, qp->sqe + sqe_id * qm->sqe_size, qm->sqe_size); + memset(sqe + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, qm->debug.sqe_mask_len); - dump_show(qm, sqe_curr, qm->sqe_size, name); + dump_show(qm, sqe, qm->sqe_size, name); kfree(sqe); @@ -809,8 +817,14 @@ static void dfx_regs_uninit(struct hisi_qm *qm, { int i; + if (!dregs) + return; + /* Setting the pointer is NULL to prevent double free */ for (i = 0; i < reg_len; i++) { + if (!dregs[i].regs) + continue; + kfree(dregs[i].regs); dregs[i].regs = NULL; } @@ -860,14 +874,21 @@ alloc_error: static int qm_diff_regs_init(struct hisi_qm *qm, struct dfx_diff_registers *dregs, u32 reg_len) { + int ret; + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - if (IS_ERR(qm->debug.qm_diff_regs)) - return PTR_ERR(qm->debug.qm_diff_regs); + if (IS_ERR(qm->debug.qm_diff_regs)) { + ret = PTR_ERR(qm->debug.qm_diff_regs); + qm->debug.qm_diff_regs = NULL; + return ret; + } qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); if (IS_ERR(qm->debug.acc_diff_regs)) { dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - return PTR_ERR(qm->debug.acc_diff_regs); + ret = PTR_ERR(qm->debug.acc_diff_regs); + qm->debug.acc_diff_regs = NULL; + return ret; } return 0; @@ -908,7 +929,9 @@ static int qm_last_regs_init(struct hisi_qm *qm) static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) { dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + qm->debug.acc_diff_regs = NULL; dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + qm->debug.qm_diff_regs = NULL; } /** @@ -1075,12 +1098,12 @@ static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, { struct debugfs_file *file = qm->debug.files + index; - debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, - &qm_debug_fops); - file->index = index; mutex_init(&file->lock); file->debug = &qm->debug; + + debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, + &qm_debug_fops); } static int qm_debugfs_atomic64_set(void *data, u64 val) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index d93aa6630a57..10aa4da93323 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -106,7 +106,7 @@ #define HPRE_SHAPER_TYPE_RATE 640 #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 -#define HPRE_SQE_MASK_LEN 24 +#define HPRE_SQE_MASK_LEN 44 #define HPRE_CTX_Q_NUM_DEF 1 #define HPRE_DFX_BASE 0x301000 @@ -1074,41 +1074,40 @@ static int hpre_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - hpre_debugfs_root); - - qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs)); if (ret) { dev_warn(dev, "Failed to init HPRE diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hpre_debugfs_root); + qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; + hisi_qm_debug_init(qm); if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) { ret = hpre_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hpre_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); return ret; } static void hpre_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); } static int hpre_pre_store_cap_reg(struct hisi_qm *qm) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 92f0a1d9b4a6..3dac8d8e8568 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -645,6 +645,9 @@ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op tmp_xqc = qm->xqc_buf.aeqc; xqc_dma = qm->xqc_buf.aeqc_dma; break; + default: + dev_err(&qm->pdev->dev, "unknown mailbox cmd %u\n", cmd); + return -EINVAL; } /* Setting xqc will fail if master OOO is blocked. */ @@ -2893,12 +2896,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) hisi_qm_set_state(qm, QM_NOT_READY); up_write(&qm->qps_lock); + qm_remove_uacce(qm); qm_irqs_unregister(qm); hisi_qm_pci_uninit(qm); - if (qm->use_sva) { - uacce_remove(qm->uacce); - qm->uacce = NULL; - } } EXPORT_SYMBOL_GPL(hisi_qm_uninit); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 93a972fcbf63..0558f98e221f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -481,8 +481,10 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, if (ctx->pbuf_supported) sec_free_pbuf_resource(dev, qp_ctx->res); - if (ctx->alg_type == SEC_AEAD) + if (ctx->alg_type == SEC_AEAD) { sec_free_mac_resource(dev, qp_ctx->res); + sec_free_aiv_resource(dev, qp_ctx->res); + } } static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index c290d8937b19..75aad04ffe5e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -99,8 +99,8 @@ #define SEC_DBGFS_VAL_MAX_LEN 20 #define SEC_SINGLE_PORT_MAX_TRANS 0x2060 -#define SEC_SQE_MASK_OFFSET 64 -#define SEC_SQE_MASK_LEN 48 +#define SEC_SQE_MASK_OFFSET 16 +#define SEC_SQE_MASK_LEN 108 #define SEC_SHAPER_TYPE_RATE 400 #define SEC_DFX_BASE 0x301000 @@ -152,7 +152,7 @@ static const struct hisi_qm_cap_info sec_basic_info[] = { {SEC_CORE_TYPE_NUM_CAP, 0x313c, 16, GENMASK(3, 0), 0x1, 0x1, 0x1}, {SEC_CORE_NUM_CAP, 0x313c, 8, GENMASK(7, 0), 0x4, 0x4, 0x4}, {SEC_CORES_PER_CLUSTER_NUM_CAP, 0x313c, 0, GENMASK(7, 0), 0x4, 0x4, 0x4}, - {SEC_CORE_ENABLE_BITMAP, 0x3140, 32, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, + {SEC_CORE_ENABLE_BITMAP, 0x3140, 0, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, {SEC_DRV_ALG_BITMAP_LOW, 0x3144, 0, GENMASK(31, 0), 0x18050CB, 0x18050CB, 0x18670CF}, {SEC_DRV_ALG_BITMAP_HIGH, 0x3148, 0, GENMASK(31, 0), 0x395C, 0x395C, 0x395C}, {SEC_DEV_ALG_BITMAP_LOW, 0x314c, 0, GENMASK(31, 0), 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, @@ -901,37 +901,36 @@ static int sec_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - sec_debugfs_root); - qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs)); if (ret) { dev_warn(dev, "Failed to init SEC diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + sec_debugfs_root); + qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove: - debugfs_remove_recursive(sec_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); return ret; } static void sec_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); } static int sec_show_last_regs_init(struct hisi_qm *qm) @@ -1324,7 +1323,8 @@ static struct pci_driver sec_pci_driver = { .probe = sec_probe, .remove = sec_remove, .err_handler = &sec_err_handler, - .sriov_configure = hisi_qm_sriov_configure, + .sriov_configure = IS_ENABLED(CONFIG_PCI_IOV) ? + hisi_qm_sriov_configure : NULL, .shutdown = hisi_qm_dev_shutdown, .driver.pm = &sec_pm_ops, }; diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 0beca257c20b..568acd0aee3f 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -161,9 +161,6 @@ static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool, struct mem_block *block; u32 block_index, offset; - if (!pool || !hw_sgl_dma || index >= pool->count) - return ERR_PTR(-EINVAL); - block = pool->mem_block; block_index = index / pool->sgl_num_per_block; offset = index % pool->sgl_num_per_block; @@ -230,7 +227,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sg; int sg_n; - if (!dev || !sgl || !pool || !hw_sgl_dma) + if (!dev || !sgl || !pool || !hw_sgl_dma || index >= pool->count) return ERR_PTR(-EINVAL); sg_n = sg_nents(sgl); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index c065fd867161..c94a7b20d07e 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -887,36 +887,34 @@ static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm) static int hisi_zip_debugfs_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; - struct dentry *dev_d; int ret; - dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root); - - qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; - qm->debug.debug_root = dev_d; ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs)); if (ret) { dev_warn(dev, "Failed to init ZIP diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hzip_debugfs_root); + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { ret = hisi_zip_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hisi_zip_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove: - debugfs_remove_recursive(hzip_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); return ret; } @@ -940,10 +938,10 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + if (qm->fun_type == QM_HW_PF) { hisi_zip_debug_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 2524091a5f70..56985e395263 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -49,10 +49,10 @@ struct iaa_wq { struct iaa_device *iaa_device; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct iaa_device_compression_mode { @@ -73,10 +73,10 @@ struct iaa_device { int n_wq; struct list_head wqs; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct wq_table_entry { diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index b2191ade9011..e810d286ee8c 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -347,18 +347,16 @@ int add_iaa_compression_mode(const char *name, goto free; if (ll_table) { - mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL); + mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL); if (!mode->ll_table) goto free; - memcpy(mode->ll_table, ll_table, ll_table_size); mode->ll_table_size = ll_table_size; } if (d_table) { - mode->d_table = kzalloc(d_table_size, GFP_KERNEL); + mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL); if (!mode->d_table) goto free; - memcpy(mode->d_table, d_table, d_table_size); mode->d_table_size = d_table_size; } @@ -922,7 +920,7 @@ static void rebalance_wq_table(void) for_each_node_with_cpus(node) { node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < nr_cpus_per_node; cpu++) { + for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { int node_cpu = cpumask_nth(cpu, node_cpus); if (WARN_ON(node_cpu >= nr_cpu_ids)) { @@ -1079,8 +1077,8 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, update_total_comp_bytes_out(ctx->req->dlen); update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); } else { - update_total_decomp_bytes_in(ctx->req->dlen); - update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen); + update_total_decomp_bytes_in(ctx->req->slen); + update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen); } if (ctx->compress && compression_ctx->verify_compress) { @@ -1498,7 +1496,6 @@ static int iaa_comp_acompress(struct acomp_req *req) u32 compression_crc; struct idxd_wq *wq; struct device *dev; - u64 start_time_ns; int order = -1; compression_ctx = crypto_tfm_ctx(tfm); @@ -1572,10 +1569,8 @@ static int iaa_comp_acompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, &compression_crc, disable_async); - update_max_comp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -1622,7 +1617,6 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) struct iaa_wq *iaa_wq; struct device *dev; struct idxd_wq *wq; - u64 start_time_ns; int order = -1; cpu = get_cpu(); @@ -1679,10 +1673,8 @@ alloc_dest: dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, true); - update_max_decomp_delay_ns(start_time_ns); if (ret == -EOVERFLOW) { dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); req->dlen *= 2; @@ -1713,7 +1705,6 @@ static int iaa_comp_adecompress(struct acomp_req *req) int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; struct device *dev; - u64 start_time_ns; struct idxd_wq *wq; if (!iaa_crypto_enabled) { @@ -1773,10 +1764,8 @@ static int iaa_comp_adecompress(struct acomp_req *req) " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, req->dst, req->dlen, sg_dma_len(req->dst)); - start_time_ns = iaa_get_ts(); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, dst_addr, &req->dlen, false); - update_max_decomp_delay_ns(start_time_ns); if (ret == -EINPROGRESS) return ret; @@ -2014,7 +2003,7 @@ static int __init iaa_crypto_init_module(void) int ret = 0; int node; - nr_cpus = num_online_cpus(); + nr_cpus = num_possible_cpus(); for_each_node_with_cpus(node) nr_nodes++; if (!nr_nodes) { diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index c9f83af4b307..f5cc3d29ca19 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -17,141 +17,117 @@ #include "iaa_crypto.h" #include "iaa_crypto_stats.h" -static u64 total_comp_calls; -static u64 total_decomp_calls; -static u64 total_sw_decomp_calls; -static u64 max_comp_delay_ns; -static u64 max_decomp_delay_ns; -static u64 total_comp_bytes_out; -static u64 total_decomp_bytes_in; -static u64 total_completion_einval_errors; -static u64 total_completion_timeout_errors; -static u64 total_completion_comp_buf_overflow_errors; +static atomic64_t total_comp_calls; +static atomic64_t total_decomp_calls; +static atomic64_t total_sw_decomp_calls; +static atomic64_t total_comp_bytes_out; +static atomic64_t total_decomp_bytes_in; +static atomic64_t total_completion_einval_errors; +static atomic64_t total_completion_timeout_errors; +static atomic64_t total_completion_comp_buf_overflow_errors; static struct dentry *iaa_crypto_debugfs_root; void update_total_comp_calls(void) { - total_comp_calls++; + atomic64_inc(&total_comp_calls); } void update_total_comp_bytes_out(int n) { - total_comp_bytes_out += n; + atomic64_add(n, &total_comp_bytes_out); } void update_total_decomp_calls(void) { - total_decomp_calls++; + atomic64_inc(&total_decomp_calls); } void update_total_sw_decomp_calls(void) { - total_sw_decomp_calls++; + atomic64_inc(&total_sw_decomp_calls); } void update_total_decomp_bytes_in(int n) { - total_decomp_bytes_in += n; + atomic64_add(n, &total_decomp_bytes_in); } void update_completion_einval_errs(void) { - total_completion_einval_errors++; + atomic64_inc(&total_completion_einval_errors); } void update_completion_timeout_errs(void) { - total_completion_timeout_errors++; + atomic64_inc(&total_completion_timeout_errors); } void update_completion_comp_buf_overflow_errs(void) { - total_completion_comp_buf_overflow_errors++; -} - -void update_max_comp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_comp_delay_ns) - max_comp_delay_ns = time_diff; -} - -void update_max_decomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_decomp_delay_ns) - max_decomp_delay_ns = time_diff; + atomic64_inc(&total_completion_comp_buf_overflow_errors); } void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_calls++; - wq->iaa_device->comp_calls++; + atomic64_inc(&wq->comp_calls); + atomic64_inc(&wq->iaa_device->comp_calls); } void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_bytes += n; - wq->iaa_device->comp_bytes += n; + atomic64_add(n, &wq->comp_bytes); + atomic64_add(n, &wq->iaa_device->comp_bytes); } void update_wq_decomp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_calls++; - wq->iaa_device->decomp_calls++; + atomic64_inc(&wq->decomp_calls); + atomic64_inc(&wq->iaa_device->decomp_calls); } void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_bytes += n; - wq->iaa_device->decomp_bytes += n; + atomic64_add(n, &wq->decomp_bytes); + atomic64_add(n, &wq->iaa_device->decomp_bytes); } static void reset_iaa_crypto_stats(void) { - total_comp_calls = 0; - total_decomp_calls = 0; - total_sw_decomp_calls = 0; - max_comp_delay_ns = 0; - max_decomp_delay_ns = 0; - total_comp_bytes_out = 0; - total_decomp_bytes_in = 0; - total_completion_einval_errors = 0; - total_completion_timeout_errors = 0; - total_completion_comp_buf_overflow_errors = 0; + atomic64_set(&total_comp_calls, 0); + atomic64_set(&total_decomp_calls, 0); + atomic64_set(&total_sw_decomp_calls, 0); + atomic64_set(&total_comp_bytes_out, 0); + atomic64_set(&total_decomp_bytes_in, 0); + atomic64_set(&total_completion_einval_errors, 0); + atomic64_set(&total_completion_timeout_errors, 0); + atomic64_set(&total_completion_comp_buf_overflow_errors, 0); } static void reset_wq_stats(struct iaa_wq *wq) { - wq->comp_calls = 0; - wq->comp_bytes = 0; - wq->decomp_calls = 0; - wq->decomp_bytes = 0; + atomic64_set(&wq->comp_calls, 0); + atomic64_set(&wq->comp_bytes, 0); + atomic64_set(&wq->decomp_calls, 0); + atomic64_set(&wq->decomp_bytes, 0); } static void reset_device_stats(struct iaa_device *iaa_device) { struct iaa_wq *iaa_wq; - iaa_device->comp_calls = 0; - iaa_device->comp_bytes = 0; - iaa_device->decomp_calls = 0; - iaa_device->decomp_bytes = 0; + atomic64_set(&iaa_device->comp_calls, 0); + atomic64_set(&iaa_device->comp_bytes, 0); + atomic64_set(&iaa_device->decomp_calls, 0); + atomic64_set(&iaa_device->decomp_bytes, 0); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) reset_wq_stats(iaa_wq); @@ -160,10 +136,14 @@ static void reset_device_stats(struct iaa_device *iaa_device) static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq) { seq_printf(m, " name: %s\n", iaa_wq->wq->name); - seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_wq->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_wq->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_wq->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n\n", + atomic64_read(&iaa_wq->decomp_bytes)); } static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) @@ -173,30 +153,41 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) seq_puts(m, "iaa device:\n"); seq_printf(m, " id: %d\n", iaa_device->idxd->id); seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq); - seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_device->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_device->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_device->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n", + atomic64_read(&iaa_device->decomp_bytes)); seq_puts(m, " wqs:\n"); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) wq_show(m, iaa_wq); } -static void global_stats_show(struct seq_file *m) +static int global_stats_show(struct seq_file *m, void *v) { seq_puts(m, "global stats:\n"); - seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); - seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls); - seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls); - seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out); - seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in); + seq_printf(m, " total_comp_calls: %llu\n", + atomic64_read(&total_comp_calls)); + seq_printf(m, " total_decomp_calls: %llu\n", + atomic64_read(&total_decomp_calls)); + seq_printf(m, " total_sw_decomp_calls: %llu\n", + atomic64_read(&total_sw_decomp_calls)); + seq_printf(m, " total_comp_bytes_out: %llu\n", + atomic64_read(&total_comp_bytes_out)); + seq_printf(m, " total_decomp_bytes_in: %llu\n", + atomic64_read(&total_decomp_bytes_in)); seq_printf(m, " total_completion_einval_errors: %llu\n", - total_completion_einval_errors); + atomic64_read(&total_completion_einval_errors)); seq_printf(m, " total_completion_timeout_errors: %llu\n", - total_completion_timeout_errors); + atomic64_read(&total_completion_timeout_errors)); seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", - total_completion_comp_buf_overflow_errors); + atomic64_read(&total_completion_comp_buf_overflow_errors)); + + return 0; } static int wq_stats_show(struct seq_file *m, void *v) @@ -205,8 +196,6 @@ static int wq_stats_show(struct seq_file *m, void *v) mutex_lock(&iaa_devices_lock); - global_stats_show(m); - list_for_each_entry(iaa_device, &iaa_devices, list) device_stats_show(m, iaa_device); @@ -243,6 +232,18 @@ static const struct file_operations wq_stats_fops = { .release = single_release, }; +static int global_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, global_stats_show, file); +} + +static const struct file_operations global_stats_fops = { + .open = global_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n"); int __init iaa_crypto_debugfs_init(void) @@ -252,20 +253,8 @@ int __init iaa_crypto_debugfs_init(void) iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - debugfs_create_u64("max_comp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_decomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("total_comp_calls", 0644, - iaa_crypto_debugfs_root, &total_comp_calls); - debugfs_create_u64("total_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_decomp_calls); - debugfs_create_u64("total_sw_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_sw_decomp_calls); - debugfs_create_u64("total_comp_bytes_out", 0644, - iaa_crypto_debugfs_root, &total_comp_bytes_out); - debugfs_create_u64("total_decomp_bytes_in", 0644, - iaa_crypto_debugfs_root, &total_decomp_bytes_in); + debugfs_create_file("global_stats", 0644, iaa_crypto_debugfs_root, NULL, + &global_stats_fops); debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL, &wq_stats_fops); debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c916ca83f070..3787a5f507eb 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -13,8 +13,6 @@ void update_total_comp_bytes_out(int n); void update_total_decomp_calls(void); void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); -void update_max_comp_delay_ns(u64 start_time_ns); -void update_max_decomp_delay_ns(u64 start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -24,8 +22,6 @@ void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n); void update_wq_decomp_calls(struct idxd_wq *idxd_wq); void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n); -static inline u64 iaa_get_ts(void) { return ktime_get_ns(); } - #else static inline int iaa_crypto_debugfs_init(void) { return 0; } static inline void iaa_crypto_debugfs_cleanup(void) {} @@ -35,8 +31,6 @@ static inline void update_total_comp_bytes_out(int n) {} static inline void update_total_decomp_calls(void) {} static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} -static inline void update_max_comp_delay_ns(u64 start_time_ns) {} -static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} @@ -46,8 +40,6 @@ static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {} static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {} static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {} -static inline u64 iaa_get_ts(void) { return 0; } - #endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS #endif diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 1102c47f8293..78f0ea49254d 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -10,12 +10,14 @@ #include <adf_fw_config.h> #include <adf_gen4_config.h> #include <adf_gen4_dc.h> +#include <adf_gen4_hw_csr_data.h> #include <adf_gen4_hw_data.h> #include <adf_gen4_pfvf.h> #include <adf_gen4_pm.h> #include <adf_gen4_ras.h> #include <adf_gen4_timer.h> #include <adf_gen4_tl.h> +#include <adf_gen4_vf_mig.h> #include "adf_420xx_hw_data.h" #include "icp_qat_hw.h" @@ -296,7 +298,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) { if (adf_gen4_init_thd2arb_map(accel_dev)) dev_warn(&GET_DEV(accel_dev), - "Generate of the thread to arbiter map failed"); + "Failed to generate thread to arbiter mapping"); return GET_HW_DATA(accel_dev)->thd_to_arb_map; } @@ -487,6 +489,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 927506cf271d..9fd7ec53b9f3 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -10,12 +10,14 @@ #include <adf_fw_config.h> #include <adf_gen4_config.h> #include <adf_gen4_dc.h> +#include <adf_gen4_hw_csr_data.h> #include <adf_gen4_hw_data.h> #include <adf_gen4_pfvf.h> #include <adf_gen4_pm.h> #include "adf_gen4_ras.h" #include <adf_gen4_timer.h> #include <adf_gen4_tl.h> +#include <adf_gen4_vf_mig.h> #include "adf_4xxx_hw_data.h" #include "icp_qat_hw.h" @@ -208,7 +210,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) { if (adf_gen4_init_thd2arb_map(accel_dev)) dev_warn(&GET_DEV(accel_dev), - "Generate of the thread to arbiter map failed"); + "Failed to generate thread to arbiter mapping"); return GET_HW_DATA(accel_dev)->thd_to_arb_map; } @@ -454,6 +456,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; + hw_data->bank_state_save = adf_gen4_bank_state_save; + hw_data->bank_state_restore = adf_gen4_bank_state_restore; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; hw_data->dev_config = adf_gen4_dev_config; @@ -469,6 +473,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 9762f2bf7727..d26564cebdec 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -197,7 +197,9 @@ module_pci_driver(adf_driver); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); MODULE_FIRMWARE(ADF_4XXX_FW); +MODULE_FIRMWARE(ADF_402XX_FW); MODULE_FIRMWARE(ADF_4XXX_MMP); +MODULE_FIRMWARE(ADF_402XX_MMP); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_VERSION(ADF_DRV_VERSION); MODULE_SOFTDEP("pre: crypto-intel_qat"); diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c index a882e0ea2279..201f9412c582 100644 --- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -6,6 +6,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_c3xxx_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 84d9486e04de..a512ca4efd3f 100644 --- a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c index 48cf3eb7c734..6b5b0cf9c7c7 100644 --- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c @@ -6,6 +6,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_c62x_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 751d7aa57fc7..4aaaaf921734 100644 --- a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 5915cde8a7aa..6f9266edc9f1 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -14,16 +14,20 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_sysfs.o \ adf_sysfs_ras_counters.o \ + adf_gen2_hw_csr_data.o \ adf_gen2_hw_data.o \ adf_gen2_config.o \ adf_gen4_config.o \ + adf_gen4_hw_csr_data.o \ adf_gen4_hw_data.o \ + adf_gen4_vf_mig.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ adf_gen4_dc.o \ adf_gen4_ras.o \ adf_gen4_timer.o \ adf_clock.o \ + adf_mstate_mgr.o \ qat_crypto.o \ qat_compression.o \ qat_comp_algs.o \ @@ -52,6 +56,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ - adf_gen2_pfvf.o adf_gen4_pfvf.o + adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 08658c3a01e9..7830ecb1a1f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -9,6 +9,7 @@ #include <linux/pci.h> #include <linux/ratelimit.h> #include <linux/types.h> +#include <linux/qat/qat_mig_dev.h> #include "adf_cfg_common.h" #include "adf_rl.h" #include "adf_telemetry.h" @@ -140,6 +141,40 @@ struct admin_info { u32 mailbox_offset; }; +struct ring_config { + u64 base; + u32 config; + u32 head; + u32 tail; + u32 reserved0; +}; + +struct bank_state { + u32 ringstat0; + u32 ringstat1; + u32 ringuostat; + u32 ringestat; + u32 ringnestat; + u32 ringnfstat; + u32 ringfstat; + u32 ringcstat0; + u32 ringcstat1; + u32 ringcstat2; + u32 ringcstat3; + u32 iaintflagen; + u32 iaintflagreg; + u32 iaintflagsrcsel0; + u32 iaintflagsrcsel1; + u32 iaintcolen; + u32 iaintcolctl; + u32 iaintflagandcolen; + u32 ringexpstat; + u32 ringexpintenable; + u32 ringsrvarben; + u32 reserved0; + struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK]; +}; + struct adf_hw_csr_ops { u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size); u32 (*read_csr_ring_head)(void __iomem *csr_base_addr, u32 bank, @@ -150,22 +185,49 @@ struct adf_hw_csr_ops { u32 ring); void (*write_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + u32 (*read_csr_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_uo_stat)(void __iomem *csr_base_addr, u32 bank); u32 (*read_csr_e_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_ne_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_nf_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_f_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_c_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + dma_addr_t (*read_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, u32 ring, dma_addr_t addr); + u32 (*read_csr_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_int_flag)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_flag)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_srcsel_w_val)(void __iomem *csr_base_addr, + u32 bank, u32 value); + u32 (*read_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_flag_and_col)(void __iomem *csr_base_addr, + u32 bank); void (*write_csr_int_flag_and_col)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*get_int_col_ctl_enable_mask)(void); }; struct adf_cfg_device_data; @@ -197,6 +259,20 @@ struct adf_dc_ops { void (*build_deflate_ctx)(void *ctx); }; +struct qat_migdev_ops { + int (*init)(struct qat_mig_dev *mdev); + void (*cleanup)(struct qat_mig_dev *mdev); + void (*reset)(struct qat_mig_dev *mdev); + int (*open)(struct qat_mig_dev *mdev); + void (*close)(struct qat_mig_dev *mdev); + int (*suspend)(struct qat_mig_dev *mdev); + int (*resume)(struct qat_mig_dev *mdev); + int (*save_state)(struct qat_mig_dev *mdev); + int (*save_setup)(struct qat_mig_dev *mdev); + int (*load_state)(struct qat_mig_dev *mdev); + int (*load_setup)(struct qat_mig_dev *mdev, int size); +}; + struct adf_dev_err_mask { u32 cppagentcmdpar_mask; u32 parerr_ath_cph_mask; @@ -244,6 +320,10 @@ struct adf_hw_device_data { void (*enable_ints)(struct adf_accel_dev *accel_dev); void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev); int (*ring_pair_reset)(struct adf_accel_dev *accel_dev, u32 bank_nr); + int (*bank_state_save)(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); + int (*bank_state_restore)(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); @@ -260,6 +340,7 @@ struct adf_hw_device_data { struct adf_dev_err_mask dev_err_mask; struct adf_rl_hw_data rl_data; struct adf_tl_hw_data tl_data; + struct qat_migdev_ops vfmig_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -316,6 +397,7 @@ struct adf_hw_device_data { #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) #define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) +#define GET_VFMIG_OPS(accel_dev) (&(accel_dev)->hw_device->vfmig_ops) #define GET_TL_DATA(accel_dev) GET_HW_DATA(accel_dev)->tl_data #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev @@ -330,11 +412,17 @@ struct adf_fw_loader_data { struct adf_accel_vf_info { struct adf_accel_dev *accel_dev; struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */ + struct mutex pfvf_mig_lock; /* protects PFVF state for migration */ struct ratelimit_state vf2pf_ratelimit; u32 vf_nr; bool init; bool restarting; u8 vf_compat_ver; + /* + * Private area used for device migration. + * Memory allocation and free is managed by migration driver. + */ + void *mig_priv; }; struct adf_dc_data { diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index 57328249c89e..3bec9e20bad0 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -248,6 +248,16 @@ static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev) return pmisc->virt_addr; } +static inline void __iomem *adf_get_etr_base(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_bar *etr; + + etr = &GET_BARS(accel_dev)[hw_data->get_etr_bar_id(hw_data)]; + + return etr->virt_addr; +} + static inline void __iomem *adf_get_aram_base(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c new file mode 100644 index 000000000000..650c9edd8a66 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/types.h> +#include "adf_gen2_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h new file mode 100644 index 000000000000..55058b0f9e52 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN2_HW_CSR_DATA_H_ +#define ADF_GEN2_HW_CSR_DATA_H_ + +#include <linux/bitops.h> +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL +#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL +#define ADF_RING_CSR_RING_CONFIG 0x000 +#define ADF_RING_CSR_RING_LBASE 0x040 +#define ADF_RING_CSR_RING_UBASE 0x080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_SRCSEL_2 0x178 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_BUNDLE_SIZE 0x1000 +#define ADF_ARB_REG_SLOT 0x1000 +#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + u32 l_base = 0, u_base = 0; \ + l_base = (u32)((value) & 0xFFFFFFFF); \ + u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG, value) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ +do { \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ +} while (0) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_EN, value) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, value) + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ + ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ + (ADF_ARB_REG_SLOT * (index)), value) + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c index d1884547b5a1..1f64bf49b221 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c @@ -111,103 +111,6 @@ void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev) } EXPORT_SYMBOL_GPL(adf_gen2_enable_ints); -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, - u32 ring, u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - -static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); - u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h index 6bd341061de4..708e9186127b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h @@ -6,78 +6,9 @@ #include "adf_accel_devices.h" #include "adf_cfg_common.h" -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL -#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL -#define ADF_RING_CSR_RING_CONFIG 0x000 -#define ADF_RING_CSR_RING_LBASE 0x040 -#define ADF_RING_CSR_RING_UBASE 0x080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_SRCSEL_2 0x178 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_BUNDLE_SIZE 0x1000 #define ADF_GEN2_RX_RINGS_OFFSET 8 #define ADF_GEN2_TX_RINGS_MASK 0xFF -#define BUILD_RING_BASE_ADDR(addr, size) \ - (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - u32 l_base = 0, u_base = 0; \ - l_base = (u32)((value) & 0xFFFFFFFF); \ - u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG, value) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ -do { \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ -} while (0) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_EN, value) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, value) - /* AE to function map */ #define AE2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190) #define AE2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310) @@ -106,12 +37,6 @@ do { \ #define ADF_ARB_OFFSET 0x30000 #define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180 #define ADF_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) -#define ADF_ARB_REG_SLOT 0x1000 -#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ - (ADF_ARB_REG_SLOT * (index)), value) /* Power gating */ #define ADF_POWERGATE_DC BIT(23) @@ -158,7 +83,6 @@ u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self); void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev); void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, int num_a_regs, int num_b_regs); -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info); void adf_gen2_get_arb_info(struct arb_info *arb_info); void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c new file mode 100644 index 000000000000..6609c248aaba --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/types.h> +#include "adf_gen4_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_STAT(csr_base_addr, bank); +} + +static u32 read_csr_uo_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_UO_STAT(csr_base_addr, bank); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static u32 read_csr_ne_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NE_STAT(csr_base_addr, bank); +} + +static u32 read_csr_nf_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NF_STAT(csr_base_addr, bank); +} + +static u32 read_csr_f_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_F_STAT(csr_base_addr, bank); +} + +static u32 read_csr_c_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_C_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_INT_EN(csr_base_addr, bank); +} + +static void write_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_CONFIG(csr_base_addr, bank, ring); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static dma_addr_t read_csr_ring_base(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_BASE(csr_base_addr, bank, ring); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static u32 read_csr_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_EN(csr_base_addr, bank); +} + +static void write_csr_int_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_int_flag(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG(csr_base_addr, bank); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static u32 read_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_srcsel_w_val(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value); +} + +static u32 read_csr_int_col_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_EN(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_CTL(csr_base_addr, bank); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static u32 read_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static u32 read_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +static u32 get_int_col_ctl_enable_mask(void) +{ + return ADF_RING_CSR_INT_COL_CTL_ENABLE; +} + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_stat = read_csr_stat; + csr_ops->read_csr_uo_stat = read_csr_uo_stat; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->read_csr_ne_stat = read_csr_ne_stat; + csr_ops->read_csr_nf_stat = read_csr_nf_stat; + csr_ops->read_csr_f_stat = read_csr_f_stat; + csr_ops->read_csr_c_stat = read_csr_c_stat; + csr_ops->read_csr_exp_stat = read_csr_exp_stat; + csr_ops->read_csr_exp_int_en = read_csr_exp_int_en; + csr_ops->write_csr_exp_int_en = write_csr_exp_int_en; + csr_ops->read_csr_ring_config = read_csr_ring_config; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->read_csr_ring_base = read_csr_ring_base; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->read_csr_int_en = read_csr_int_en; + csr_ops->write_csr_int_en = write_csr_int_en; + csr_ops->read_csr_int_flag = read_csr_int_flag; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->read_csr_int_srcsel = read_csr_int_srcsel; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_srcsel_w_val = write_csr_int_srcsel_w_val; + csr_ops->read_csr_int_col_en = read_csr_int_col_en; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->read_csr_int_col_ctl = read_csr_int_col_ctl; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->read_csr_int_flag_and_col = read_csr_int_flag_and_col; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->read_csr_ring_srv_arb_en = read_csr_ring_srv_arb_en; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; + csr_ops->get_int_col_ctl_enable_mask = get_int_col_ctl_enable_mask; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h new file mode 100644 index 000000000000..6f33e7c87c2c --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_HW_CSR_DATA_H_ +#define ADF_GEN4_HW_CSR_DATA_H_ + +#include <linux/bitops.h> +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL +#define ADF_RING_CSR_RING_CONFIG 0x1000 +#define ADF_RING_CSR_RING_LBASE 0x1040 +#define ADF_RING_CSR_RING_UBASE 0x1080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_STAT 0x140 +#define ADF_RING_CSR_UO_STAT 0x148 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_NE_STAT 0x150 +#define ADF_RING_CSR_NF_STAT 0x154 +#define ADF_RING_CSR_F_STAT 0x158 +#define ADF_RING_CSR_C_STAT 0x15C +#define ADF_RING_CSR_INT_FLAG_EN 0x16C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_EXP_STAT 0x188 +#define ADF_RING_CSR_EXP_INT_EN 0x18C +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_CSR_ADDR_OFFSET 0x100000 +#define ADF_RING_BUNDLE_SIZE 0x2000 +#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_STAT) +#define READ_CSR_UO_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_UO_STAT) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) +#define READ_CSR_NE_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NE_STAT) +#define READ_CSR_NF_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NF_STAT) +#define READ_CSR_F_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_F_STAT) +#define READ_CSR_C_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_C_STAT) +#define READ_CSR_EXP_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_STAT) +#define READ_CSR_EXP_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_INT_EN) +#define WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_EXP_INT_EN, value) +#define READ_CSR_RING_CONFIG(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2)) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + void __iomem *_csr_base_addr = csr_base_addr; \ + u32 _bank = bank; \ + u32 _ring = ring; \ + dma_addr_t _value = value; \ + u32 l_base = 0, u_base = 0; \ + l_base = lower_32_bits(_value); \ + u_base = upper_32_bits(_value); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ +} while (0) + +static inline u64 read_base(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + u32 l_base, u_base; + + /* + * Use special IO wrapper for ring base as LBASE and UBASE are + * not physically contigious + */ + l_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_LBASE + (ring << 2)); + u_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_UBASE + (ring << 2)); + + return (u64)u_base << 32 | (u64)l_base; +} + +#define READ_CSR_RING_BASE(csr_base_addr, bank, ring) \ + read_base((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, (bank), (ring)) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define READ_CSR_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG_EN) +#define WRITE_CSR_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_EN, (value)) +#define READ_CSR_INT_FLAG(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG, (value)) +#define READ_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_SRCSEL) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) +#define WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, (value)) +#define READ_CSR_INT_COL_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_EN) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_EN, (value)) +#define READ_CSR_INT_COL_CTL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_CTL) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, (value)) + +#define READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN) +#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN, (value)) + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index d28e1921940a..41a0979e68c1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2020 Intel Corporation */ #include <linux/iopoll.h> +#include <asm/div64.h> #include "adf_accel_devices.h" #include "adf_cfg_services.h" #include "adf_common_drv.h" @@ -8,103 +9,6 @@ #include "adf_gen4_hw_data.h" #include "adf_gen4_pm.h" -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - -static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); - u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self) { return ADF_GEN4_ACCELERATORS_MASK; @@ -321,8 +225,7 @@ static int reset_ring_pair(void __iomem *csr, u32 bank_number) int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - u32 etr_bar_id = hw_data->get_etr_bar_id(hw_data); - void __iomem *csr; + void __iomem *csr = adf_get_etr_base(accel_dev); int ret; if (bank_number >= hw_data->num_banks) @@ -331,7 +234,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) dev_dbg(&GET_DEV(accel_dev), "ring pair reset for bank:%d\n", bank_number); - csr = (&GET_BARS(accel_dev)[etr_bar_id])->virt_addr; ret = reset_ring_pair(csr, bank_number); if (ret) dev_err(&GET_DEV(accel_dev), @@ -489,3 +391,281 @@ set_mask: return ring_to_svc_map; } EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map); + +/* + * adf_gen4_bank_quiesce_coal_timer() - quiesce bank coalesced interrupt timer + * @accel_dev: Pointer to the device structure + * @bank_idx: Offset to the bank within this device + * @timeout_ms: Timeout in milliseconds for the operation + * + * This function tries to quiesce the coalesced interrupt timer of a bank if + * it has been enabled and triggered. + * + * Returns 0 on success, error code otherwise + * + */ +int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev, + u32 bank_idx, int timeout_ms) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_misc = adf_get_pmisc_base(accel_dev); + void __iomem *csr_etr = adf_get_etr_base(accel_dev); + u32 int_col_ctl, int_col_mask, int_col_en; + u32 e_stat, intsrc; + u64 wait_us; + int ret; + + if (timeout_ms < 0) + return -EINVAL; + + int_col_ctl = csr_ops->read_csr_int_col_ctl(csr_etr, bank_idx); + int_col_mask = csr_ops->get_int_col_ctl_enable_mask(); + if (!(int_col_ctl & int_col_mask)) + return 0; + + int_col_en = csr_ops->read_csr_int_col_en(csr_etr, bank_idx); + int_col_en &= BIT(ADF_WQM_CSR_RP_IDX_RX); + + e_stat = csr_ops->read_csr_e_stat(csr_etr, bank_idx); + if (!(~e_stat & int_col_en)) + return 0; + + wait_us = 2 * ((int_col_ctl & ~int_col_mask) << 8) * USEC_PER_SEC; + do_div(wait_us, hw_data->clock_frequency); + wait_us = min(wait_us, (u64)timeout_ms * USEC_PER_MSEC); + dev_dbg(&GET_DEV(accel_dev), + "wait for bank %d - coalesced timer expires in %llu us (max=%u ms estat=0x%x intcolen=0x%x)\n", + bank_idx, wait_us, timeout_ms, e_stat, int_col_en); + + ret = read_poll_timeout(ADF_CSR_RD, intsrc, intsrc, + ADF_COALESCED_POLL_DELAY_US, wait_us, true, + csr_misc, ADF_WQM_CSR_RPINTSOU(bank_idx)); + if (ret) + dev_warn(&GET_DEV(accel_dev), + "coalesced timer for bank %d expired (%llu us)\n", + bank_idx, wait_us); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_quiesce_coal_timer); + +static int drain_bank(void __iomem *csr, u32 bank_number, int timeout_us) +{ + u32 status; + + ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETCTL(bank_number), + ADF_WQM_CSR_RPRESETCTL_DRAIN); + + return read_poll_timeout(ADF_CSR_RD, status, + status & ADF_WQM_CSR_RPRESETSTS_STATUS, + ADF_RPRESET_POLL_DELAY_US, timeout_us, true, + csr, ADF_WQM_CSR_RPRESETSTS(bank_number)); +} + +void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, + u32 bank_number) +{ + void __iomem *csr = adf_get_etr_base(accel_dev); + + ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETSTS(bank_number), + ADF_WQM_CSR_RPRESETSTS_STATUS); +} + +int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, + u32 bank_number, int timeout_us) +{ + void __iomem *csr = adf_get_etr_base(accel_dev); + int ret; + + dev_dbg(&GET_DEV(accel_dev), "Drain bank %d\n", bank_number); + + ret = drain_bank(csr, bank_number, timeout_us); + if (ret) + dev_err(&GET_DEV(accel_dev), "Bank drain failed (timeout)\n"); + else + dev_dbg(&GET_DEV(accel_dev), "Bank drain successful\n"); + + return ret; +} + +static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings) +{ + u32 i; + + state->ringstat0 = ops->read_csr_stat(base, bank); + state->ringuostat = ops->read_csr_uo_stat(base, bank); + state->ringestat = ops->read_csr_e_stat(base, bank); + state->ringnestat = ops->read_csr_ne_stat(base, bank); + state->ringnfstat = ops->read_csr_nf_stat(base, bank); + state->ringfstat = ops->read_csr_f_stat(base, bank); + state->ringcstat0 = ops->read_csr_c_stat(base, bank); + state->iaintflagen = ops->read_csr_int_en(base, bank); + state->iaintflagreg = ops->read_csr_int_flag(base, bank); + state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank); + state->iaintcolen = ops->read_csr_int_col_en(base, bank); + state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank); + state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank); + state->ringexpstat = ops->read_csr_exp_stat(base, bank); + state->ringexpintenable = ops->read_csr_exp_int_en(base, bank); + state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank); + + for (i = 0; i < num_rings; i++) { + state->rings[i].head = ops->read_csr_ring_head(base, bank, i); + state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i); + state->rings[i].config = ops->read_csr_ring_config(base, bank, i); + state->rings[i].base = ops->read_csr_ring_base(base, bank, i); + } +} + +#define CHECK_STAT(op, expect_val, name, args...) \ +({ \ + u32 __expect_val = (expect_val); \ + u32 actual_val = op(args); \ + (__expect_val == actual_val) ? 0 : \ + (pr_err("QAT: Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \ + name, __expect_val, actual_val), -EINVAL); \ +}) + +static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings, + int tx_rx_gap) +{ + u32 val, tmp_val, i; + int ret; + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_base(base, bank, i, state->rings[i].base); + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_config(base, bank, i, state->rings[i].config); + + for (i = 0; i < num_rings / 2; i++) { + int tx = i * (tx_rx_gap + 1); + int rx = tx + tx_rx_gap; + + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail); + + /* + * The TX ring head needs to be updated again to make sure that + * the HW will not consider the ring as full when it is empty + * and the correct state flags are set to match the recovered state. + */ + if (state->ringestat & BIT(tx)) { + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK; + ops->write_csr_int_srcsel_w_val(base, bank, val); + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + } + + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + /* + * The RX ring tail needs to be updated again to make sure that + * the HW will not consider the ring as empty when it is full + * and the correct state flags are set to match the recovered state. + */ + if (state->ringfstat & BIT(rx)) + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + } + + ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen); + ops->write_csr_int_en(base, bank, state->iaintflagen); + ops->write_csr_int_col_en(base, bank, state->iaintcolen); + ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0); + ops->write_csr_exp_int_en(base, bank, state->ringexpintenable); + ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl); + ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); + + /* Check that all ring statuses match the saved state. */ + ret = CHECK_STAT(ops->read_csr_stat, state->ringstat0, "ringstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_e_stat, state->ringestat, "ringestat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_f_stat, state->ringfstat, "ringfstat", + base, bank); + if (ret) + return ret; + + ret = CHECK_STAT(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", + base, bank); + if (ret) + return ret; + + tmp_val = ops->read_csr_exp_stat(base, bank); + val = state->ringexpstat; + if (tmp_val && !val) { + pr_err("QAT: Bank was restored with exception: 0x%x\n", val); + return -EINVAL; + } + + return 0; +} + +int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number); + + bank_state_save(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank); + + return 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_state_save); + +int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + int ret; + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number); + + ret = bank_state_restore(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank, hw_data->tx_rx_gap); + if (ret) + dev_err(&GET_DEV(accel_dev), + "Unable to restore state of bank %d\n", bank_number); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index c6e80df5a85a..8b10926cedba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ /* Copyright(c) 2020 Intel Corporation */ -#ifndef ADF_GEN4_HW_CSR_DATA_H_ -#define ADF_GEN4_HW_CSR_DATA_H_ +#ifndef ADF_GEN4_HW_DATA_H_ +#define ADF_GEN4_HW_DATA_H_ #include <linux/units.h> @@ -54,95 +54,6 @@ #define ADF_GEN4_ADMINMSGLR_OFFSET 0x500578 #define ADF_GEN4_MAILBOX_BASE_OFFSET 0x600970 -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL -#define ADF_RING_CSR_RING_CONFIG 0x1000 -#define ADF_RING_CSR_RING_LBASE 0x1040 -#define ADF_RING_CSR_RING_UBASE 0x1080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_ADDR_OFFSET 0x100000 -#define ADF_RING_BUNDLE_SIZE 0x2000 - -#define BUILD_RING_BASE_ADDR(addr, size) \ - ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - void __iomem *_csr_base_addr = csr_base_addr; \ - u32 _bank = bank; \ - u32 _ring = ring; \ - dma_addr_t _value = value; \ - u32 l_base = 0, u_base = 0; \ - l_base = lower_32_bits(_value); \ - u_base = upper_32_bits(_value); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG, (value)) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_COL_EN, (value)) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, (value)) - -/* Arbiter configuration */ -#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_SRV_ARB_EN, (value)) - /* Default ring mapping */ #define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP \ (ASYM << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \ @@ -166,10 +77,20 @@ do { \ #define ADF_RPRESET_POLL_TIMEOUT_US (5 * USEC_PER_SEC) #define ADF_RPRESET_POLL_DELAY_US 20 #define ADF_WQM_CSR_RPRESETCTL_RESET BIT(0) +#define ADF_WQM_CSR_RPRESETCTL_DRAIN BIT(2) #define ADF_WQM_CSR_RPRESETCTL(bank) (0x6000 + ((bank) << 3)) #define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0) #define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4) +/* Ring interrupt */ +#define ADF_RP_INT_SRC_SEL_F_RISE_MASK BIT(2) +#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) +#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 +#define ADF_COALESCED_POLL_TIMEOUT_US (1 * USEC_PER_SEC) +#define ADF_COALESCED_POLL_DELAY_US 1000 +#define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12)) +#define ADF_WQM_CSR_RP_IDX_RX 1 + /* Error source registers */ #define ADF_GEN4_ERRSOU0 (0x41A200) #define ADF_GEN4_ERRSOU1 (0x41A204) @@ -197,6 +118,19 @@ do { \ /* Arbiter threads mask with error value */ #define ADF_GEN4_ENA_THD_MASK_ERROR GENMASK(ADF_NUM_THREADS_PER_AE, 0) +/* PF2VM communication channel */ +#define ADF_GEN4_PF2VM_OFFSET(i) (0x40B010 + (i) * 0x20) +#define ADF_GEN4_VM2PF_OFFSET(i) (0x40B014 + (i) * 0x20) +#define ADF_GEN4_VINTMSKPF2VM_OFFSET(i) (0x40B00C + (i) * 0x20) +#define ADF_GEN4_VINTSOUPF2VM_OFFSET(i) (0x40B008 + (i) * 0x20) +#define ADF_GEN4_VINTMSK_OFFSET(i) (0x40B004 + (i) * 0x20) +#define ADF_GEN4_VINTSOU_OFFSET(i) (0x40B000 + (i) * 0x20) + +struct adf_gen4_vfmig { + struct adf_mstate_mgr *mstate_mgr; + bool bank_stopped[ADF_GEN4_NUM_BANKS_PER_VF]; +}; + void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); enum icp_qat_gen4_slice_mask { @@ -230,11 +164,20 @@ u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self); enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self); u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self); int adf_gen4_init_device(struct adf_accel_dev *accel_dev); -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev); u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev); +int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev, + u32 bank_idx, int timeout_ms); +int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, + u32 bank_number, int timeout_us); +void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, + u32 bank_number); +int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); +int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c index 8e8efe93f3ee..21474d402d09 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c @@ -6,12 +6,10 @@ #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_gen4_pfvf.h" +#include "adf_gen4_hw_data.h" #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) -#define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) - /* VF2PF interrupt source registers */ #define ADF_4XXX_VM2PF_SOU 0x41A180 #define ADF_4XXX_VM2PF_MSK 0x41A1C0 @@ -29,12 +27,12 @@ static const struct pfvf_csr_format csr_gen4_fmt = { static u32 adf_gen4_pf_get_pf2vf_offset(u32 i) { - return ADF_4XXX_PF2VM_OFFSET(i); + return ADF_GEN4_PF2VM_OFFSET(i); } static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) { - return ADF_4XXX_VM2PF_OFFSET(i); + return ADF_GEN4_VM2PF_OFFSET(i); } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c index 7fc7a77f6aed..c7ad8cf07863 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c @@ -149,5 +149,6 @@ void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data) tl_data->sl_exec_counters = sl_exec_counters; tl_data->rp_counters = rp_counters; tl_data->num_rp_counters = ARRAY_SIZE(rp_counters); + tl_data->max_sl_cnt = ADF_GEN4_TL_MAX_SLICES_PER_TYPE; } EXPORT_SYMBOL_GPL(adf_gen4_init_tl_data); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c new file mode 100644 index 000000000000..a62eb5e8dbe6 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/delay.h> +#include <linux/dev_printk.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/types.h> +#include <asm/errno.h> + +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_gen4_hw_data.h" +#include "adf_gen4_pfvf.h" +#include "adf_pfvf_utils.h" +#include "adf_mstate_mgr.h" +#include "adf_gen4_vf_mig.h" + +#define ADF_GEN4_VF_MSTATE_SIZE 4096 +#define ADF_GEN4_PFVF_RSP_TIMEOUT_US 5000 + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev); +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len); + +static int adf_gen4_vfmig_init_device(struct qat_mig_dev *mdev) +{ + u8 *state; + + state = kmalloc(ADF_GEN4_VF_MSTATE_SIZE, GFP_KERNEL); + if (!state) + return -ENOMEM; + + mdev->state = state; + mdev->state_size = ADF_GEN4_VF_MSTATE_SIZE; + mdev->setup_size = 0; + mdev->remote_setup_size = 0; + + return 0; +} + +static void adf_gen4_vfmig_cleanup_device(struct qat_mig_dev *mdev) +{ + kfree(mdev->state); + mdev->state = NULL; +} + +static void adf_gen4_vfmig_reset_device(struct qat_mig_dev *mdev) +{ + mdev->setup_size = 0; + mdev->remote_setup_size = 0; +} + +static int adf_gen4_vfmig_open_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + + vf_info = &accel_dev->pf.vf_info[mdev->vf_id]; + + vfmig = kzalloc(sizeof(*vfmig), GFP_KERNEL); + if (!vfmig) + return -ENOMEM; + + vfmig->mstate_mgr = adf_mstate_mgr_new(mdev->state, mdev->state_size); + if (!vfmig->mstate_mgr) { + kfree(vfmig); + return -ENOMEM; + } + vf_info->mig_priv = vfmig; + mdev->setup_size = 0; + mdev->remote_setup_size = 0; + + return 0; +} + +static void adf_gen4_vfmig_close_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + + vf_info = &accel_dev->pf.vf_info[mdev->vf_id]; + if (vf_info->mig_priv) { + vfmig = vf_info->mig_priv; + adf_mstate_mgr_destroy(vfmig->mstate_mgr); + kfree(vfmig); + vf_info->mig_priv = NULL; + } +} + +static int adf_gen4_vfmig_suspend_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vf_mig; + u32 vf_nr = mdev->vf_id; + int ret, i; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vf_mig = vf_info->mig_priv; + + /* Stop all inflight jobs */ + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf; + + ret = adf_gen4_bank_drain_start(accel_dev, pf_bank_nr, + ADF_RPRESET_POLL_TIMEOUT_US); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to drain bank %d for vf_nr %d\n", i, + vf_nr); + return ret; + } + vf_mig->bank_stopped[i] = true; + + adf_gen4_bank_quiesce_coal_timer(accel_dev, pf_bank_nr, + ADF_COALESCED_POLL_TIMEOUT_US); + } + + return 0; +} + +static int adf_gen4_vfmig_resume_device(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vf_mig; + u32 vf_nr = mdev->vf_id; + int i; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vf_mig = vf_info->mig_priv; + + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf; + + if (vf_mig->bank_stopped[i]) { + adf_gen4_bank_drain_finish(accel_dev, pf_bank_nr); + vf_mig->bank_stopped[i] = false; + } + } + + return 0; +} + +struct adf_vf_bank_info { + struct adf_accel_dev *accel_dev; + u32 vf_nr; + u32 bank_nr; +}; + +struct mig_user_sla { + enum adf_base_services srv; + u64 rp_mask; + u32 cir; + u32 pir; +}; + +static int adf_mstate_sla_check(struct adf_mstate_mgr *sub_mgr, u8 *src_buf, + u32 src_size, void *opaque) +{ + struct adf_mstate_vreginfo _sinfo = { src_buf, src_size }; + struct adf_mstate_vreginfo *sinfo = &_sinfo, *dinfo = opaque; + u32 src_sla_cnt = sinfo->size / sizeof(struct mig_user_sla); + u32 dst_sla_cnt = dinfo->size / sizeof(struct mig_user_sla); + struct mig_user_sla *src_slas = sinfo->addr; + struct mig_user_sla *dst_slas = dinfo->addr; + int i, j; + + for (i = 0; i < src_sla_cnt; i++) { + for (j = 0; j < dst_sla_cnt; j++) { + if (src_slas[i].srv != dst_slas[j].srv || + src_slas[i].rp_mask != dst_slas[j].rp_mask) + continue; + + if (src_slas[i].cir > dst_slas[j].cir || + src_slas[i].pir > dst_slas[j].pir) { + pr_err("QAT: DST VF rate limiting mismatch.\n"); + return -EINVAL; + } + break; + } + + if (j == dst_sla_cnt) { + pr_err("QAT: SRC VF rate limiting mismatch - SRC srv %d and rp_mask 0x%llx.\n", + src_slas[i].srv, src_slas[i].rp_mask); + return -EINVAL; + } + } + + return 0; +} + +static inline int adf_mstate_check_cap_size(u32 src_sz, u32 dst_sz, u32 max_sz) +{ + if (src_sz > max_sz || dst_sz > max_sz) + return -EINVAL; + else + return 0; +} + +static int adf_mstate_compatver_check(struct adf_mstate_mgr *sub_mgr, + u8 *src_buf, u32 src_sz, void *opaque) +{ + struct adf_mstate_vreginfo *info = opaque; + u8 compat = 0; + u8 *pcompat; + + if (src_sz != info->size) { + pr_debug("QAT: State mismatch (compat version size), current %u, expected %u\n", + src_sz, info->size); + return -EINVAL; + } + + memcpy(info->addr, src_buf, info->size); + pcompat = info->addr; + if (*pcompat == 0) { + pr_warn("QAT: Unable to determine the version of VF\n"); + return 0; + } + + compat = adf_vf_compat_checker(*pcompat); + if (compat == ADF_PF2VF_VF_INCOMPATIBLE) { + pr_debug("QAT: SRC VF driver (ver=%u) is incompatible with DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + return -EINVAL; + } + + if (compat == ADF_PF2VF_VF_COMPAT_UNKNOWN) + pr_debug("QAT: SRC VF driver (ver=%u) is newer than DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + + return 0; +} + +/* + * adf_mstate_capmask_compare() - compare QAT device capability mask + * @sinfo: Pointer to source capability info + * @dinfo: Pointer to target capability info + * + * This function compares the capability mask between source VF and target VF + * + * Returns: 0 if target capability mask is identical to source capability mask, + * 1 if target mask can represent all the capabilities represented by source mask, + * -1 if target mask can't represent all the capabilities represented by source + * mask. + */ +static int adf_mstate_capmask_compare(struct adf_mstate_vreginfo *sinfo, + struct adf_mstate_vreginfo *dinfo) +{ + u64 src = 0, dst = 0; + + if (adf_mstate_check_cap_size(sinfo->size, dinfo->size, sizeof(u64))) { + pr_debug("QAT: Unexpected capability size %u %u %zu\n", + sinfo->size, dinfo->size, sizeof(u64)); + return -1; + } + + memcpy(&src, sinfo->addr, sinfo->size); + memcpy(&dst, dinfo->addr, dinfo->size); + + pr_debug("QAT: Check cap compatibility of cap %llu %llu\n", src, dst); + + if (src == dst) + return 0; + + if ((src | dst) == dst) + return 1; + + return -1; +} + +static int adf_mstate_capmask_superset(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) >= 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_capmask_equal(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) == 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_set_vreg(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo *info = opa; + + if (size != info->size) { + pr_debug("QAT: Unexpected cap size %u %u\n", size, info->size); + return -EINVAL; + } + memcpy(info->addr, buf, info->size); + + return 0; +} + +static u32 adf_gen4_vfmig_get_slas(struct adf_accel_dev *accel_dev, u32 vf_nr, + struct mig_user_sla *pmig_slas) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_rl *rl_data = accel_dev->rate_limiting; + struct rl_sla **sla_type_arr = NULL; + u64 rp_mask, rp_index; + u32 max_num_sla; + u32 sla_cnt = 0; + int i, j; + + if (!accel_dev->rate_limiting) + return 0; + + rp_index = vf_nr * hw_data->num_banks_per_vf; + max_num_sla = adf_rl_get_sla_arr_of_type(rl_data, RL_LEAF, &sla_type_arr); + + for (i = 0; i < max_num_sla; i++) { + if (!sla_type_arr[i]) + continue; + + rp_mask = 0; + for (j = 0; j < sla_type_arr[i]->ring_pairs_cnt; j++) + rp_mask |= BIT(sla_type_arr[i]->ring_pairs_ids[j]); + + if (rp_mask & GENMASK_ULL(rp_index + 3, rp_index)) { + pmig_slas->rp_mask = rp_mask; + pmig_slas->cir = sla_type_arr[i]->cir; + pmig_slas->pir = sla_type_arr[i]->pir; + pmig_slas->srv = sla_type_arr[i]->srv; + pmig_slas++; + sla_cnt++; + } + } + + return sla_cnt; +} + +static int adf_gen4_vfmig_load_etr_regs(struct adf_mstate_mgr *sub_mgr, + u8 *state, u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr + vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + ret = hw_data->bank_state_restore(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_vf_bank_info vf_bank_info = {accel_dev, vf_nr, bank_nr}; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + subsec = adf_mstate_sect_lookup(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to lookup sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_load_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_load_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 ofs; + } misc_states[] = { + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo info; + u32 regv; + + info.addr = ®v; + info.size = sizeof(regv); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + misc_states[i].id, + adf_mstate_set_vreg, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to load sec %s\n", misc_states[i].id); + return -EINVAL; + } + ADF_CSR_WR(csr, misc_states[i].ofs, regv); + } + + return 0; +} + +static int adf_gen4_vfmig_load_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct mig_user_sla dst_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + u32 dst_sla_cnt; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, adf_mstate_set_vreg, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, adf_mstate_compatver_check, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, adf_mstate_sla_check, {dst_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == dst_slas) { + dst_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, dst_slas); + gen_states[i].info.size = dst_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + gen_states[i].id, + gen_states[i].action, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + + return 0; +} + +static int adf_gen4_vfmig_load_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, adf_mstate_capmask_superset, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, adf_mstate_capmask_equal, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, adf_mstate_capmask_superset, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, setups[i].id, + setups[i].action, &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + setups[i].id); + return -EINVAL; + } + } + + return 0; +} + +static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state, + u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr; + pf_bank_nr += vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + + ret = hw_data->bank_state_save(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return sizeof(struct bank_state); +} + +static int adf_gen4_vfmig_save_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_vf_bank_info vf_bank_info; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + + subsec = adf_mstate_sect_add(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + vf_bank_info.accel_dev = accel_dev; + vf_bank_info.vf_nr = vf_nr; + vf_bank_info.bank_nr = bank_nr; + l2_subsec = adf_mstate_sect_add(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_save_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_save_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 offset; + } misc_states[] = { + {ADF_MSTATE_VINTSRC_IDS, ADF_GEN4_VINTSOU_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTSRC_PF2VM_IDS, ADF_GEN4_VINTSOUPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + ktime_t time_exp; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + time_exp = ktime_add_us(ktime_get(), ADF_GEN4_PFVF_RSP_TIMEOUT_US); + while (!mutex_trylock(&vf_info->pfvf_mig_lock)) { + if (ktime_after(ktime_get(), time_exp)) { + dev_err(&GET_DEV(accel_dev), "Failed to get pfvf mig lock\n"); + return -ETIMEDOUT; + } + usleep_range(500, 1000); + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo info; + u32 regv; + + info.addr = ®v; + info.size = sizeof(regv); + regv = ADF_CSR_RD(csr, misc_states[i].offset); + + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, + misc_states[i].id, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + misc_states[i].id); + mutex_unlock(&vf_info->pfvf_mig_lock); + return -EINVAL; + } + } + + mutex_unlock(&vf_info->pfvf_mig_lock); + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct mig_user_sla src_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + u32 src_sla_cnt; + struct { + char *id; + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, {src_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == src_slas) { + src_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, src_slas); + gen_states[i].info.size = src_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, + gen_states[i].id, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct { + char *id; + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, setups[i].id, + &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + setups[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_save_setup(mdev); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save setup for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state + mdev->setup_size, + mdev->state_size - mdev->setup_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = adf_gen4_vfmig_save_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save generic state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_preamble_update(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_load_setup(mdev, mdev->state_size); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Failed to load setup for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, + mdev->state + mdev->remote_setup_size, + mdev->state_size - mdev->remote_setup_size, + NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalid state for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load general state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->setup_size) + return 0; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = adf_gen4_vfmig_save_config(accel_dev, mdev->vf_id); + if (ret) + return ret; + + adf_mstate_preamble_update(vfmig->mstate_mgr); + mdev->setup_size = adf_mstate_state_size(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + u32 setup_size; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->remote_setup_size) + return 0; + + if (len < sizeof(struct adf_mstate_preh)) + return -EAGAIN; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + setup_size = adf_mstate_state_size_from_remote(vfmig->mstate_mgr); + if (setup_size > mdev->state_size) + return -EINVAL; + + if (len < setup_size) + return -EAGAIN; + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, mdev->state, + setup_size, NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalid setup for vf_nr %d\n", + vf_nr); + return ret; + } + + mdev->remote_setup_size = setup_size; + + ret = adf_gen4_vfmig_load_config(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load config for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops) +{ + vfmig_ops->init = adf_gen4_vfmig_init_device; + vfmig_ops->cleanup = adf_gen4_vfmig_cleanup_device; + vfmig_ops->reset = adf_gen4_vfmig_reset_device; + vfmig_ops->open = adf_gen4_vfmig_open_device; + vfmig_ops->close = adf_gen4_vfmig_close_device; + vfmig_ops->suspend = adf_gen4_vfmig_suspend_device; + vfmig_ops->resume = adf_gen4_vfmig_resume_device; + vfmig_ops->save_state = adf_gen4_vfmig_save_state; + vfmig_ops->load_state = adf_gen4_vfmig_load_state; + vfmig_ops->load_setup = adf_gen4_vfmig_load_setup; + vfmig_ops->save_setup = adf_gen4_vfmig_save_setup; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_vf_mig_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h new file mode 100644 index 000000000000..72216d078ee1 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_VF_MIG_H_ +#define ADF_GEN4_VF_MIG_H_ + +#include "adf_accel_devices.h" + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c new file mode 100644 index 000000000000..41cc763a74aa --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ + +#include <linux/slab.h> +#include <linux/types.h> +#include "adf_mstate_mgr.h" + +#define ADF_MSTATE_MAGIC 0xADF5CAEA +#define ADF_MSTATE_VERSION 0x1 + +struct adf_mstate_sect_h { + u8 id[ADF_MSTATE_ID_LEN]; + u32 size; + u32 sub_sects; + u8 state[]; +}; + +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr) +{ + return mgr->state - mgr->buf; +} + +static inline u32 adf_mstate_avail_room(struct adf_mstate_mgr *mgr) +{ + return mgr->buf + mgr->size - mgr->state; +} + +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size) +{ + mgr->buf = buf; + mgr->state = buf; + mgr->size = size; + mgr->n_sects = 0; +}; + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size) +{ + struct adf_mstate_mgr *mgr; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + adf_mstate_mgr_init(mgr, buf, size); + + return mgr; +} + +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr) +{ + kfree(mgr); +} + +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr) +{ + adf_mstate_mgr_init(mgr, p_mgr->state, + p_mgr->size - adf_mstate_state_size(p_mgr)); +} + +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect) +{ + adf_mstate_mgr_init(mgr, p_sect->state, p_sect->size); + mgr->n_sects = p_sect->sub_sects; +} + +static void adf_mstate_preamble_init(struct adf_mstate_preh *preamble) +{ + preamble->magic = ADF_MSTATE_MAGIC; + preamble->version = ADF_MSTATE_VERSION; + preamble->preh_len = sizeof(*preamble); + preamble->size = 0; + preamble->n_sects = 0; +} + +/* default preambles checker */ +static int adf_mstate_preamble_def_checker(struct adf_mstate_preh *preamble, + void *opaque) +{ + struct adf_mstate_mgr *mgr = opaque; + + if (preamble->magic != ADF_MSTATE_MAGIC || + preamble->version > ADF_MSTATE_VERSION || + preamble->preh_len > mgr->size) { + pr_debug("QAT: LM - Invalid state (magic=%#x, version=%#x, hlen=%u), state_size=%u\n", + preamble->magic, preamble->version, preamble->preh_len, + mgr->size); + return -EINVAL; + } + + return 0; +} + +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *pre = (struct adf_mstate_preh *)mgr->buf; + + if (adf_mstate_avail_room(mgr) < sizeof(*pre)) { + pr_err("QAT: LM - Not enough space for preamble\n"); + return NULL; + } + + adf_mstate_preamble_init(pre); + mgr->state += pre->preh_len; + + return pre; +} + +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preamble = (struct adf_mstate_preh *)mgr->buf; + + preamble->size = adf_mstate_state_size(mgr) - preamble->preh_len; + preamble->n_sects = mgr->n_sects; + + return 0; +} + +static void adf_mstate_dump_sect(struct adf_mstate_sect_h *sect, + const char *prefix) +{ + pr_debug("QAT: LM - %s QAT state section %s\n", prefix, sect->id); + print_hex_dump_debug("h-", DUMP_PREFIX_OFFSET, 16, 2, sect, + sizeof(*sect), true); + print_hex_dump_debug("s-", DUMP_PREFIX_OFFSET, 16, 2, sect->state, + sect->size, true); +} + +static inline void __adf_mstate_sect_update(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *sect, + u32 size, + u32 n_subsects) +{ + sect->size += size; + sect->sub_sects += n_subsects; + mgr->n_sects++; + mgr->state += sect->size; + + adf_mstate_dump_sect(sect, "Add"); +} + +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect) +{ + __adf_mstate_sect_update(p_mgr, sect, adf_mstate_state_size(curr_mgr), + curr_mgr->n_sects); +} + +static struct adf_mstate_sect_h *adf_mstate_sect_add_header(struct adf_mstate_mgr *mgr, + const char *id) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)(mgr->state); + + if (adf_mstate_avail_room(mgr) < sizeof(*sect)) { + pr_debug("QAT: LM - Not enough space for header of QAT state sect %s\n", id); + return NULL; + } + + strscpy(sect->id, id, sizeof(sect->id)); + sect->size = 0; + sect->sub_sects = 0; + mgr->state += sizeof(*sect); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info) +{ + struct adf_mstate_sect_h *sect; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (adf_mstate_avail_room(mgr) < info->size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, info->size); + return NULL; + } + + memcpy(sect->state, info->addr, info->size); + __adf_mstate_sect_update(mgr, sect, info->size, 0); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque) +{ + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *sect; + int avail_room, size; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (!populate) + return sect; + + avail_room = adf_mstate_avail_room(mgr); + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mgr); + + size = (*populate)(&sub_sects_mgr, sect->state, avail_room, opaque); + if (size < 0) + return NULL; + + size += adf_mstate_state_size(&sub_sects_mgr); + if (avail_room < size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, size); + return NULL; + } + __adf_mstate_sect_update(mgr, sect, size, sub_sects_mgr.n_sects); + + return sect; +} + +static int adf_mstate_sect_validate(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_sect_h *start = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_sect_h *sect = start; + u64 end; + int i; + + end = (uintptr_t)mgr->buf + mgr->size; + for (i = 0; i < mgr->n_sects; i++) { + uintptr_t s_start = (uintptr_t)sect->state; + uintptr_t s_end = s_start + sect->size; + + if (s_end < s_start || s_end > end) { + pr_debug("QAT: LM - Corrupted state section (index=%u, size=%u) in state_mgr (size=%u, secs=%u)\n", + i, sect->size, mgr->size, mgr->n_sects); + return -EINVAL; + } + sect = (struct adf_mstate_sect_h *)s_end; + } + + pr_debug("QAT: LM - Scanned section (last child=%s, size=%lu) in state_mgr (size=%u, secs=%u)\n", + start->id, sizeof(struct adf_mstate_sect_h) * (ulong)(sect - start), + mgr->size, mgr->n_sects); + + return 0; +} + +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preh = (struct adf_mstate_preh *)mgr->buf; + + return preh->preh_len + preh->size; +} + +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, u8 *buf, u32 size, + adf_mstate_preamble_checker pre_checker, + void *opaque) +{ + struct adf_mstate_preh *pre; + int ret; + + adf_mstate_mgr_init(mgr, buf, size); + pre = (struct adf_mstate_preh *)(mgr->buf); + + pr_debug("QAT: LM - Dump state preambles\n"); + print_hex_dump_debug("", DUMP_PREFIX_OFFSET, 16, 2, pre, pre->preh_len, 0); + + if (pre_checker) + ret = (*pre_checker)(pre, opaque); + else + ret = adf_mstate_preamble_def_checker(pre, mgr); + if (ret) + return ret; + + mgr->state = mgr->buf + pre->preh_len; + mgr->n_sects = pre->n_sects; + + return adf_mstate_sect_validate(mgr); +} + +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_mgr sub_sects_mgr; + int i, ret; + + for (i = 0; i < mgr->n_sects; i++) { + if (!strncmp(sect->id, id, sizeof(sect->id))) + goto found; + + sect = (struct adf_mstate_sect_h *)(sect->state + sect->size); + } + + return NULL; + +found: + adf_mstate_dump_sect(sect, "Found"); + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, sect); + if (sect->sub_sects && adf_mstate_sect_validate(&sub_sects_mgr)) + return NULL; + + if (!action) + return sect; + + ret = (*action)(&sub_sects_mgr, sect->state, sect->size, opaque); + if (ret) + return NULL; + + return sect; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h new file mode 100644 index 000000000000..81d263a596c5 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ + +#ifndef ADF_MSTATE_MGR_H +#define ADF_MSTATE_MGR_H + +#define ADF_MSTATE_ID_LEN 8 + +#define ADF_MSTATE_ETRB_IDS "ETRBAR" +#define ADF_MSTATE_MISCB_IDS "MISCBAR" +#define ADF_MSTATE_EXTB_IDS "EXTBAR" +#define ADF_MSTATE_GEN_IDS "GENER" +#define ADF_MSTATE_CONFIG_IDS "CONFIG" +#define ADF_MSTATE_SECTION_NUM 5 + +#define ADF_MSTATE_BANK_IDX_IDS "bnk" + +#define ADF_MSTATE_ETR_REGS_IDS "mregs" +#define ADF_MSTATE_VINTSRC_IDS "visrc" +#define ADF_MSTATE_VINTMSK_IDS "vimsk" +#define ADF_MSTATE_SLA_IDS "sla" +#define ADF_MSTATE_IOV_INIT_IDS "iovinit" +#define ADF_MSTATE_COMPAT_VER_IDS "compver" +#define ADF_MSTATE_GEN_CAP_IDS "gencap" +#define ADF_MSTATE_GEN_SVCMAP_IDS "svcmap" +#define ADF_MSTATE_GEN_EXTDC_IDS "extdc" +#define ADF_MSTATE_VINTSRC_PF2VM_IDS "vispv" +#define ADF_MSTATE_VINTMSK_PF2VM_IDS "vimpv" +#define ADF_MSTATE_VM2PF_IDS "vm2pf" +#define ADF_MSTATE_PF2VM_IDS "pf2vm" + +struct adf_mstate_mgr { + u8 *buf; + u8 *state; + u32 size; + u32 n_sects; +}; + +struct adf_mstate_preh { + u32 magic; + u32 version; + u16 preh_len; + u16 n_sects; + u32 size; +}; + +struct adf_mstate_vreginfo { + void *addr; + u32 size; +}; + +struct adf_mstate_sect_h; + +typedef int (*adf_mstate_preamble_checker)(struct adf_mstate_preh *preamble, void *opa); +typedef int (*adf_mstate_populate)(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa); +typedef int (*adf_mstate_action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, + void *opa); + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size); +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr); +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size); +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr); +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect); +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, + u8 *buf, u32 size, + adf_mstate_preamble_checker checker, + void *opaque); +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr); +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr); +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect); +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info); +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque); +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque); +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c index 9ab93fbfefde..b9b5e744a3f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c @@ -242,13 +242,7 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, "VersionRequest received from VF%d (vers %d) to PF (vers %d)\n", vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); - if (vf_compat_ver == 0) - compat = ADF_PF2VF_VF_INCOMPATIBLE; - else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) - compat = ADF_PF2VF_VF_COMPATIBLE; - else - compat = ADF_PF2VF_VF_COMPAT_UNKNOWN; - + compat = adf_vf_compat_checker(vf_compat_ver); vf_info->vf_compat_ver = vf_compat_ver; resp->type = ADF_PF2VF_MSGTYPE_VERSION_RESP; diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h index 2be048e2287b..1a044297d873 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h @@ -28,4 +28,15 @@ u32 adf_pfvf_csr_msg_of(struct adf_accel_dev *accel_dev, struct pfvf_message msg struct pfvf_message adf_pfvf_message_of(struct adf_accel_dev *accel_dev, u32 raw_msg, const struct pfvf_csr_format *fmt); +static inline u8 adf_vf_compat_checker(u8 vf_compat_ver) +{ + if (vf_compat_ver == 0) + return ADF_PF2VF_VF_INCOMPATIBLE; + + if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) + return ADF_PF2VF_VF_COMPATIBLE; + + return ADF_PF2VF_VF_COMPAT_UNKNOWN; +} + #endif /* ADF_PFVF_UTILS_H */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index d4f2db3c53d8..346ef8bee99d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -183,14 +183,14 @@ static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_s } /** - * get_sla_arr_of_type() - Returns a pointer to SLA type specific array + * adf_rl_get_sla_arr_of_type() - Returns a pointer to SLA type specific array * @rl_data: pointer to ratelimiting data * @type: SLA type * @sla_arr: pointer to variable where requested pointer will be stored * * Return: Max number of elements allowed for the returned array */ -static u32 get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, struct rl_sla ***sla_arr) { switch (type) { @@ -778,7 +778,7 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla) rp_in_use[sla->ring_pairs_ids[i]] = false; update_budget(sla, old_cir, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); assign_node_to_parent(rl_data->accel_dev, sla, true); adf_rl_send_admin_delete_msg(rl_data->accel_dev, node_id, sla->type); mark_rps_usage(sla, rl_data->rp_in_use, false); @@ -875,7 +875,7 @@ static int add_update_sla(struct adf_accel_dev *accel_dev, if (!is_update) { mark_rps_usage(sla, rl_data->rp_in_use, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); sla_type_arr[sla->node_id] = sla; rl_data->sla[sla->sla_id] = sla; } @@ -1065,7 +1065,7 @@ void adf_rl_remove_sla_all(struct adf_accel_dev *accel_dev, bool incl_default) /* Unregister and remove all SLAs */ for (j = RL_LEAF; j >= end_type; j--) { - max_id = get_sla_arr_of_type(rl_data, j, &sla_type_arr); + max_id = adf_rl_get_sla_arr_of_type(rl_data, j, &sla_type_arr); for (i = 0; i < max_id; i++) { if (!sla_type_arr[i]) @@ -1125,7 +1125,7 @@ int adf_rl_start(struct adf_accel_dev *accel_dev) } if ((fw_caps & RL_CAPABILITY_MASK) != RL_CAPABILITY_VALUE) { - dev_info(&GET_DEV(accel_dev), "not supported\n"); + dev_info(&GET_DEV(accel_dev), "feature not supported by FW\n"); ret = -EOPNOTSUPP; goto ret_free; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 269c6656fb90..bfe750ea0e83 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -151,6 +151,8 @@ struct rl_sla { u16 ring_pairs_cnt; }; +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, + struct rl_sla ***sla_arr); int adf_rl_add_sla(struct adf_accel_dev *accel_dev, struct adf_rl_sla_input_data *sla_in); int adf_rl_update_sla(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c index 87a70c00c41e..8d645e7e04aa 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c @@ -26,10 +26,12 @@ static void adf_iov_send_resp(struct work_struct *work) u32 vf_nr = vf_info->vf_nr; bool ret; + mutex_lock(&vf_info->pfvf_mig_lock); ret = adf_recv_and_handle_vf2pf_msg(accel_dev, vf_nr); if (ret) /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, 1 << vf_nr); + mutex_unlock(&vf_info->pfvf_mig_lock); kfree(pf2vf_resp); } @@ -62,6 +64,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) vf_info->vf_nr = i; mutex_init(&vf_info->pf2vf_lock); + mutex_init(&vf_info->pfvf_mig_lock); ratelimit_state_init(&vf_info->vf2pf_ratelimit, ADF_VF2PF_RATELIMIT_INTERVAL, ADF_VF2PF_RATELIMIT_BURST); @@ -138,8 +141,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) if (hw_data->configure_iov_threads) hw_data->configure_iov_threads(accel_dev, false); - for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) + for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { mutex_destroy(&vf->pf2vf_lock); + mutex_destroy(&vf->pfvf_mig_lock); + } if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) { kfree(accel_dev->pf.vf_info); diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c index 2ff714d11bd2..74fb0c2ed241 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c @@ -41,6 +41,20 @@ static int validate_tl_data(struct adf_tl_hw_data *tl_data) return 0; } +static int validate_tl_slice_counters(struct icp_qat_fw_init_admin_slice_cnt *slice_count, + u8 max_slices_per_type) +{ + u8 *sl_counter = (u8 *)slice_count; + int i; + + for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) { + if (sl_counter[i] > max_slices_per_type) + return -EINVAL; + } + + return 0; +} + static int adf_tl_alloc_mem(struct adf_accel_dev *accel_dev) { struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); @@ -214,6 +228,13 @@ int adf_tl_run(struct adf_accel_dev *accel_dev, int state) return ret; } + ret = validate_tl_slice_counters(&telemetry->slice_cnt, tl_data->max_sl_cnt); + if (ret) { + dev_err(dev, "invalid value returned by FW\n"); + adf_send_admin_tl_stop(accel_dev); + return ret; + } + telemetry->hbuffs = state; atomic_set(&telemetry->state, state); diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h index 9be81cd3b886..e54a406cc1b4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h @@ -40,6 +40,7 @@ struct adf_tl_hw_data { u8 num_dev_counters; u8 num_rp_counters; u8 max_rp; + u8 max_sl_cnt; }; struct adf_telemetry { diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport.c b/drivers/crypto/intel/qat/qat_common/adf_transport.c index 630d0483c4e0..1efdf46490f1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_transport.c +++ b/drivers/crypto/intel/qat/qat_common/adf_transport.c @@ -474,7 +474,6 @@ err: int adf_init_etr_data(struct adf_accel_dev *accel_dev) { struct adf_etr_data *etr_data; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; void __iomem *csr_addr; u32 size; u32 num_banks = 0; @@ -495,8 +494,7 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev) } accel_dev->transport = etr_data; - i = hw_data->get_etr_bar_id(hw_data); - csr_addr = accel_dev->accel_pci_dev.pci_bars[i].virt_addr; + csr_addr = adf_get_etr_base(accel_dev); /* accel_dev->debugfs_dir should always be non-NULL here */ etr_data->debug = debugfs_create_dir("transport", diff --git a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c index 4128200a9032..85c682e248fb 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c @@ -110,6 +110,8 @@ struct qat_dh_ctx { unsigned int p_size; bool g2; struct qat_crypto_instance *inst; + struct crypto_kpp *ftfm; + bool fallback; } __packed __aligned(64); struct qat_asym_request { @@ -381,6 +383,36 @@ unmap_src: return ret; } +static int qat_dh_generate_public_key(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_generate_public_key(nreq); + } + + return qat_dh_compute_value(req); +} + +static int qat_dh_compute_shared_secret(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_compute_shared_secret(nreq); + } + + return qat_dh_compute_value(req); +} + static int qat_dh_check_params_length(unsigned int p_len) { switch (p_len) { @@ -398,9 +430,6 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - if (qat_dh_check_params_length(params->p_size << 3)) - return -EINVAL; - ctx->p_size = params->p_size; ctx->p = dma_alloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); if (!ctx->p) @@ -454,6 +483,13 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, if (crypto_dh_decode_key(buf, len, ¶ms) < 0) return -EINVAL; + if (qat_dh_check_params_length(params.p_size << 3)) { + ctx->fallback = true; + return crypto_kpp_set_secret(ctx->ftfm, buf, len); + } + + ctx->fallback = false; + /* Free old secret if any */ qat_dh_clear_ctx(dev, ctx); @@ -481,6 +517,9 @@ static unsigned int qat_dh_max_size(struct crypto_kpp *tfm) { struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + if (ctx->fallback) + return crypto_kpp_maxsize(ctx->ftfm); + return ctx->p_size; } @@ -489,11 +528,22 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct qat_crypto_instance *inst = qat_crypto_get_instance_node(numa_node_id()); + const char *alg = kpp_alg_name(tfm); + unsigned int reqsize; if (!inst) return -EINVAL; - kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->ftfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->ftfm)) + return PTR_ERR(ctx->ftfm); + + crypto_kpp_set_flags(ctx->ftfm, crypto_kpp_get_flags(tfm)); + + reqsize = max(sizeof(struct qat_asym_request) + 64, + sizeof(struct kpp_request) + crypto_kpp_reqsize(ctx->ftfm)); + + kpp_set_reqsize(tfm, reqsize); ctx->p_size = 0; ctx->g2 = false; @@ -506,6 +556,9 @@ static void qat_dh_exit_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); + if (ctx->ftfm) + crypto_free_kpp(ctx->ftfm); + qat_dh_clear_ctx(dev, ctx); qat_crypto_put_instance(ctx->inst); } @@ -1265,8 +1318,8 @@ static struct akcipher_alg rsa = { static struct kpp_alg dh = { .set_secret = qat_dh_set_secret, - .generate_public_key = qat_dh_compute_value, - .compute_shared_secret = qat_dh_compute_value, + .generate_public_key = qat_dh_generate_public_key, + .compute_shared_secret = qat_dh_compute_shared_secret, .max_size = qat_dh_max_size, .init = qat_dh_init_tfm, .exit = qat_dh_exit_tfm, @@ -1276,6 +1329,7 @@ static struct kpp_alg dh = { .cra_priority = 1000, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct qat_dh_ctx), + .cra_flags = CRYPTO_ALG_NEED_FALLBACK, }, }; diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c b/drivers/crypto/intel/qat/qat_common/qat_bl.c index 76baed0a76c0..338acf29c487 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.c +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c @@ -81,7 +81,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!bufl)) return -ENOMEM; } else { - bufl = &buf->sgl_src.sgl_hdr; + bufl = container_of(&buf->sgl_src.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(bufl, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_src_valid = true; } @@ -139,7 +140,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!buflout)) goto err_in; } else { - buflout = &buf->sgl_dst.sgl_hdr; + buflout = container_of(&buf->sgl_dst.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(buflout, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_dst_valid = true; } diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.h b/drivers/crypto/intel/qat/qat_common/qat_bl.h index d87e4f35ac39..85bc32a9ec0e 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.h +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.h @@ -15,14 +15,17 @@ struct qat_alg_buf { } __packed; struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; + /* New members must be added within the __struct_group() macro below. */ + __struct_group(qat_alg_buf_list_hdr, hdr, __packed, + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + ); struct qat_alg_buf buffers[]; } __packed; struct qat_alg_fixed_buf_list { - struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf_list_hdr sgl_hdr; struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; } __packed __aligned(64); diff --git a/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c new file mode 100644 index 000000000000..892c2283a50e --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/dev_printk.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/qat/qat_mig_dev.h> +#include "adf_accel_devices.h" +#include "adf_common_drv.h" + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id) +{ + struct adf_accel_dev *accel_dev; + struct qat_migdev_ops *ops; + struct qat_mig_dev *mdev; + + accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + if (!accel_dev) + return ERR_PTR(-ENODEV); + + ops = GET_VFMIG_OPS(accel_dev); + if (!ops || !ops->init || !ops->cleanup || !ops->reset || !ops->open || + !ops->close || !ops->suspend || !ops->resume || !ops->save_state || + !ops->load_state || !ops->save_setup || !ops->load_setup) + return ERR_PTR(-EINVAL); + + mdev = kmalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + mdev->vf_id = vf_id; + mdev->parent_accel_dev = accel_dev; + + return mdev; +} +EXPORT_SYMBOL_GPL(qat_vfmig_create); + +int qat_vfmig_init(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->init(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_init); + +void qat_vfmig_cleanup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->cleanup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_cleanup); + +void qat_vfmig_reset(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->reset(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_reset); + +int qat_vfmig_open(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->open(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_open); + +void qat_vfmig_close(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + GET_VFMIG_OPS(accel_dev)->close(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_close); + +int qat_vfmig_suspend(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->suspend(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_suspend); + +int qat_vfmig_resume(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->resume(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_resume); + +int qat_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->save_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_state); + +int qat_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->save_setup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_setup); + +int qat_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_state); + +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_setup(mdev, size); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_setup); + +void qat_vfmig_destroy(struct qat_mig_dev *mdev) +{ + kfree(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_destroy); diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index af14090cc4be..6e24d57e6b98 100644 --- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -5,6 +5,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_dh895xcc_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 70e56cc16ece..f4ee4c2e00da 100644 --- a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c index 79b4e74804f6..6bfc59e67747 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -138,6 +138,10 @@ int cn10k_cpt_hw_ctx_init(struct pci_dev *pdev, return -ENOMEM; cptr_dma = dma_map_single(&pdev->dev, hctx, CN10K_CPT_HW_CTX_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, cptr_dma)) { + kfree(hctx); + return -ENOMEM; + } cn10k_cpt_hw_ctx_set(hctx, 1); er_ctx->hw_ctx = hctx; diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 2ab90ec10e61..82214cde2bcd 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -251,7 +251,9 @@ int nx842_crypto_compress(struct crypto_tfm *tfm, u8 *dst, unsigned int *dlen) { struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); - struct nx842_crypto_header *hdr = &ctx->header; + struct nx842_crypto_header *hdr = + container_of(&ctx->header, + struct nx842_crypto_header, hdr); struct nx842_crypto_param p; struct nx842_constraints c = *ctx->driver->constraints; unsigned int groups, hdrsize, h; @@ -490,7 +492,7 @@ int nx842_crypto_decompress(struct crypto_tfm *tfm, } memcpy(&ctx->header, src, hdr_len); - hdr = &ctx->header; + hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr); for (n = 0; n < hdr->groups; n++) { /* ignore applies to last group */ diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index 7590bfb24d79..25fa70b2112c 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -157,9 +157,11 @@ struct nx842_crypto_header_group { } __packed; struct nx842_crypto_header { - __be16 magic; /* NX842_CRYPTO_MAGIC */ - __be16 ignore; /* decompressed end bytes to ignore */ - u8 groups; /* total groups in this header */ + struct_group_tagged(nx842_crypto_header_hdr, hdr, + __be16 magic; /* NX842_CRYPTO_MAGIC */ + __be16 ignore; /* decompressed end bytes to ignore */ + u8 groups; /* total groups in this header */ + ); struct nx842_crypto_header_group group[]; } __packed; @@ -171,7 +173,7 @@ struct nx842_crypto_ctx { u8 *wmem; u8 *sbounce, *dbounce; - struct nx842_crypto_header header; + struct nx842_crypto_header_hdr header; struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; struct nx842_driver *driver; diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 3423b5cde1c7..96d4af5d48a6 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -559,7 +559,7 @@ static int sahara_aes_process(struct skcipher_request *req) struct sahara_ctx *ctx; struct sahara_aes_reqctx *rctx; int ret; - unsigned long timeout; + unsigned long time_left; /* Request is ready to be dispatched by the device */ dev_dbg(dev->device, @@ -597,15 +597,15 @@ static int sahara_aes_process(struct skcipher_request *req) if (ret) return -EINVAL; - timeout = wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, DMA_FROM_DEVICE); dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "AES timeout\n"); return -ETIMEDOUT; } @@ -931,7 +931,7 @@ static int sahara_sha_process(struct ahash_request *req) struct sahara_dev *dev = dev_ptr; struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); int ret; - unsigned long timeout; + unsigned long time_left; ret = sahara_sha_prepare_request(req); if (!ret) @@ -963,14 +963,14 @@ static int sahara_sha_process(struct ahash_request *req) sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); - timeout = wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); if (rctx->sg_in_idx) dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "SHA timeout\n"); return -ETIMEDOUT; } diff --git a/drivers/crypto/starfive/Kconfig b/drivers/crypto/starfive/Kconfig index cb59357b58b2..0fe389e9f932 100644 --- a/drivers/crypto/starfive/Kconfig +++ b/drivers/crypto/starfive/Kconfig @@ -14,6 +14,10 @@ config CRYPTO_DEV_JH7110 select CRYPTO_RSA select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_CTR help Support for StarFive JH7110 crypto hardware acceleration engine. This module provides acceleration for public key algo, diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c index 1ac15cc4ef3c..86a1a1fa9f8f 100644 --- a/drivers/crypto/starfive/jh7110-aes.c +++ b/drivers/crypto/starfive/jh7110-aes.c @@ -78,7 +78,7 @@ static inline int is_gcm(struct starfive_cryp_dev *cryp) return (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM; } -static inline int is_encrypt(struct starfive_cryp_dev *cryp) +static inline bool is_encrypt(struct starfive_cryp_dev *cryp) { return cryp->flags & FLG_ENCRYPT; } @@ -103,16 +103,6 @@ static void starfive_aes_aead_hw_start(struct starfive_cryp_ctx *ctx, u32 hw_mod } } -static inline void starfive_aes_set_ivlen(struct starfive_cryp_ctx *ctx) -{ - struct starfive_cryp_dev *cryp = ctx->cryp; - - if (is_gcm(cryp)) - writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); - else - writel(AES_BLOCK_SIZE, cryp->base + STARFIVE_AES_IVLEN); -} - static inline void starfive_aes_set_alen(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -261,7 +251,6 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) rctx->csr.aes.mode = hw_mode; rctx->csr.aes.cmode = !is_encrypt(cryp); - rctx->csr.aes.ie = 1; rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_1; if (cryp->side_chan) { @@ -279,7 +268,7 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) case STARFIVE_AES_MODE_GCM: starfive_aes_set_alen(ctx); starfive_aes_set_mlen(ctx); - starfive_aes_set_ivlen(ctx); + writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); starfive_aes_aead_hw_start(ctx, hw_mode); starfive_aes_write_iv(ctx, (void *)cryp->req.areq->iv); break; @@ -300,52 +289,49 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) return cryp->err; } -static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp) +static int starfive_aes_read_authtag(struct starfive_cryp_ctx *ctx) { - int i, start_addr; + struct starfive_cryp_dev *cryp = ctx->cryp; + struct starfive_cryp_request_ctx *rctx = ctx->rctx; + int i; if (starfive_aes_wait_busy(cryp)) return dev_err_probe(cryp->dev, -ETIMEDOUT, "Timeout waiting for tag generation."); - start_addr = STARFIVE_AES_NONCE0; - - if (is_gcm(cryp)) - for (i = 0; i < AES_BLOCK_32; i++, start_addr += 4) - cryp->tag_out[i] = readl(cryp->base + start_addr); - else + if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM) { + cryp->tag_out[0] = readl(cryp->base + STARFIVE_AES_NONCE0); + cryp->tag_out[1] = readl(cryp->base + STARFIVE_AES_NONCE1); + cryp->tag_out[2] = readl(cryp->base + STARFIVE_AES_NONCE2); + cryp->tag_out[3] = readl(cryp->base + STARFIVE_AES_NONCE3); + } else { for (i = 0; i < AES_BLOCK_32; i++) cryp->tag_out[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); + } if (is_encrypt(cryp)) { - scatterwalk_copychunks(cryp->tag_out, &cryp->out_walk, cryp->authsize, 1); + scatterwalk_map_and_copy(cryp->tag_out, rctx->out_sg, + cryp->total_in, cryp->authsize, 1); } else { - scatterwalk_copychunks(cryp->tag_in, &cryp->in_walk, cryp->authsize, 0); - if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize)) - return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n"); + return -EBADMSG; } return 0; } -static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp) +static void starfive_aes_finish_req(struct starfive_cryp_ctx *ctx) { - union starfive_aes_csr csr; + struct starfive_cryp_dev *cryp = ctx->cryp; int err = cryp->err; if (!err && cryp->authsize) - err = starfive_aes_read_authtag(cryp); + err = starfive_aes_read_authtag(ctx); if (!err && ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC || (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CTR)) starfive_aes_get_iv(cryp, (void *)cryp->req.sreq->iv); - /* reset irq flags*/ - csr.v = 0; - csr.aesrst = 1; - writel(csr.v, cryp->base + STARFIVE_AES_CSR); - if (cryp->authsize) crypto_finalize_aead_request(cryp->engine, cryp->req.areq, err); else @@ -353,39 +339,6 @@ static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp) err); } -void starfive_aes_done_task(unsigned long param) -{ - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; - u32 block[AES_BLOCK_32]; - u32 stat; - int i; - - for (i = 0; i < AES_BLOCK_32; i++) - block[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); - - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_out), 1); - - cryp->total_out -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_out); - - if (!cryp->total_out) { - starfive_aes_finish_req(cryp); - return; - } - - memset(block, 0, AES_BLOCK_SIZE); - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); - - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); -} - static int starfive_aes_gcm_write_adata(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -451,60 +404,165 @@ static int starfive_aes_ccm_write_adata(struct starfive_cryp_ctx *ctx) return 0; } -static int starfive_aes_prepare_req(struct skcipher_request *req, - struct aead_request *areq) +static void starfive_aes_dma_done(void *param) { - struct starfive_cryp_ctx *ctx; - struct starfive_cryp_request_ctx *rctx; - struct starfive_cryp_dev *cryp; + struct starfive_cryp_dev *cryp = param; - if (!req && !areq) - return -EINVAL; + complete(&cryp->dma_done); +} - ctx = req ? crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) : - crypto_aead_ctx(crypto_aead_reqtfm(areq)); +static void starfive_aes_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.direction = DMA_MEM_TO_DEV; + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_in.src_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; - cryp = ctx->cryp; - rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq); + dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - if (req) { - cryp->req.sreq = req; - cryp->total_in = req->cryptlen; - cryp->total_out = req->cryptlen; - cryp->assoclen = 0; - cryp->authsize = 0; - } else { - cryp->req.areq = areq; - cryp->assoclen = areq->assoclen; - cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq)); - if (is_encrypt(cryp)) { - cryp->total_in = areq->cryptlen; - cryp->total_out = areq->cryptlen; - } else { - cryp->total_in = areq->cryptlen - cryp->authsize; - cryp->total_out = cryp->total_in; - } - } + cryp->cfg_out.direction = DMA_DEV_TO_MEM; + cryp->cfg_out.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_out.dst_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_out.src_maxburst = 4; + cryp->cfg_out.dst_maxburst = 4; + cryp->cfg_out.src_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; - rctx->in_sg = req ? req->src : areq->src; - scatterwalk_start(&cryp->in_walk, rctx->in_sg); + dmaengine_slave_config(cryp->rx, &cryp->cfg_out); - rctx->out_sg = req ? req->dst : areq->dst; - scatterwalk_start(&cryp->out_walk, rctx->out_sg); + init_completion(&cryp->dma_done); +} - if (cryp->assoclen) { - rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); - if (!rctx->adata) - return dev_err_probe(cryp->dev, -ENOMEM, - "Failed to alloc memory for adata"); +static int starfive_aes_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst, + int len) +{ + struct dma_async_tx_descriptor *in_desc, *out_desc; + union starfive_alg_cr alg_cr; + int ret = 0, in_save, out_save; + + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.aes_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + in_save = sg_dma_len(src); + out_save = sg_dma_len(dst); - scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0); - scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->assoclen, 2); + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_OUT_LEN_OFFSET); + + sg_dma_len(src) = ALIGN(len, AES_BLOCK_SIZE); + sg_dma_len(dst) = ALIGN(len, AES_BLOCK_SIZE); + + out_desc = dmaengine_prep_slave_sg(cryp->rx, dst, 1, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!out_desc) { + ret = -EINVAL; + goto dma_err; } - ctx->rctx = rctx; + out_desc->callback = starfive_aes_dma_done; + out_desc->callback_param = cryp; + + reinit_completion(&cryp->dma_done); + dmaengine_submit(out_desc); + dma_async_issue_pending(cryp->rx); + + in_desc = dmaengine_prep_slave_sg(cryp->tx, src, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto dma_err; + } + + dmaengine_submit(in_desc); + dma_async_issue_pending(cryp->tx); + + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; + +dma_err: + sg_dma_len(src) = in_save; + sg_dma_len(dst) = out_save; + + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + return ret; +} + +static int starfive_aes_map_sg(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *stsg, *dtsg; + struct scatterlist _src[2], _dst[2]; + unsigned int remain = cryp->total_in; + unsigned int len, src_nents, dst_nents; + int ret; + + if (src == dst) { + for (stsg = src, dtsg = dst; remain > 0; + stsg = sg_next(stsg), dtsg = sg_next(dtsg)) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); + + dst_nents = src_nents; + len = min(sg_dma_len(stsg), remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (ret) + return ret; + + remain -= len; + } + } else { + for (stsg = src, dtsg = dst;;) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg src error\n"); + + dst_nents = dma_map_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (dst_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg dst error\n"); + + len = min(sg_dma_len(stsg), sg_dma_len(dtsg)); + len = min(len, remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + dma_unmap_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (ret) + return ret; + + remain -= len; + if (remain == 0) + break; + + if (sg_dma_len(stsg) - len) { + stsg = scatterwalk_ffwd(_src, stsg, len); + dtsg = sg_next(dtsg); + } else if (sg_dma_len(dtsg) - len) { + dtsg = scatterwalk_ffwd(_dst, dtsg, len); + stsg = sg_next(stsg); + } else { + stsg = sg_next(stsg); + dtsg = sg_next(dtsg); + } + } + } - return starfive_aes_hw_init(ctx); + return 0; } static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) @@ -513,35 +571,42 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) container_of(areq, struct skcipher_request, base); struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = skcipher_request_ctx(req); struct starfive_cryp_dev *cryp = ctx->cryp; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + int ret; - err = starfive_aes_prepare_req(req, NULL); - if (err) - return err; + cryp->req.sreq = req; + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + cryp->assoclen = 0; + cryp->authsize = 0; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); + rctx->in_sg = req->src; + rctx->out_sg = req->dst; + + ctx->rctx = rctx; + + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); + if (!cryp->total_in) + goto finish_req; - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); + starfive_aes_dma_init(cryp); + + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; + +finish_req: + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) +static int starfive_aes_init_tfm(struct crypto_skcipher *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -549,12 +614,26 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) if (!ctx->cryp) return -ENODEV; + ctx->skcipher_fbk = crypto_alloc_skcipher(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->skcipher_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->skcipher_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, alg_name); + crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_reqsize(ctx->skcipher_fbk)); return 0; } +static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm) +{ + struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->skcipher_fbk); +} + static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq) { struct aead_request *req = @@ -562,79 +641,99 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; - struct starfive_cryp_request_ctx *rctx; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + struct starfive_cryp_request_ctx *rctx = aead_request_ctx(req); + struct scatterlist _src[2], _dst[2]; + int ret; + + cryp->req.areq = req; + cryp->assoclen = req->assoclen; + cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(req)); + + rctx->in_sg = scatterwalk_ffwd(_src, req->src, cryp->assoclen); + if (req->src == req->dst) + rctx->out_sg = rctx->in_sg; + else + rctx->out_sg = scatterwalk_ffwd(_dst, req->dst, cryp->assoclen); + + if (is_encrypt(cryp)) { + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + } else { + cryp->total_in = req->cryptlen - cryp->authsize; + cryp->total_out = cryp->total_in; + scatterwalk_map_and_copy(cryp->tag_in, req->src, + cryp->total_in + cryp->assoclen, + cryp->authsize, 0); + } - err = starfive_aes_prepare_req(NULL, req); - if (err) - return err; + if (cryp->assoclen) { + rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); + if (!rctx->adata) + return dev_err_probe(cryp->dev, -ENOMEM, + "Failed to alloc memory for adata"); + + if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, cryp->assoclen), + rctx->adata, cryp->assoclen) != cryp->assoclen) + return -EINVAL; + } + + if (cryp->total_in) + sg_zero_buffer(rctx->in_sg, sg_nents(rctx->in_sg), + sg_dma_len(rctx->in_sg) - cryp->total_in, + cryp->total_in); - rctx = ctx->rctx; + ctx->rctx = rctx; + + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; if (!cryp->assoclen) goto write_text; if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) - cryp->err = starfive_aes_ccm_write_adata(ctx); + ret = starfive_aes_ccm_write_adata(ctx); else - cryp->err = starfive_aes_gcm_write_adata(ctx); + ret = starfive_aes_gcm_write_adata(ctx); kfree(rctx->adata); - if (cryp->err) - return cryp->err; + if (ret) + return ret; write_text: if (!cryp->total_in) goto finish_req; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); - - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); + starfive_aes_dma_init(cryp); - return 0; + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; finish_req: - starfive_aes_finish_req(cryp); + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm) +static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); - struct starfive_cryp_dev *cryp = ctx->cryp; - struct crypto_tfm *aead = crypto_aead_tfm(tfm); - struct crypto_alg *alg = aead->__crt_alg; ctx->cryp = starfive_cryp_find_dev(ctx); if (!ctx->cryp) return -ENODEV; - if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->aead_fbk = crypto_alloc_aead(alg->cra_name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->aead_fbk)) - return dev_err_probe(cryp->dev, PTR_ERR(ctx->aead_fbk), - "%s() failed to allocate fallback for %s\n", - __func__, alg->cra_name); - } + ctx->aead_fbk = crypto_alloc_aead(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->aead_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->aead_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, alg_name); - crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) + - sizeof(struct aead_request)); + crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + + crypto_aead_reqsize(ctx->aead_fbk)); return 0; } @@ -646,6 +745,46 @@ static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->aead_fbk); } +static bool starfive_aes_check_unaligned(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *tsg; + int i; + + for_each_sg(src, tsg, sg_nents(src), i) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) + return true; + + if (src != dst) + for_each_sg(dst, tsg, sg_nents(dst), i) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) + return true; + + return false; +} + +static int starfive_aes_do_fallback(struct skcipher_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct skcipher_request *subreq = skcipher_request_ctx(req); + + skcipher_request_set_tfm(subreq, ctx->skcipher_fbk); + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + + return enc ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); +} + static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -660,32 +799,54 @@ static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) if (req->cryptlen & blocksize_align) return -EINVAL; + if (starfive_aes_check_unaligned(cryp, req->src, req->dst)) + return starfive_aes_do_fallback(req, is_encrypt(cryp)); + return crypto_transfer_skcipher_request_to_engine(cryp->engine, req); } +static int starfive_aes_aead_do_fallback(struct aead_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct aead_request *subreq = aead_request_ctx(req); + + aead_request_set_tfm(subreq, ctx->aead_fbk); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + aead_request_set_ad(subreq, req->assoclen); + + return enc ? crypto_aead_encrypt(subreq) : + crypto_aead_decrypt(subreq); +} + static int starfive_aes_aead_crypt(struct aead_request *req, unsigned long flags) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *src, *dst, _src[2], _dst[2]; cryp->flags = flags; - /* - * HW engine could not perform CCM tag verification on - * non-blocksize aligned text, use fallback algo instead + /* aes-ccm does not support tag verification for non-aligned text, + * use fallback for ccm decryption instead. */ - if (ctx->aead_fbk && !is_encrypt(cryp)) { - struct aead_request *subreq = aead_request_ctx(req); + if (((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) && + !is_encrypt(cryp)) + return starfive_aes_aead_do_fallback(req, 0); - aead_request_set_tfm(subreq, ctx->aead_fbk); - aead_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - aead_request_set_crypt(subreq, req->src, - req->dst, req->cryptlen, req->iv); - aead_request_set_ad(subreq, req->assoclen); + src = scatterwalk_ffwd(_src, req->src, req->assoclen); - return crypto_aead_decrypt(subreq); - } + if (req->src == req->dst) + dst = src; + else + dst = scatterwalk_ffwd(_dst, req->dst, req->assoclen); + + if (starfive_aes_check_unaligned(cryp, src, dst)) + return starfive_aes_aead_do_fallback(req, is_encrypt(cryp)); return crypto_transfer_aead_request_to_engine(cryp->engine, req); } @@ -706,7 +867,7 @@ static int starfive_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - return 0; + return crypto_skcipher_setkey(ctx->skcipher_fbk, key, keylen); } static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, @@ -725,16 +886,20 @@ static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - if (ctx->aead_fbk) - return crypto_aead_setkey(ctx->aead_fbk, key, keylen); - - return 0; + return crypto_aead_setkey(ctx->aead_fbk, key, keylen); } static int starfive_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - return crypto_gcm_check_authsize(authsize); + struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_gcm_check_authsize(authsize); + if (ret) + return ret; + + return crypto_aead_setauthsize(ctx->aead_fbk, authsize); } static int starfive_aes_ccm_setauthsize(struct crypto_aead *tfm, @@ -820,9 +985,35 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req) return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM); } +static int starfive_aes_ecb_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ecb(aes-generic)"); +} + +static int starfive_aes_cbc_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "cbc(aes-generic)"); +} + +static int starfive_aes_ctr_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ctr(aes-generic)"); +} + +static int starfive_aes_ccm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "ccm_base(ctr(aes-generic),cbcmac(aes-generic))"); +} + +static int starfive_aes_gcm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "gcm_base(ctr(aes-generic),ghash-generic)"); +} + static struct skcipher_engine_alg skcipher_algs[] = { { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ecb_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ecb_encrypt, .base.decrypt = starfive_aes_ecb_decrypt, @@ -832,7 +1023,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ecb(aes)", .cra_driver_name = "starfive-ecb-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -842,7 +1034,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_cbc_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_cbc_encrypt, .base.decrypt = starfive_aes_cbc_decrypt, @@ -853,7 +1046,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "starfive-cbc-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -863,7 +1057,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ctr_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ctr_encrypt, .base.decrypt = starfive_aes_ctr_decrypt, @@ -874,7 +1069,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ctr(aes)", .cra_driver_name = "starfive-ctr-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -892,7 +1088,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_gcm_setauthsize, .base.encrypt = starfive_aes_gcm_encrypt, .base.decrypt = starfive_aes_gcm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_gcm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = GCM_AES_IV_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, @@ -900,7 +1096,8 @@ static struct aead_engine_alg aead_algs[] = { .cra_name = "gcm(aes)", .cra_driver_name = "starfive-gcm-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -914,7 +1111,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_ccm_setauthsize, .base.encrypt = starfive_aes_ccm_encrypt, .base.decrypt = starfive_aes_ccm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_ccm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = AES_BLOCK_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c index 425fddf3a8ab..e4dfed7ee0b0 100644 --- a/drivers/crypto/starfive/jh7110-cryp.c +++ b/drivers/crypto/starfive/jh7110-cryp.c @@ -89,34 +89,10 @@ static void starfive_dma_cleanup(struct starfive_cryp_dev *cryp) dma_release_channel(cryp->rx); } -static irqreturn_t starfive_cryp_irq(int irq, void *priv) -{ - u32 status; - u32 mask; - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)priv; - - mask = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - status = readl(cryp->base + STARFIVE_IE_FLAG_OFFSET); - if (status & STARFIVE_IE_FLAG_AES_DONE) { - mask |= STARFIVE_IE_MASK_AES_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->aes_done); - } - - if (status & STARFIVE_IE_FLAG_HASH_DONE) { - mask |= STARFIVE_IE_MASK_HASH_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->hash_done); - } - - return IRQ_HANDLED; -} - static int starfive_cryp_probe(struct platform_device *pdev) { struct starfive_cryp_dev *cryp; struct resource *res; - int irq; int ret; cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); @@ -131,9 +107,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->base), "Error remapping memory for platform device\n"); - tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp); - tasklet_init(&cryp->hash_done, starfive_hash_done_task, (unsigned long)cryp); - cryp->phys_base = res->start; cryp->dma_maxburst = 32; cryp->side_chan = side_chan; @@ -153,16 +126,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->rst), "Error getting hardware reset line\n"); - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, starfive_cryp_irq, 0, pdev->name, - (void *)cryp); - if (ret) - return dev_err_probe(&pdev->dev, ret, - "Failed to register interrupt handler\n"); - clk_prepare_enable(cryp->hclk); clk_prepare_enable(cryp->ahb); reset_control_deassert(cryp->rst); @@ -219,9 +182,6 @@ err_dma_init: clk_disable_unprepare(cryp->ahb); reset_control_assert(cryp->rst); - tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); - return ret; } @@ -233,9 +193,6 @@ static void starfive_cryp_remove(struct platform_device *pdev) starfive_hash_unregister_algs(); starfive_rsa_unregister_algs(); - tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); - crypto_engine_stop(cryp->engine); crypto_engine_exit(cryp->engine); diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index 6cdf6db5d904..494a74f52706 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -91,6 +91,7 @@ union starfive_hash_csr { #define STARFIVE_HASH_KEY_DONE BIT(13) u32 key_done :1; u32 key_flag :1; +#define STARFIVE_HASH_HMAC_DONE BIT(15) u32 hmac_done :1; #define STARFIVE_HASH_BUSY BIT(16) u32 busy :1; @@ -168,6 +169,7 @@ struct starfive_cryp_ctx { struct crypto_akcipher *akcipher_fbk; struct crypto_ahash *ahash_fbk; struct crypto_aead *aead_fbk; + struct crypto_skcipher *skcipher_fbk; }; struct starfive_cryp_dev { @@ -185,11 +187,8 @@ struct starfive_cryp_dev { struct dma_chan *rx; struct dma_slave_config cfg_in; struct dma_slave_config cfg_out; - struct scatter_walk in_walk; - struct scatter_walk out_walk; struct crypto_engine *engine; - struct tasklet_struct aes_done; - struct tasklet_struct hash_done; + struct completion dma_done; size_t assoclen; size_t total_in; size_t total_out; @@ -236,7 +235,4 @@ void starfive_rsa_unregister_algs(void); int starfive_aes_register_algs(void); void starfive_aes_unregister_algs(void); - -void starfive_hash_done_task(unsigned long param); -void starfive_aes_done_task(unsigned long param); #endif diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c index b6d1808012ca..2c60a1047bc3 100644 --- a/drivers/crypto/starfive/jh7110-hash.c +++ b/drivers/crypto/starfive/jh7110-hash.c @@ -36,15 +36,22 @@ #define STARFIVE_HASH_BUFLEN SHA512_BLOCK_SIZE #define STARFIVE_HASH_RESET 0x2 -static inline int starfive_hash_wait_busy(struct starfive_cryp_ctx *ctx) +static inline int starfive_hash_wait_busy(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = ctx->cryp; u32 status; return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, !(status & STARFIVE_HASH_BUSY), 10, 100000); } +static inline int starfive_hash_wait_hmac_done(struct starfive_cryp_dev *cryp) +{ + u32 status; + + return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, + (status & STARFIVE_HASH_HMAC_DONE), 10, 100000); +} + static inline int starfive_hash_wait_key_done(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -84,64 +91,26 @@ static int starfive_hash_hmac_key(struct starfive_cryp_ctx *ctx) return 0; } -static void starfive_hash_start(void *param) +static void starfive_hash_start(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_ctx *ctx = param; - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - union starfive_alg_cr alg_cr; union starfive_hash_csr csr; - u32 stat; - - dma_unmap_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - - alg_cr.v = 0; - alg_cr.clear = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); csr.v = readl(cryp->base + STARFIVE_HASH_SHACSR); csr.firstb = 0; csr.final = 1; - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_HASH_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); writel(csr.v, cryp->base + STARFIVE_HASH_SHACSR); } -static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) +static void starfive_hash_dma_callback(void *param) { - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - struct dma_async_tx_descriptor *in_desc; - union starfive_alg_cr alg_cr; - int total_len; - int ret; - - if (!rctx->total) { - starfive_hash_start(ctx); - return 0; - } + struct starfive_cryp_dev *cryp = param; - writel(rctx->total, cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); - - total_len = rctx->total; - total_len = (total_len & 0x3) ? (((total_len >> 2) + 1) << 2) : total_len; - sg_dma_len(rctx->in_sg) = total_len; - - alg_cr.v = 0; - alg_cr.start = 1; - alg_cr.hash_dma_en = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - - ret = dma_map_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - if (!ret) - return dev_err_probe(cryp->dev, -EINVAL, "dma_map_sg() error\n"); + complete(&cryp->dma_done); +} - cryp->cfg_in.direction = DMA_MEM_TO_DEV; - cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; +static void starfive_hash_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; cryp->cfg_in.src_maxburst = cryp->dma_maxburst; cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; @@ -149,50 +118,48 @@ static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - in_desc = dmaengine_prep_slave_sg(cryp->tx, rctx->in_sg, - ret, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - if (!in_desc) - return -EINVAL; - - in_desc->callback = starfive_hash_start; - in_desc->callback_param = ctx; - - dmaengine_submit(in_desc); - dma_async_issue_pending(cryp->tx); - - return 0; + init_completion(&cryp->dma_done); } -static int starfive_hash_xmit(struct starfive_cryp_ctx *ctx) +static int starfive_hash_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *sg) { - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; + struct dma_async_tx_descriptor *in_desc; + union starfive_alg_cr alg_cr; int ret = 0; - rctx->csr.hash.v = 0; - rctx->csr.hash.reset = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); - - if (starfive_hash_wait_busy(ctx)) - return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting engine.\n"); + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.hash_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - rctx->csr.hash.v = 0; - rctx->csr.hash.mode = ctx->hash_mode; - rctx->csr.hash.ie = 1; + writel(sg_dma_len(sg), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + sg_dma_len(sg) = ALIGN(sg_dma_len(sg), sizeof(u32)); - if (ctx->is_hmac) { - ret = starfive_hash_hmac_key(ctx); - if (ret) - return ret; - } else { - rctx->csr.hash.start = 1; - rctx->csr.hash.firstb = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); + in_desc = dmaengine_prep_slave_sg(cryp->tx, sg, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto end; } - return starfive_hash_xmit_dma(ctx); + reinit_completion(&cryp->dma_done); + in_desc->callback = starfive_hash_dma_callback; + in_desc->callback_param = cryp; + + dmaengine_submit(in_desc); + dma_async_issue_pending(cryp->tx); + + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; + +end: + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + return ret; } static int starfive_hash_copy_hash(struct ahash_request *req) @@ -215,58 +182,74 @@ static int starfive_hash_copy_hash(struct ahash_request *req) return 0; } -void starfive_hash_done_task(unsigned long param) +static void starfive_hash_done_task(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; int err = cryp->err; if (!err) err = starfive_hash_copy_hash(cryp->req.hreq); - /* Reset to clear hash_done in irq register*/ - writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - crypto_finalize_hash_request(cryp->engine, cryp->req.hreq, err); } -static int starfive_hash_check_aligned(struct scatterlist *sg, size_t total, size_t align) +static int starfive_hash_one_request(struct crypto_engine *engine, void *areq) { - int len = 0; + struct ahash_request *req = container_of(areq, struct ahash_request, + base); + struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = ctx->rctx; + struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *tsg; + int ret, src_nents, i; - if (!total) - return 0; + writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - if (!IS_ALIGNED(total, align)) - return -EINVAL; + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting hardware\n"); - while (sg) { - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return -EINVAL; + rctx->csr.hash.v = 0; + rctx->csr.hash.mode = ctx->hash_mode; - if (!IS_ALIGNED(sg->length, align)) - return -EINVAL; + if (ctx->is_hmac) { + ret = starfive_hash_hmac_key(ctx); + if (ret) + return ret; + } else { + rctx->csr.hash.start = 1; + rctx->csr.hash.firstb = 1; + writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); + } + + /* No input message, get digest and end. */ + if (!rctx->total) + goto hash_start; + + starfive_hash_dma_init(cryp); + + for_each_sg(rctx->in_sg, tsg, rctx->in_sg_len, i) { + src_nents = dma_map_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); - len += sg->length; - sg = sg_next(sg); + ret = starfive_hash_dma_xfer(cryp, tsg); + dma_unmap_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (ret) + return ret; } - if (len != total) - return -EINVAL; +hash_start: + starfive_hash_start(cryp); - return 0; -} + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error generating digest\n"); -static int starfive_hash_one_request(struct crypto_engine *engine, void *areq) -{ - struct ahash_request *req = container_of(areq, struct ahash_request, - base); - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct starfive_cryp_dev *cryp = ctx->cryp; + if (ctx->is_hmac) + cryp->err = starfive_hash_wait_hmac_done(cryp); - if (!cryp) - return -ENODEV; + starfive_hash_done_task(cryp); - return starfive_hash_xmit(ctx); + return 0; } static int starfive_hash_init(struct ahash_request *req) @@ -337,22 +320,6 @@ static int starfive_hash_finup(struct ahash_request *req) return crypto_ahash_finup(&rctx->ahash_fbk_req); } -static int starfive_hash_digest_fb(struct ahash_request *req) -{ - struct starfive_cryp_request_ctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(tfm); - - ahash_request_set_tfm(&rctx->ahash_fbk_req, ctx->ahash_fbk); - ahash_request_set_callback(&rctx->ahash_fbk_req, req->base.flags, - req->base.complete, req->base.data); - - ahash_request_set_crypt(&rctx->ahash_fbk_req, req->src, - req->result, req->nbytes); - - return crypto_ahash_digest(&rctx->ahash_fbk_req); -} - static int starfive_hash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -370,9 +337,6 @@ static int starfive_hash_digest(struct ahash_request *req) rctx->in_sg_len = sg_nents_for_len(rctx->in_sg, rctx->total); ctx->rctx = rctx; - if (starfive_hash_check_aligned(rctx->in_sg, rctx->total, rctx->blksize)) - return starfive_hash_digest_fb(req); - return crypto_transfer_hash_request_to_engine(cryp->engine, req); } @@ -406,7 +370,8 @@ static int starfive_hash_import(struct ahash_request *req, const void *in) static int starfive_hash_init_tfm(struct crypto_ahash *hash, const char *alg_name, - unsigned int mode) + unsigned int mode, + bool is_hmac) { struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); @@ -426,7 +391,7 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash, crypto_ahash_set_reqsize(hash, sizeof(struct starfive_cryp_request_ctx) + crypto_ahash_reqsize(ctx->ahash_fbk)); - ctx->keylen = 0; + ctx->is_hmac = is_hmac; ctx->hash_mode = mode; return 0; @@ -529,81 +494,61 @@ static int starfive_hash_setkey(struct crypto_ahash *hash, static int starfive_sha224_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha224-generic", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 0); } static int starfive_sha256_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha256-generic", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 0); } static int starfive_sha384_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha384-generic", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 0); } static int starfive_sha512_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha512-generic", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 0); } static int starfive_sm3_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sm3-generic", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 0); } static int starfive_hmac_sha224_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha224-generic)", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 1); } static int starfive_hmac_sha256_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha256-generic)", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 1); } static int starfive_hmac_sha384_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha384-generic)", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 1); } static int starfive_hmac_sha512_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha512-generic)", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 1); } static int starfive_hmac_sm3_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sm3-generic)", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 1); } static struct ahash_engine_alg algs_sha2_sm3[] = { diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index cf8bda7f0855..33093ba4b13a 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -45,6 +45,9 @@ static inline int starfive_pka_wait_done(struct starfive_cryp_ctx *ctx) static void starfive_rsa_free_key(struct starfive_rsa_key *key) { + if (!key->key_sz) + return; + kfree_sensitive(key->d); kfree_sensitive(key->e); kfree_sensitive(key->n); @@ -273,7 +276,6 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc) err_rsa_crypt: writel(STARFIVE_RSA_RESET, cryp->base + STARFIVE_PKA_CACR_OFFSET); - kfree(rctx->rsa_data); return ret; } @@ -534,16 +536,14 @@ static int starfive_rsa_init_tfm(struct crypto_akcipher *tfm) { struct starfive_cryp_ctx *ctx = akcipher_tfm_ctx(tfm); + ctx->cryp = starfive_cryp_find_dev(ctx); + if (!ctx->cryp) + return -ENODEV; + ctx->akcipher_fbk = crypto_alloc_akcipher("rsa-generic", 0, 0); if (IS_ERR(ctx->akcipher_fbk)) return PTR_ERR(ctx->akcipher_fbk); - ctx->cryp = starfive_cryp_find_dev(ctx); - if (!ctx->cryp) { - crypto_free_akcipher(ctx->akcipher_fbk); - return -ENODEV; - } - akcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + sizeof(struct crypto_akcipher) + 32); diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 34e0d7e381a8..351827372ea6 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -94,6 +94,7 @@ #define HASH_FLAGS_ERRORS BIT(21) #define HASH_FLAGS_EMPTY BIT(22) #define HASH_FLAGS_HMAC BIT(23) +#define HASH_FLAGS_SGS_COPIED BIT(24) #define HASH_OP_UPDATE 1 #define HASH_OP_FINAL 2 @@ -145,7 +146,7 @@ struct stm32_hash_state { u16 bufcnt; u16 blocklen; - u8 buffer[HASH_BUFLEN] __aligned(4); + u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32)); /* hash state */ u32 hw_context[3 + HASH_CSR_NB_MAX]; @@ -158,8 +159,8 @@ struct stm32_hash_request_ctx { u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32)); size_t digcnt; - /* DMA */ struct scatterlist *sg; + struct scatterlist sgl[2]; /* scatterlist used to realize alignment */ unsigned int offset; unsigned int total; struct scatterlist sg_key; @@ -184,6 +185,7 @@ struct stm32_hash_pdata { size_t algs_info_size; bool has_sr; bool has_mdmat; + bool context_secured; bool broken_emptymsg; bool ux500; }; @@ -195,6 +197,7 @@ struct stm32_hash_dev { struct reset_control *rst; void __iomem *io_base; phys_addr_t phys_base; + u8 xmit_buf[HASH_BUFLEN] __aligned(sizeof(u32)); u32 dma_mode; bool polled; @@ -220,6 +223,8 @@ static struct stm32_hash_drv stm32_hash = { }; static void stm32_hash_dma_callback(void *param); +static int stm32_hash_prepare_request(struct ahash_request *req); +static void stm32_hash_unprepare_request(struct ahash_request *req); static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset) { @@ -232,6 +237,11 @@ static inline void stm32_hash_write(struct stm32_hash_dev *hdev, writel_relaxed(value, hdev->io_base + offset); } +/** + * stm32_hash_wait_busy - wait until hash processor is available. It return an + * error if the hash core is processing a block of data for more than 10 ms. + * @hdev: the stm32_hash_dev device. + */ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) { u32 status; @@ -245,6 +255,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) !(status & HASH_SR_BUSY), 10, 10000); } +/** + * stm32_hash_set_nblw - set the number of valid bytes in the last word. + * @hdev: the stm32_hash_dev device. + * @length: the length of the final word. + */ static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length) { u32 reg; @@ -282,6 +297,11 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev) return 0; } +/** + * stm32_hash_write_ctrl - Initialize the hash processor, only if + * HASH_FLAGS_INIT is set. + * @hdev: the stm32_hash_dev device + */ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); @@ -469,9 +489,7 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; - u32 *preg = state->hw_context; int bufcnt, err = 0, final; - int i, swap_reg; dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags); @@ -495,34 +513,23 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) return stm32_hash_xmit_cpu(hdev, state->buffer, bufcnt, 1); } - if (!(hdev->flags & HASH_FLAGS_INIT)) - return 0; - - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - - swap_reg = hash_swap_reg(rctx); - - if (!hdev->pdata->ux500) - *preg++ = stm32_hash_read(hdev, HASH_IMR); - *preg++ = stm32_hash_read(hdev, HASH_STR); - *preg++ = stm32_hash_read(hdev, HASH_CR); - for (i = 0; i < swap_reg; i++) - *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); - - state->flags |= HASH_FLAGS_INIT; - return err; } static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, - struct scatterlist *sg, int length, int mdma) + struct scatterlist *sg, int length, int mdmat) { struct dma_async_tx_descriptor *in_desc; dma_cookie_t cookie; u32 reg; int err; + dev_dbg(hdev->dev, "%s mdmat: %x length: %d\n", __func__, mdmat, length); + + /* do not use dma if there is no data to send */ + if (length <= 0) + return 0; + in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -535,13 +542,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, in_desc->callback = stm32_hash_dma_callback; in_desc->callback_param = hdev; - hdev->flags |= HASH_FLAGS_FINAL; hdev->flags |= HASH_FLAGS_DMA_ACTIVE; reg = stm32_hash_read(hdev, HASH_CR); if (hdev->pdata->has_mdmat) { - if (mdma) + if (mdmat) reg |= HASH_CR_MDMAT; else reg &= ~HASH_CR_MDMAT; @@ -550,7 +556,6 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, stm32_hash_write(hdev, HASH_CR, reg); - stm32_hash_set_nblw(hdev, length); cookie = dmaengine_submit(in_desc); err = dma_submit_error(cookie); @@ -590,7 +595,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev) struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); int err; - if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) { + if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode > 0) { err = stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) return -ETIMEDOUT; @@ -655,18 +660,20 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) struct scatterlist sg[1], *tsg; int err = 0, reg, ncp = 0; unsigned int i, len = 0, bufcnt = 0; + bool final = hdev->flags & HASH_FLAGS_FINAL; bool is_last = false; + u32 last_word; - rctx->sg = hdev->req->src; - rctx->total = hdev->req->nbytes; + dev_dbg(hdev->dev, "%s total: %d bufcnt: %d final: %d\n", + __func__, rctx->total, rctx->state.bufcnt, final); - rctx->nents = sg_nents(rctx->sg); if (rctx->nents < 0) return -EINVAL; stm32_hash_write_ctrl(hdev); - if (hdev->flags & HASH_FLAGS_HMAC) { + if (hdev->flags & HASH_FLAGS_HMAC && (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { + hdev->flags |= HASH_FLAGS_HMAC_KEY; err = stm32_hash_hmac_dma_send(hdev); if (err != -EINPROGRESS) return err; @@ -677,22 +684,36 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) len = sg->length; if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) { - sg->length = rctx->total - bufcnt; - is_last = true; - if (hdev->dma_mode == 1) { - len = (ALIGN(sg->length, 16) - 16); - - ncp = sg_pcopy_to_buffer( - rctx->sg, rctx->nents, - rctx->state.buffer, sg->length - len, - rctx->total - sg->length + len); - - sg->length = len; + if (!final) { + /* Always manually put the last word of a non-final transfer. */ + len -= sizeof(u32); + sg_pcopy_to_buffer(rctx->sg, rctx->nents, &last_word, 4, len); + sg->length -= sizeof(u32); } else { - if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { - len = sg->length; - sg->length = ALIGN(sg->length, - sizeof(u32)); + /* + * In Multiple DMA mode, DMA must be aborted before the final + * transfer. + */ + sg->length = rctx->total - bufcnt; + if (hdev->dma_mode > 0) { + len = (ALIGN(sg->length, 16) - 16); + + ncp = sg_pcopy_to_buffer(rctx->sg, rctx->nents, + rctx->state.buffer, + sg->length - len, + rctx->total - sg->length + len); + + if (!len) + break; + + sg->length = len; + } else { + is_last = true; + if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { + len = sg->length; + sg->length = ALIGN(sg->length, + sizeof(u32)); + } } } } @@ -706,43 +727,67 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) err = stm32_hash_xmit_dma(hdev, sg, len, !is_last); + /* The last word of a non final transfer is sent manually. */ + if (!final) { + stm32_hash_write(hdev, HASH_DIN, last_word); + len += sizeof(u32); + } + + rctx->total -= len; + bufcnt += sg[0].length; dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); - if (err == -ENOMEM) + if (err == -ENOMEM || err == -ETIMEDOUT) return err; if (is_last) break; } - if (hdev->dma_mode == 1) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - reg = stm32_hash_read(hdev, HASH_CR); - reg &= ~HASH_CR_DMAE; - reg |= HASH_CR_DMAA; - stm32_hash_write(hdev, HASH_CR, reg); + /* + * When the second last block transfer of 4 words is performed by the DMA, + * the software must set the DMA Abort bit (DMAA) to 1 before completing the + * last transfer of 4 words or less. + */ + if (final) { + if (hdev->dma_mode > 0) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + reg = stm32_hash_read(hdev, HASH_CR); + reg &= ~HASH_CR_DMAE; + reg |= HASH_CR_DMAA; + stm32_hash_write(hdev, HASH_CR, reg); + + if (ncp) { + memset(buffer + ncp, 0, 4 - DIV_ROUND_UP(ncp, sizeof(u32))); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } - if (ncp) { - memset(buffer + ncp, 0, - DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); - writesl(hdev->io_base + HASH_DIN, buffer, - DIV_ROUND_UP(ncp, sizeof(u32))); + stm32_hash_set_nblw(hdev, ncp); + reg = stm32_hash_read(hdev, HASH_STR); + reg |= HASH_STR_DCAL; + stm32_hash_write(hdev, HASH_STR, reg); + err = -EINPROGRESS; } - stm32_hash_set_nblw(hdev, ncp); - reg = stm32_hash_read(hdev, HASH_STR); - reg |= HASH_STR_DCAL; - stm32_hash_write(hdev, HASH_STR, reg); - err = -EINPROGRESS; - } - if (hdev->flags & HASH_FLAGS_HMAC) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - err = stm32_hash_hmac_dma_send(hdev); + /* + * The hash processor needs the key to be loaded a second time in order + * to process the HMAC. + */ + if (hdev->flags & HASH_FLAGS_HMAC) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + err = stm32_hash_hmac_dma_send(hdev); + } + + return err; } - return err; + if (err != -EINPROGRESS) + return err; + + return 0; } static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) @@ -765,33 +810,6 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) return hdev; } -static bool stm32_hash_dma_aligned_data(struct ahash_request *req) -{ - struct scatterlist *sg; - struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); - int i; - - if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen) - return false; - - if (sg_nents(req->src) > 1) { - if (hdev->dma_mode == 1) - return false; - for_each_sg(req->src, sg, sg_nents(req->src), i) { - if ((!IS_ALIGNED(sg->length, sizeof(u32))) && - (!sg_is_last(sg))) - return false; - } - } - - if (req->src->offset % 4) - return false; - - return true; -} - static int stm32_hash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -802,8 +820,10 @@ static int stm32_hash_init(struct ahash_request *req) bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE; rctx->hdev = hdev; + state->flags = 0; - state->flags = HASH_FLAGS_CPU; + if (!(hdev->dma_lch && hdev->pdata->has_mdmat)) + state->flags |= HASH_FLAGS_CPU; if (sha3_mode) state->flags |= HASH_FLAGS_SHA3_MODE; @@ -857,6 +877,7 @@ static int stm32_hash_init(struct ahash_request *req) dev_err(hdev->dev, "Error, block too large"); return -EINVAL; } + rctx->nents = 0; rctx->total = 0; rctx->offset = 0; rctx->data_type = HASH_DATA_8_BITS; @@ -874,6 +895,9 @@ static int stm32_hash_update_req(struct stm32_hash_dev *hdev) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; + dev_dbg(hdev->dev, "update_req: total: %u, digcnt: %zd, final: 0", + rctx->total, rctx->digcnt); + if (!(state->flags & HASH_FLAGS_CPU)) return stm32_hash_dma_send(hdev); @@ -887,6 +911,11 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev) struct stm32_hash_state *state = &rctx->state; int buflen = state->bufcnt; + if (!(state->flags & HASH_FLAGS_CPU)) { + hdev->flags |= HASH_FLAGS_FINAL; + return stm32_hash_dma_send(hdev); + } + if (state->flags & HASH_FLAGS_FINUP) return stm32_hash_update_req(hdev); @@ -968,15 +997,21 @@ static int stm32_hash_finish(struct ahash_request *req) static void stm32_hash_finish_req(struct ahash_request *req, int err) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; struct stm32_hash_dev *hdev = rctx->hdev; + if (hdev->flags & HASH_FLAGS_DMA_ACTIVE) + state->flags |= HASH_FLAGS_DMA_ACTIVE; + else + state->flags &= ~HASH_FLAGS_DMA_ACTIVE; + if (!err && (HASH_FLAGS_FINAL & hdev->flags)) { stm32_hash_copy_hash(req); err = stm32_hash_finish(req); } - pm_runtime_mark_last_busy(hdev->dev); - pm_runtime_put_autosuspend(hdev->dev); + /* Finalized request mist be unprepared here */ + stm32_hash_unprepare_request(req); crypto_finalize_hash_request(hdev->engine, req, err); } @@ -1006,6 +1041,10 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) pm_runtime_get_sync(hdev->dev); + err = stm32_hash_prepare_request(req); + if (err) + return err; + hdev->req = req; hdev->flags = 0; swap_reg = hash_swap_reg(rctx); @@ -1030,6 +1069,12 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) if (state->flags & HASH_FLAGS_HMAC) hdev->flags |= HASH_FLAGS_HMAC | HASH_FLAGS_HMAC_KEY; + + if (state->flags & HASH_FLAGS_CPU) + hdev->flags |= HASH_FLAGS_CPU; + + if (state->flags & HASH_FLAGS_DMA_ACTIVE) + hdev->flags |= HASH_FLAGS_DMA_ACTIVE; } if (rctx->op == HASH_OP_UPDATE) @@ -1054,6 +1099,284 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) return 0; } +static int stm32_hash_copy_sgs(struct stm32_hash_request_ctx *rctx, + struct scatterlist *sg, int bs, + unsigned int new_len) +{ + struct stm32_hash_state *state = &rctx->state; + int pages; + void *buf; + + pages = get_order(new_len); + + buf = (void *)__get_free_pages(GFP_ATOMIC, pages); + if (!buf) { + pr_err("Couldn't allocate pages for unaligned cases.\n"); + return -ENOMEM; + } + + if (state->bufcnt) + memcpy(buf, rctx->hdev->xmit_buf, state->bufcnt); + + scatterwalk_map_and_copy(buf + state->bufcnt, sg, rctx->offset, + min(new_len, rctx->total) - state->bufcnt, 0); + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, buf, new_len); + rctx->sg = rctx->sgl; + state->flags |= HASH_FLAGS_SGS_COPIED; + rctx->nents = 1; + rctx->offset += new_len - state->bufcnt; + state->bufcnt = 0; + rctx->total = new_len; + + return 0; +} + +static int stm32_hash_align_sgs(struct scatterlist *sg, + int nbytes, int bs, bool init, bool final, + struct stm32_hash_request_ctx *rctx) +{ + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_dev *hdev = rctx->hdev; + struct scatterlist *sg_tmp = sg; + int offset = rctx->offset; + int new_len; + int n = 0; + int bufcnt = state->bufcnt; + bool secure_ctx = hdev->pdata->context_secured; + bool aligned = true; + + if (!sg || !sg->length || !nbytes) { + if (bufcnt) { + bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs; + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, bufcnt); + rctx->sg = rctx->sgl; + rctx->nents = 1; + } + + return 0; + } + + new_len = nbytes; + + if (offset) + aligned = false; + + if (final) { + new_len = DIV_ROUND_UP(new_len, bs) * bs; + } else { + new_len = (new_len - 1) / bs * bs; // return n block - 1 block + + /* + * Context save in some version of HASH IP can only be done when the + * FIFO is ready to get a new block. This implies to send n block plus a + * 32 bit word in the first DMA send. + */ + if (init && secure_ctx) { + new_len += sizeof(u32); + if (unlikely(new_len > nbytes)) + new_len -= bs; + } + } + + if (!new_len) + return 0; + + if (nbytes != new_len) + aligned = false; + + while (nbytes > 0 && sg_tmp) { + n++; + + if (bufcnt) { + if (!IS_ALIGNED(bufcnt, bs)) { + aligned = false; + break; + } + nbytes -= bufcnt; + bufcnt = 0; + if (!nbytes) + aligned = false; + + continue; + } + + if (offset < sg_tmp->length) { + if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) { + aligned = false; + break; + } + + if (!IS_ALIGNED(sg_tmp->length - offset, bs)) { + aligned = false; + break; + } + } + + if (offset) { + offset -= sg_tmp->length; + if (offset < 0) { + nbytes += offset; + offset = 0; + } + } else { + nbytes -= sg_tmp->length; + } + + sg_tmp = sg_next(sg_tmp); + + if (nbytes < 0) { + aligned = false; + break; + } + } + + if (!aligned) + return stm32_hash_copy_sgs(rctx, sg, bs, new_len); + + rctx->total = new_len; + rctx->offset += new_len; + rctx->nents = n; + if (state->bufcnt) { + sg_init_table(rctx->sgl, 2); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, state->bufcnt); + sg_chain(rctx->sgl, 2, sg); + rctx->sg = rctx->sgl; + } else { + rctx->sg = sg; + } + + return 0; +} + +static int stm32_hash_prepare_request(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_state *state = &rctx->state; + unsigned int nbytes; + int ret, hash_later, bs; + bool update = rctx->op & HASH_OP_UPDATE; + bool init = !(state->flags & HASH_FLAGS_INIT); + bool finup = state->flags & HASH_FLAGS_FINUP; + bool final = state->flags & HASH_FLAGS_FINAL; + + if (!hdev->dma_lch || state->flags & HASH_FLAGS_CPU) + return 0; + + bs = crypto_ahash_blocksize(tfm); + + nbytes = state->bufcnt; + + /* + * In case of update request nbytes must correspond to the content of the + * buffer + the offset minus the content of the request already in the + * buffer. + */ + if (update || finup) + nbytes += req->nbytes - rctx->offset; + + dev_dbg(hdev->dev, + "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n", + __func__, nbytes, bs, rctx->total, rctx->offset, state->bufcnt); + + if (!nbytes) + return 0; + + rctx->total = nbytes; + + if (update && req->nbytes && (!IS_ALIGNED(state->bufcnt, bs))) { + int len = bs - state->bufcnt % bs; + + if (len > req->nbytes) + len = req->nbytes; + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, len, 0); + state->bufcnt += len; + rctx->offset = len; + } + + /* copy buffer in a temporary one that is used for sg alignment */ + if (state->bufcnt) + memcpy(hdev->xmit_buf, state->buffer, state->bufcnt); + + ret = stm32_hash_align_sgs(req->src, nbytes, bs, init, final, rctx); + if (ret) + return ret; + + hash_later = nbytes - rctx->total; + if (hash_later < 0) + hash_later = 0; + + if (hash_later && hash_later <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer, + req->src, + req->nbytes - hash_later, + hash_later, 0); + + state->bufcnt = hash_later; + } else { + state->bufcnt = 0; + } + + if (hash_later > state->blocklen) { + /* FIXME: add support of this case */ + pr_err("Buffer contains more than one block.\n"); + return -ENOMEM; + } + + rctx->total = min(nbytes, rctx->total); + + return 0; +} + +static void stm32_hash_unprepare_request(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + u32 *preg = state->hw_context; + int swap_reg, i; + + if (hdev->dma_lch) + dmaengine_terminate_sync(hdev->dma_lch); + + if (state->flags & HASH_FLAGS_SGS_COPIED) + free_pages((unsigned long)sg_virt(rctx->sg), get_order(rctx->sg->length)); + + rctx->sg = NULL; + rctx->offset = 0; + + state->flags &= ~(HASH_FLAGS_SGS_COPIED); + + if (!(hdev->flags & HASH_FLAGS_INIT)) + goto pm_runtime; + + state->flags |= HASH_FLAGS_INIT; + + if (stm32_hash_wait_busy(hdev)) { + dev_warn(hdev->dev, "Wait busy failed."); + return; + } + + swap_reg = hash_swap_reg(rctx); + + if (!hdev->pdata->ux500) + *preg++ = stm32_hash_read(hdev, HASH_IMR); + *preg++ = stm32_hash_read(hdev, HASH_STR); + *preg++ = stm32_hash_read(hdev, HASH_CR); + for (i = 0; i < swap_reg; i++) + *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); + +pm_runtime: + pm_runtime_mark_last_busy(hdev->dev); + pm_runtime_put_autosuspend(hdev->dev); +} + static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); @@ -1070,16 +1393,26 @@ static int stm32_hash_update(struct ahash_request *req) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); struct stm32_hash_state *state = &rctx->state; - if (!req->nbytes || !(state->flags & HASH_FLAGS_CPU)) + if (!req->nbytes) return 0; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if ((state->bufcnt + rctx->total < state->blocklen)) { - stm32_hash_append_sg(rctx); - return 0; + if (state->flags & HASH_FLAGS_CPU) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + + if ((state->bufcnt + rctx->total < state->blocklen)) { + stm32_hash_append_sg(rctx); + return 0; + } + } else { /* DMA mode */ + if (state->bufcnt + req->nbytes <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, req->nbytes, 0); + state->bufcnt += req->nbytes; + return 0; + } } return stm32_hash_enqueue(req, HASH_OP_UPDATE); @@ -1098,20 +1431,18 @@ static int stm32_hash_final(struct ahash_request *req) static int stm32_hash_finup(struct ahash_request *req) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); struct stm32_hash_state *state = &rctx->state; if (!req->nbytes) goto out; state->flags |= HASH_FLAGS_FINUP; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if (hdev->dma_lch && stm32_hash_dma_aligned_data(req)) - state->flags &= ~HASH_FLAGS_CPU; + if ((state->flags & HASH_FLAGS_CPU)) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + } out: return stm32_hash_final(req); @@ -1215,7 +1546,6 @@ static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm) HASH_FLAGS_HMAC); } - static void stm32_hash_cra_exit(struct crypto_tfm *tfm) { struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1228,14 +1558,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) { struct stm32_hash_dev *hdev = dev_id; - if (HASH_FLAGS_CPU & hdev->flags) { - if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; - goto finish; - } - } else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE; - goto finish; + if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; + goto finish; } return IRQ_HANDLED; @@ -1984,6 +2309,7 @@ static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = { .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32mp13), .has_sr = true, .has_mdmat = true, + .context_secured = true, }; static const struct of_device_id stm32_hash_of_match[] = { diff --git a/drivers/crypto/tegra/Makefile b/drivers/crypto/tegra/Makefile new file mode 100644 index 000000000000..a32001e58eb2 --- /dev/null +++ b/drivers/crypto/tegra/Makefile @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +tegra-se-objs := tegra-se-key.o tegra-se-main.o + +tegra-se-y += tegra-se-aes.o +tegra-se-y += tegra-se-hash.o + +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra-se.o diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c new file mode 100644 index 000000000000..ae7a0f8435fc --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -0,0 +1,1933 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle block cipher algorithms using NVIDIA Security Engine. + */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <crypto/aead.h> +#include <crypto/aes.h> +#include <crypto/engine.h> +#include <crypto/gcm.h> +#include <crypto/scatterwalk.h> +#include <crypto/xts.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> + +#include "tegra-se.h" + +struct tegra_aes_ctx { + struct tegra_se *se; + u32 alg; + u32 ivsize; + u32 key1_id; + u32 key2_id; +}; + +struct tegra_aes_reqctx { + struct tegra_se_datbuf datbuf; + bool encrypt; + u32 config; + u32 crypto_config; + u32 len; + u32 *iv; +}; + +struct tegra_aead_ctx { + struct tegra_se *se; + unsigned int authsize; + u32 alg; + u32 keylen; + u32 key_id; +}; + +struct tegra_aead_reqctx { + struct tegra_se_datbuf inbuf; + struct tegra_se_datbuf outbuf; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + unsigned int assoclen; + unsigned int cryptlen; + unsigned int authsize; + bool encrypt; + u32 config; + u32 crypto_config; + u32 key_id; + u32 iv[4]; + u8 authdata[16]; +}; + +struct tegra_cmac_ctx { + struct tegra_se *se; + unsigned int alg; + u32 key_id; + struct crypto_shash *fallback_tfm; +}; + +struct tegra_cmac_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf residue; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 crypto_config; + u32 config; + u32 key_id; + u32 *iv; + u32 result[CMAC_RESULT_REG_COUNT]; +}; + +/* increment counter (128-bit int) */ +static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums) +{ + do { + --bits; + nums += counter[bits]; + counter[bits] = nums & 0xff; + nums >>= 8; + } while (bits && nums); +} + +static void tegra_cbc_iv_copyback(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + unsigned int offset; + + offset = req->cryptlen - ctx->ivsize; + + if (rctx->encrypt) + memcpy(req->iv, rctx->datbuf.buf + offset, ctx->ivsize); + else + scatterwalk_map_and_copy(req->iv, req->src, offset, ctx->ivsize, 0); +} + +static void tegra_aes_update_iv(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + int num; + + if (ctx->alg == SE_ALG_CBC) { + tegra_cbc_iv_copyback(req, ctx); + } else if (ctx->alg == SE_ALG_CTR) { + num = req->cryptlen / ctx->ivsize; + if (req->cryptlen % ctx->ivsize) + num++; + + ctr_iv_inc(req->iv, ctx->ivsize, num); + } +} + +static int tegra234_aes_crypto_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CMAC: + case SE_ALG_GMAC: + case SE_ALG_GCM: + case SE_ALG_GCM_FINAL: + return 0; + case SE_ALG_CBC: + if (encrypt) + return SE_CRYPTO_CFG_CBC_ENCRYPT; + else + return SE_CRYPTO_CFG_CBC_DECRYPT; + case SE_ALG_ECB: + if (encrypt) + return SE_CRYPTO_CFG_ECB_ENCRYPT; + else + return SE_CRYPTO_CFG_ECB_DECRYPT; + case SE_ALG_XTS: + if (encrypt) + return SE_CRYPTO_CFG_XTS_ENCRYPT; + else + return SE_CRYPTO_CFG_XTS_DECRYPT; + + case SE_ALG_CTR: + return SE_CRYPTO_CFG_CTR; + case SE_ALG_CBC_MAC: + return SE_CRYPTO_CFG_CBC_MAC; + + default: + break; + } + + return -EINVAL; +} + +static int tegra234_aes_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_XTS: + case SE_ALG_CTR: + if (encrypt) + return SE_CFG_AES_ENCRYPT; + else + return SE_CFG_AES_DECRYPT; + + case SE_ALG_GMAC: + if (encrypt) + return SE_CFG_GMAC_ENCRYPT; + else + return SE_CFG_GMAC_DECRYPT; + + case SE_ALG_GCM: + if (encrypt) + return SE_CFG_GCM_ENCRYPT; + else + return SE_CFG_GCM_DECRYPT; + + case SE_ALG_GCM_FINAL: + if (encrypt) + return SE_CFG_GCM_FINAL_ENCRYPT; + else + return SE_CFG_GCM_FINAL_DECRYPT; + + case SE_ALG_CMAC: + return SE_CFG_CMAC; + + case SE_ALG_CBC_MAC: + return SE_AES_ENC_ALG_AES_ENC | + SE_AES_DST_HASH_REG; + } + return -EINVAL; +} + +static unsigned int tegra_aes_prep_cmd(struct tegra_aes_ctx *ctx, + struct tegra_aes_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + dma_addr_t addr = rctx->datbuf.addr; + + data_count = rctx->len / AES_BLOCK_SIZE; + res_bits = (rctx->len % AES_BLOCK_SIZE) * 8; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + if (rctx->iv) { + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + } + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | SE_ADDR_HI_SZ(rctx->len); + + /* Destination address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | + SE_ADDR_HI_SZ(rctx->len); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = container_of(areq, struct skcipher_request, base); + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + struct tegra_se *se = ctx->se; + unsigned int cmdlen; + int ret; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_AES_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + return -ENOMEM; + + rctx->datbuf.size = SE_AES_BUFLEN; + rctx->iv = (u32 *)req->iv; + rctx->len = req->cryptlen; + + /* Pad input to AES Block size */ + if (ctx->alg != SE_ALG_XTS) { + if (rctx->len % AES_BLOCK_SIZE) + rctx->len += AES_BLOCK_SIZE - (rctx->len % AES_BLOCK_SIZE); + } + + scatterwalk_map_and_copy(rctx->datbuf.buf, req->src, 0, req->cryptlen, 0); + + /* Prepare the command and submit for execution */ + cmdlen = tegra_aes_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + + /* Copy the result */ + tegra_aes_update_iv(req, ctx); + scatterwalk_map_and_copy(rctx->datbuf.buf, req->dst, 0, req->cryptlen, 1); + + /* Free the buffer */ + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + + crypto_finalize_skcipher_request(se->engine, req, ret); + + return 0; +} + +static int tegra_aes_cra_init(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + se_alg = container_of(alg, struct tegra_se_alg, alg.skcipher.base); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct tegra_aes_reqctx)); + + ctx->ivsize = crypto_skcipher_ivsize(tfm); + ctx->se = se_alg->se_dev; + ctx->key1_id = 0; + ctx->key2_id = 0; + + algname = crypto_tfm_alg_name(&tfm->base); + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + return 0; +} + +static void tegra_aes_cra_exit(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + if (ctx->key1_id) + tegra_key_invalidate(ctx->se, ctx->key1_id, ctx->alg); + + if (ctx->key2_id) + tegra_key_invalidate(ctx->se, ctx->key2_id, ctx->alg); +} + +static int tegra_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key1_id); +} + +static int tegra_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 len = keylen / 2; + int ret; + + ret = xts_verify_key(tfm, key, keylen); + if (ret || aes_check_keylen(len)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ret = tegra_key_submit(ctx->se, key, len, + ctx->alg, &ctx->key1_id); + if (ret) + return ret; + + return tegra_key_submit(ctx->se, key + len, len, + ctx->alg, &ctx->key2_id); + + return 0; +} + +static int tegra_aes_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_CTR: + manifest |= SE_KAC_ENC; + break; + case SE_ALG_XTS: + manifest |= SE_KAC_XTS; + break; + case SE_ALG_GCM: + manifest |= SE_KAC_GCM; + break; + case SE_ALG_CMAC: + manifest |= SE_KAC_CMAC; + break; + case SE_ALG_CBC_MAC: + manifest |= SE_KAC_ENC; + break; + default: + return -EINVAL; + } + + switch (keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + manifest |= SE_KAC_SIZE_256; + break; + default: + return -EINVAL; + } + + return manifest; +} + +static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt) + +{ + struct crypto_skcipher *tfm; + struct tegra_aes_ctx *ctx; + struct tegra_aes_reqctx *rctx; + + tfm = crypto_skcipher_reqtfm(req); + ctx = crypto_skcipher_ctx(tfm); + rctx = skcipher_request_ctx(req); + + if (ctx->alg != SE_ALG_XTS) { + if (!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(tfm))) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + } else if (req->cryptlen < XTS_BLOCK_SIZE) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + + if (!req->cryptlen) + return 0; + + rctx->encrypt = encrypt; + rctx->config = tegra234_aes_cfg(ctx->alg, encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(ctx->alg, encrypt); + rctx->crypto_config |= SE_AES_KEY_INDEX(ctx->key1_id); + + if (ctx->key2_id) + rctx->crypto_config |= SE_AES_KEY2_INDEX(ctx->key2_id); + + return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req); +} + +static int tegra_aes_encrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, true); +} + +static int tegra_aes_decrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, false); +} + +static struct tegra_se_alg tegra_aes_algs[] = { + { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-tegra", + .cra_priority = 500, + .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_xts_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-tegra", + .cra_priority = 500, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_aes_ctx), + .cra_alignmask = (__alignof__(u64) - 1), + .cra_module = THIS_MODULE, + }, + } + }, +}; + +static unsigned int tegra_gmac_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + data_count = (rctx->assoclen / AES_BLOCK_SIZE); + res_bits = (rctx->assoclen % AES_BLOCK_SIZE) * 8; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 4); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->assoclen); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_INIT | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static unsigned int tegra_gcm_crypt_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr, op; + + data_count = (rctx->cryptlen / AES_BLOCK_SIZE); + res_bits = (rctx->cryptlen % AES_BLOCK_SIZE) * 8; + op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + /* + * If there is no assoc data, + * this will be the init command + */ + if (!rctx->assoclen) + op |= SE_AES_OP_INIT; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source Address */ + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->cryptlen); + + /* Destination Address */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(rctx->cryptlen); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + return i; +} + +static int tegra_gcm_prep_final_cmd(struct tegra_se *se, u32 *cpuvaddr, + struct tegra_aead_reqctx *rctx) +{ + unsigned int i = 0, j; + u32 op; + + op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + /* + * Set init for zero sized vector + */ + if (!rctx->assoclen && !rctx->cryptlen) + op |= SE_AES_OP_INIT; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->aad_len, 2); + cpuvaddr[i++] = rctx->assoclen * 8; + cpuvaddr[i++] = 0; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->cryp_msg_len, 2); + cpuvaddr[i++] = rctx->cryptlen * 8; + cpuvaddr[i++] = 0; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + + /* Destination Address */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(0x10); /* HW always generates 128-bit tag */ + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_gcm_do_gmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + unsigned int cmdlen; + + scatterwalk_map_and_copy(rctx->inbuf.buf, + rctx->src_sg, 0, rctx->assoclen, 0); + + rctx->config = tegra234_aes_cfg(SE_ALG_GMAC, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GMAC, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + cmdlen = tegra_gmac_prep_cmd(ctx, rctx); + + return tegra_se_host1x_submit(se, cmdlen); +} + +static int tegra_gcm_do_crypt(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + int cmdlen, ret; + + scatterwalk_map_and_copy(rctx->inbuf.buf, rctx->src_sg, + rctx->assoclen, rctx->cryptlen, 0); + + rctx->config = tegra234_aes_cfg(SE_ALG_GCM, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_gcm_crypt_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + /* Copy the result */ + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + rctx->assoclen, rctx->cryptlen, 1); + + return 0; +} + +static int tegra_gcm_do_final(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + int cmdlen, ret, offset; + + rctx->config = tegra234_aes_cfg(SE_ALG_GCM_FINAL, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM_FINAL, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_gcm_prep_final_cmd(se, cpuvaddr, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + if (rctx->encrypt) { + /* Copy the result */ + offset = rctx->assoclen + rctx->cryptlen; + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + offset, rctx->authsize, 1); + } + + return 0; +} + +static int tegra_gcm_do_verify(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + unsigned int offset; + u8 mac[16]; + + offset = rctx->assoclen + rctx->cryptlen; + scatterwalk_map_and_copy(mac, rctx->src_sg, offset, rctx->authsize, 0); + + if (crypto_memneq(rctx->outbuf.buf, mac, rctx->authsize)) + return -EBADMSG; + + return 0; +} + +static inline int tegra_ccm_check_iv(const u8 *iv) +{ + /* iv[0] gives value of q-1 + * 2 <= q <= 8 as per NIST 800-38C notation + * 2 <= L <= 8, so 1 <= L' <= 7. as per rfc 3610 notation + */ + if (iv[0] < 1 || iv[0] > 7) { + pr_debug("ccm_check_iv failed %d\n", iv[0]); + return -EINVAL; + } + + return 0; +} + +static unsigned int tegra_cbcmac_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int data_count, i = 0; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + data_count = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(0x10); /* HW always generates 128 bit tag */ + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | + SE_AES_OP_LASTBUF | SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static unsigned int tegra_ctr_prep_cmd(struct tegra_aead_ctx *ctx, + struct tegra_aead_reqctx *rctx) +{ + unsigned int i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1; + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source address setting */ + cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + /* Destination address setting */ + cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) | + SE_ADDR_HI_SZ(rctx->inbuf.size); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", + rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_ccm_do_cbcmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + int cmdlen; + + rctx->config = tegra234_aes_cfg(SE_ALG_CBC_MAC, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CBC_MAC, + rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Prepare command and submit */ + cmdlen = tegra_cbcmac_prep_cmd(ctx, rctx); + + return tegra_se_host1x_submit(se, cmdlen); +} + +static int tegra_ccm_set_msg_len(u8 *block, unsigned int msglen, int csize) +{ + __be32 data; + + memset(block, 0, csize); + block += csize; + + if (csize >= 4) + csize = 4; + else if (msglen > (1 << (8 * csize))) + return -EOVERFLOW; + + data = cpu_to_be32(msglen); + memcpy(block - csize, (u8 *)&data + 4 - csize, csize); + + return 0; +} + +static int tegra_ccm_format_nonce(struct tegra_aead_reqctx *rctx, u8 *nonce) +{ + unsigned int q, t; + u8 *q_ptr, *iv = (u8 *)rctx->iv; + + memcpy(nonce, rctx->iv, 16); + + /*** 1. Prepare Flags Octet ***/ + + /* Encode t (mac length) */ + t = rctx->authsize; + nonce[0] |= (((t - 2) / 2) << 3); + + /* Adata */ + if (rctx->assoclen) + nonce[0] |= (1 << 6); + + /*** Encode Q - message length ***/ + q = iv[0] + 1; + q_ptr = nonce + 16 - q; + + return tegra_ccm_set_msg_len(q_ptr, rctx->cryptlen, q); +} + +static int tegra_ccm_format_adata(u8 *adata, unsigned int a) +{ + int len = 0; + + /* add control info for associated data + * RFC 3610 and NIST Special Publication 800-38C + */ + if (a < 65280) { + *(__be16 *)adata = cpu_to_be16(a); + len = 2; + } else { + *(__be16 *)adata = cpu_to_be16(0xfffe); + *(__be32 *)&adata[2] = cpu_to_be32(a); + len = 6; + } + + return len; +} + +static int tegra_ccm_add_padding(u8 *buf, unsigned int len) +{ + unsigned int padlen = 16 - (len % 16); + u8 padding[16] = {0}; + + if (padlen == 16) + return 0; + + memcpy(buf, padding, padlen); + + return padlen; +} + +static int tegra_ccm_format_blocks(struct tegra_aead_reqctx *rctx) +{ + unsigned int alen = 0, offset = 0; + u8 nonce[16], adata[16]; + int ret; + + ret = tegra_ccm_format_nonce(rctx, nonce); + if (ret) + return ret; + + memcpy(rctx->inbuf.buf, nonce, 16); + offset = 16; + + if (rctx->assoclen) { + alen = tegra_ccm_format_adata(adata, rctx->assoclen); + memcpy(rctx->inbuf.buf + offset, adata, alen); + offset += alen; + + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, + rctx->src_sg, 0, rctx->assoclen, 0); + + offset += rctx->assoclen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, + rctx->assoclen + alen); + } + + return offset; +} + +static int tegra_ccm_mac_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + u32 result[16]; + int i, ret; + + /* Read and clear Result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + result[i] = readl(se->base + se->hw->regs->result + (i * 4)); + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + if (rctx->encrypt) { + memcpy(rctx->authdata, result, rctx->authsize); + } else { + ret = crypto_memneq(rctx->authdata, result, rctx->authsize); + if (ret) + return -EBADMSG; + } + + return 0; +} + +static int tegra_ccm_ctr_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx) +{ + /* Copy result */ + scatterwalk_map_and_copy(rctx->outbuf.buf + 16, rctx->dst_sg, + rctx->assoclen, rctx->cryptlen, 1); + + if (rctx->encrypt) + scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg, + rctx->assoclen + rctx->cryptlen, + rctx->authsize, 1); + else + memcpy(rctx->authdata, rctx->outbuf.buf, rctx->authsize); + + return 0; +} + +static int tegra_ccm_compute_auth(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + struct scatterlist *sg; + int offset, ret; + + offset = tegra_ccm_format_blocks(rctx); + if (offset < 0) + return -EINVAL; + + /* Copy plain text to the buffer */ + sg = rctx->encrypt ? rctx->src_sg : rctx->dst_sg; + + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, + sg, rctx->assoclen, + rctx->cryptlen, 0); + offset += rctx->cryptlen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen); + + rctx->inbuf.size = offset; + + ret = tegra_ccm_do_cbcmac(ctx, rctx); + if (ret) + return ret; + + return tegra_ccm_mac_result(se, rctx); +} + +static int tegra_ccm_do_ctr(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx) +{ + struct tegra_se *se = ctx->se; + unsigned int cmdlen, offset = 0; + struct scatterlist *sg = rctx->src_sg; + int ret; + + rctx->config = tegra234_aes_cfg(SE_ALG_CTR, rctx->encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CTR, rctx->encrypt) | + SE_AES_KEY_INDEX(ctx->key_id); + + /* Copy authdata in the top of buffer for encryption/decryption */ + if (rctx->encrypt) + memcpy(rctx->inbuf.buf, rctx->authdata, rctx->authsize); + else + scatterwalk_map_and_copy(rctx->inbuf.buf, sg, + rctx->assoclen + rctx->cryptlen, + rctx->authsize, 0); + + offset += rctx->authsize; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->authsize); + + /* If there is no cryptlen, proceed to submit the task */ + if (rctx->cryptlen) { + scatterwalk_map_and_copy(rctx->inbuf.buf + offset, sg, + rctx->assoclen, rctx->cryptlen, 0); + offset += rctx->cryptlen; + offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen); + } + + rctx->inbuf.size = offset; + + /* Prepare command and submit */ + cmdlen = tegra_ctr_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + return ret; + + return tegra_ccm_ctr_result(se, rctx); +} + +static int tegra_ccm_crypt_init(struct aead_request *req, struct tegra_se *se, + struct tegra_aead_reqctx *rctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + u8 *iv = (u8 *)rctx->iv; + int ret, i; + + rctx->src_sg = req->src; + rctx->dst_sg = req->dst; + rctx->assoclen = req->assoclen; + rctx->authsize = crypto_aead_authsize(tfm); + + memcpy(iv, req->iv, 16); + + ret = tegra_ccm_check_iv(iv); + if (ret) + return ret; + + /* Note: rfc 3610 and NIST 800-38C require counter (ctr_0) of + * zero to encrypt auth tag. + * req->iv has the formatted ctr_0 (i.e. Flags || N || 0). + */ + memset(iv + 15 - iv[0], 0, iv[0] + 1); + + /* Clear any previous result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + return 0; +} + +static int tegra_ccm_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = container_of(areq, struct aead_request, base); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret; + + /* Allocate buffers required */ + rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->inbuf.addr, GFP_KERNEL); + if (!rctx->inbuf.buf) + return -ENOMEM; + + rctx->inbuf.size = SE_AES_BUFLEN; + + rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->outbuf.addr, GFP_KERNEL); + if (!rctx->outbuf.buf) { + ret = -ENOMEM; + goto outbuf_err; + } + + rctx->outbuf.size = SE_AES_BUFLEN; + + ret = tegra_ccm_crypt_init(req, se, rctx); + if (ret) + goto out; + + if (rctx->encrypt) { + rctx->cryptlen = req->cryptlen; + + /* CBC MAC Operation */ + ret = tegra_ccm_compute_auth(ctx, rctx); + if (ret) + goto out; + + /* CTR operation */ + ret = tegra_ccm_do_ctr(ctx, rctx); + if (ret) + goto out; + } else { + rctx->cryptlen = req->cryptlen - ctx->authsize; + if (ret) + goto out; + + /* CTR operation */ + ret = tegra_ccm_do_ctr(ctx, rctx); + if (ret) + goto out; + + /* CBC MAC Operation */ + ret = tegra_ccm_compute_auth(ctx, rctx); + if (ret) + goto out; + } + +out: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->outbuf.buf, rctx->outbuf.addr); + +outbuf_err: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->inbuf.buf, rctx->inbuf.addr); + + crypto_finalize_aead_request(ctx->se->engine, req, ret); + + return 0; +} + +static int tegra_gcm_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct aead_request *req = container_of(areq, struct aead_request, base); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + int ret; + + /* Allocate buffers required */ + rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->inbuf.addr, GFP_KERNEL); + if (!rctx->inbuf.buf) + return -ENOMEM; + + rctx->inbuf.size = SE_AES_BUFLEN; + + rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN, + &rctx->outbuf.addr, GFP_KERNEL); + if (!rctx->outbuf.buf) { + ret = -ENOMEM; + goto outbuf_err; + } + + rctx->outbuf.size = SE_AES_BUFLEN; + + rctx->src_sg = req->src; + rctx->dst_sg = req->dst; + rctx->assoclen = req->assoclen; + rctx->authsize = crypto_aead_authsize(tfm); + + if (rctx->encrypt) + rctx->cryptlen = req->cryptlen; + else + rctx->cryptlen = req->cryptlen - ctx->authsize; + + memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE); + rctx->iv[3] = (1 << 24); + + /* If there is associated data perform GMAC operation */ + if (rctx->assoclen) { + ret = tegra_gcm_do_gmac(ctx, rctx); + if (ret) + goto out; + } + + /* GCM Encryption/Decryption operation */ + if (rctx->cryptlen) { + ret = tegra_gcm_do_crypt(ctx, rctx); + if (ret) + goto out; + } + + /* GCM_FINAL operation */ + ret = tegra_gcm_do_final(ctx, rctx); + if (ret) + goto out; + + if (!rctx->encrypt) + ret = tegra_gcm_do_verify(ctx->se, rctx); + +out: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->outbuf.buf, rctx->outbuf.addr); + +outbuf_err: + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->inbuf.buf, rctx->inbuf.addr); + + /* Finalize the request if there are no errors */ + crypto_finalize_aead_request(ctx->se->engine, req, ret); + + return 0; +} + +static int tegra_aead_cra_init(struct crypto_aead *tfm) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct aead_alg *alg = crypto_aead_alg(tfm); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(&tfm->base); + + se_alg = container_of(alg, struct tegra_se_alg, alg.aead.base); + + crypto_aead_set_reqsize(tfm, sizeof(struct tegra_aead_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + return 0; +} + +static int tegra_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (authsize) { + case 4: + case 6: + case 8: + case 10: + case 12: + case 14: + case 16: + break; + default: + return -EINVAL; + } + + ctx->authsize = authsize; + + return 0; +} + +static int tegra_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_gcm_check_authsize(authsize); + if (ret) + return ret; + + ctx->authsize = authsize; + + return 0; +} + +static void tegra_aead_cra_exit(struct crypto_aead *tfm) +{ + struct tegra_aead_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + if (ctx->key_id) + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_aead_crypt(struct aead_request *req, bool encrypt) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct tegra_aead_reqctx *rctx = aead_request_ctx(req); + + rctx->encrypt = encrypt; + + return crypto_transfer_aead_request_to_engine(ctx->se->engine, req); +} + +static int tegra_aead_encrypt(struct aead_request *req) +{ + return tegra_aead_crypt(req, true); +} + +static int tegra_aead_decrypt(struct aead_request *req) +{ + return tegra_aead_crypt(req, false); +} + +static int tegra_aead_setkey(struct crypto_aead *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static unsigned int tegra_cmac_prep_cmd(struct tegra_cmac_ctx *ctx, + struct tegra_cmac_reqctx *rctx) +{ + unsigned int data_count, res_bits = 0, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr, op; + + data_count = (rctx->datbuf.size / AES_BLOCK_SIZE); + + op = SE_AES_OP_WRSTALL | SE_AES_OP_START | SE_AES_OP_LASTBUF; + + if (!(rctx->task & SHA_UPDATE)) { + op |= SE_AES_OP_FINAL; + res_bits = (rctx->datbuf.size % AES_BLOCK_SIZE) * 8; + } + + if (!res_bits && data_count) + data_count--; + + if (rctx->task & SHA_FIRST) { + rctx->task &= ~SHA_FIRST; + + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + /* Load 0 IV */ + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = 0; + } + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source Address */ + cpuvaddr[i++] = lower_32_bits(rctx->datbuf.addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | + SE_ADDR_HI_SZ(rctx->datbuf.size); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = SE_ADDR_HI_SZ(AES_BLOCK_SIZE); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = op; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + return i; +} + +static void tegra_cmac_copy_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); +} + +static void tegra_cmac_paste_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + +static int tegra_cmac_do_update(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + unsigned int nblks, nresidue, cmdlen; + int ret; + + if (!req->nbytes) + return 0; + + nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; + nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; + + /* + * Reserve the last block as residue during final() to process. + */ + if (!nresidue && nblks) { + nresidue += rctx->blk_size; + nblks--; + } + + rctx->src_sg = req->src; + rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue; + rctx->total_len += rctx->datbuf.size; + rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0); + rctx->crypto_config = SE_AES_KEY_INDEX(ctx->key_id); + + /* + * Keep one block and residue bytes in residue and + * return. The bytes will be processed in final() + */ + if (nblks < 1) { + scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes, 0); + + rctx->residue.size += req->nbytes; + return 0; + } + + /* Copy the previous residue first */ + if (rctx->residue.size) + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + + scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes - nresidue, 0); + + scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg, + req->nbytes - nresidue, nresidue, 0); + + /* Update residue value with the residue after current block */ + rctx->residue.size = nresidue; + + /* + * If this is not the first 'update' call, paste the previous copied + * intermediate results to the registers so that it gets picked up. + * This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_cmac_paste_result(ctx->se, rctx); + + cmdlen = tegra_cmac_prep_cmd(ctx, rctx); + + ret = tegra_se_host1x_submit(se, cmdlen); + /* + * If this is not the final update, copy the intermediate results + * from the registers so that it can be used in the next 'update' + * call. This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FINAL)) + tegra_cmac_copy_result(ctx->se, rctx); + + return ret; +} + +static int tegra_cmac_do_final(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + u32 *result = (u32 *)req->result; + int ret = 0, i, cmdlen; + + if (!req->nbytes && !rctx->total_len && ctx->fallback_tfm) { + return crypto_shash_tfm_digest(ctx->fallback_tfm, + rctx->datbuf.buf, 0, req->result); + } + + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + rctx->datbuf.size = rctx->residue.size; + rctx->total_len += rctx->residue.size; + rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0); + + /* Prepare command and submit */ + cmdlen = tegra_cmac_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + if (ret) + goto out; + + /* Read and clear Result register */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + result[i] = readl(se->base + se->hw->regs->result + (i * 4)); + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + +out: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm) * 2, + rctx->residue.buf, rctx->residue.addr); + return ret; +} + +static int tegra_cmac_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret; + + if (rctx->task & SHA_UPDATE) { + ret = tegra_cmac_do_update(req); + rctx->task &= ~SHA_UPDATE; + } + + if (rctx->task & SHA_FINAL) { + ret = tegra_cmac_do_final(req); + rctx->task &= ~SHA_FINAL; + } + + crypto_finalize_hash_request(se->engine, req, ret); + + return 0; +} + +static void tegra_cmac_init_fallback(struct crypto_ahash *tfm, struct tegra_cmac_ctx *ctx, + const char *algname) +{ + unsigned int statesize; + + ctx->fallback_tfm = crypto_alloc_shash(algname, 0, CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback_tfm)) { + dev_warn(ctx->se->dev, "failed to allocate fallback for %s\n", algname); + ctx->fallback_tfm = NULL; + return; + } + + statesize = crypto_shash_statesize(ctx->fallback_tfm); + + if (statesize > sizeof(struct tegra_cmac_reqctx)) + crypto_ahash_set_statesize(tfm, statesize); +} + +static int tegra_cmac_cra_init(struct crypto_tfm *tfm) +{ + struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(tfm); + se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base); + + crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_cmac_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + tegra_cmac_init_fallback(ahash_tfm, ctx, algname); + + return 0; +} + +static void tegra_cmac_cra_exit(struct crypto_tfm *tfm) +{ + struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback_tfm) + crypto_free_shash(ctx->fallback_tfm); + + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_cmac_init(struct ahash_request *req) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int i; + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->task = SHA_FIRST; + rctx->blk_size = crypto_ahash_blocksize(tfm); + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size * 2, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->residue.size = 0; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + rctx->datbuf.size = 0; + + /* Clear any previous result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + return -ENOMEM; +} + +static int tegra_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + if (ctx->fallback_tfm) + crypto_shash_setkey(ctx->fallback_tfm, key, keylen); + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_cmac_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + tegra_cmac_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_export(struct ahash_request *req, void *out) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_cmac_import(struct ahash_request *req, const void *in) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_aead_algs[] = { + { + .alg.aead.op.do_one_request = tegra_gcm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + .setkey = tegra_aead_setkey, + .setauthsize = tegra_gcm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = GCM_AES_IV_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.aead.op.do_one_request = tegra_ccm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + .setkey = tegra_aead_setkey, + .setauthsize = tegra_ccm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + } +}; + +static struct tegra_se_alg tegra_cmac_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_cmac_do_one_req, + .alg.ahash.base = { + .init = tegra_cmac_init, + .setkey = tegra_cmac_setkey, + .update = tegra_cmac_update, + .final = tegra_cmac_final, + .finup = tegra_cmac_finup, + .digest = tegra_cmac_digest, + .export = tegra_cmac_export, + .import = tegra_cmac_import, + .halg.digestsize = AES_BLOCK_SIZE, + .halg.statesize = sizeof(struct tegra_cmac_reqctx), + .halg.base = { + .cra_name = "cmac(aes)", + .cra_driver_name = "tegra-se-cmac", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_cmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_cmac_cra_init, + .cra_exit = tegra_cmac_cra_exit, + } + } + } +}; + +int tegra_init_aes(struct tegra_se *se) +{ + struct aead_engine_alg *aead_alg; + struct ahash_engine_alg *ahash_alg; + struct skcipher_engine_alg *sk_alg; + int i, ret; + + se->manifest = tegra_aes_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) { + sk_alg = &tegra_aes_algs[i].alg.skcipher; + tegra_aes_algs[i].se_dev = se; + + ret = crypto_engine_register_skcipher(sk_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + sk_alg->base.base.cra_name); + goto err_aes; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) { + aead_alg = &tegra_aead_algs[i].alg.aead; + tegra_aead_algs[i].se_dev = se; + + ret = crypto_engine_register_aead(aead_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + aead_alg->base.base.cra_name); + goto err_aead; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) { + ahash_alg = &tegra_cmac_algs[i].alg.ahash; + tegra_cmac_algs[i].se_dev = se; + + ret = crypto_engine_register_ahash(ahash_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + ahash_alg->base.halg.base.cra_name); + goto err_cmac; + } + } + + return 0; + +err_cmac: + while (i--) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); + + i = ARRAY_SIZE(tegra_aead_algs); +err_aead: + while (i--) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + i = ARRAY_SIZE(tegra_aes_algs); +err_aes: + while (i--) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + return ret; +} + +void tegra_deinit_aes(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c new file mode 100644 index 000000000000..4d4bd727f498 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-hash.c @@ -0,0 +1,1060 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle HASH algorithms using NVIDIA Security Engine. + */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <crypto/aes.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/sha3.h> +#include <crypto/internal/des.h> +#include <crypto/engine.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> + +#include "tegra-se.h" + +struct tegra_sha_ctx { + struct tegra_se *se; + unsigned int alg; + bool fallback; + u32 key_id; + struct crypto_ahash *fallback_tfm; +}; + +struct tegra_sha_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf residue; + struct tegra_se_datbuf digest; + unsigned int alg; + unsigned int config; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 key_id; + u32 result[HASH_RESULT_REG_COUNT]; + struct ahash_request fallback_req; +}; + +static int tegra_sha_get_config(u32 alg) +{ + int cfg = 0; + + switch (alg) { + case SE_ALG_SHA1: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA1; + break; + + case SE_ALG_HMAC_SHA224: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA224; + break; + + case SE_ALG_HMAC_SHA256: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA256; + break; + + case SE_ALG_HMAC_SHA384: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA384: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA384; + break; + + case SE_ALG_HMAC_SHA512: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA512; + break; + + case SE_ALG_SHA3_224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_224; + break; + case SE_ALG_SHA3_256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_256; + break; + case SE_ALG_SHA3_384: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_384; + break; + case SE_ALG_SHA3_512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_512; + break; + default: + return -EINVAL; + } + + return cfg; +} + +static int tegra_sha_fallback_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&rctx->fallback_req); +} + +static int tegra_sha_fallback_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + + return crypto_ahash_update(&rctx->fallback_req); +} + +static int tegra_sha_fallback_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.result = req->result; + + return crypto_ahash_final(&rctx->fallback_req); +} + +static int tegra_sha_fallback_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&rctx->fallback_req); +} + +static int tegra_sha_fallback_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_digest(&rctx->fallback_req); +} + +static int tegra_sha_fallback_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_import(&rctx->fallback_req, in); +} + +static int tegra_sha_fallback_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_export(&rctx->fallback_req, out); +} + +static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, + struct tegra_sha_reqctx *rctx) +{ + u64 msg_len, msg_left; + int i = 0; + + msg_len = rctx->total_len * 8; + msg_left = rctx->datbuf.size * 8; + + /* + * If IN_ADDR_HI_0.SZ > SHA_MSG_LEFT_[0-3] to the HASH engine, + * HW treats it as the last buffer and process the data. + * Therefore, add an extra byte to msg_left if it is not the + * last buffer. + */ + if (rctx->task & SHA_UPDATE) { + msg_left += 8; + msg_len += 8; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(8); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_MSG_LENGTH); + cpuvaddr[i++] = lower_32_bits(msg_len); + cpuvaddr[i++] = upper_32_bits(msg_len); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = lower_32_bits(msg_left); + cpuvaddr[i++] = upper_32_bits(msg_left); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = host1x_opcode_setpayload(6); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG); + cpuvaddr[i++] = rctx->config; + + if (rctx->task & SHA_FIRST) { + cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT; + rctx->task &= ~SHA_FIRST; + } else { + cpuvaddr[i++] = 0; + } + + cpuvaddr[i++] = rctx->datbuf.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | + SE_ADDR_HI_SZ(rctx->datbuf.size)); + cpuvaddr[i++] = rctx->digest.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) | + SE_ADDR_HI_SZ(rctx->digest.size)); + if (rctx->key_id) { + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG); + cpuvaddr[i++] = SE_AES_KEY_INDEX(rctx->key_id); + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION); + cpuvaddr[i++] = SE_SHA_OP_WRSTALL | + SE_SHA_OP_START | + SE_SHA_OP_LASTBUF; + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x", + msg_len, msg_left, rctx->config); + + return i; +} + +static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); +} + +static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + +static int tegra_sha_do_update(struct ahash_request *req) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + unsigned int nblks, nresidue, size, ret; + u32 *cpuvaddr = ctx->se->cmdbuf->addr; + + nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; + nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; + + /* + * If nbytes is a multiple of block size and there is no residue, + * then reserve the last block as residue during final() to process. + */ + if (!nresidue && nblks) { + nresidue = rctx->blk_size; + nblks--; + } + + rctx->src_sg = req->src; + rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue; + rctx->total_len += rctx->datbuf.size; + + /* + * If nbytes are less than a block size, copy it residue and + * return. The bytes will be processed in final() + */ + if (nblks < 1) { + scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes, 0); + + rctx->residue.size += req->nbytes; + return 0; + } + + /* Copy the previous residue first */ + if (rctx->residue.size) + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + + scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes - nresidue, 0); + + scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg, + req->nbytes - nresidue, nresidue, 0); + + /* Update residue value with the residue after current block */ + rctx->residue.size = nresidue; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_HASH_REG; + + /* + * If this is not the first 'update' call, paste the previous copied + * intermediate results to the registers so that it gets picked up. + * This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_sha_paste_hash_result(ctx->se, rctx); + + size = tegra_sha_prep_cmd(ctx->se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(ctx->se, size); + + /* + * If this is not the final update, copy the intermediate results + * from the registers so that it can be used in the next 'update' + * call. This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FINAL)) + tegra_sha_copy_hash_result(ctx->se, rctx); + + return ret; +} + +static int tegra_sha_do_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + int size, ret = 0; + + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + rctx->datbuf.size = rctx->residue.size; + rctx->total_len += rctx->residue.size; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_MEMORY; + + size = tegra_sha_prep_cmd(se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(se, size); + if (ret) + goto out; + + /* Copy result */ + memcpy(req->result, rctx->digest.buf, rctx->digest.size); + +out: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm), + rctx->residue.buf, rctx->residue.addr); + dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf, + rctx->digest.addr); + return ret; +} + +static int tegra_sha_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret = 0; + + if (rctx->task & SHA_UPDATE) { + ret = tegra_sha_do_update(req); + rctx->task &= ~SHA_UPDATE; + } + + if (rctx->task & SHA_FINAL) { + ret = tegra_sha_do_final(req); + rctx->task &= ~SHA_FINAL; + } + + crypto_finalize_hash_request(se->engine, req, ret); + + return 0; +} + +static void tegra_sha_init_fallback(struct crypto_ahash *tfm, struct tegra_sha_ctx *ctx, + const char *algname) +{ + unsigned int statesize; + + ctx->fallback_tfm = crypto_alloc_ahash(algname, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback_tfm)) { + dev_warn(ctx->se->dev, + "failed to allocate fallback for %s\n", algname); + ctx->fallback_tfm = NULL; + return; + } + + statesize = crypto_ahash_statesize(ctx->fallback_tfm); + + if (statesize > sizeof(struct tegra_sha_reqctx)) + crypto_ahash_set_statesize(tfm, statesize); + + /* Update reqsize if fallback is added */ + crypto_ahash_set_reqsize(tfm, + sizeof(struct tegra_sha_reqctx) + + crypto_ahash_reqsize(ctx->fallback_tfm)); +} + +static int tegra_sha_cra_init(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(tfm); + se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base); + + crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_sha_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->fallback = false; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + if (se_alg->alg_base) + tegra_sha_init_fallback(ahash_tfm, ctx, algname); + + ctx->alg = ret; + + return 0; +} + +static void tegra_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback_tfm) + crypto_free_ahash(ctx->fallback_tfm); + + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_sha_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + + if (ctx->fallback) + return tegra_sha_fallback_init(req); + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->key_id = ctx->key_id; + rctx->task = SHA_FIRST; + rctx->alg = ctx->alg; + rctx->blk_size = crypto_ahash_blocksize(tfm); + rctx->digest.size = crypto_ahash_digestsize(tfm); + + rctx->digest.buf = dma_alloc_coherent(se->dev, rctx->digest.size, + &rctx->digest.addr, GFP_KERNEL); + if (!rctx->digest.buf) + goto digbuf_fail; + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, rctx->datbuf.buf, + rctx->datbuf.addr); +digbuf_fail: + return -ENOMEM; +} + +static int tegra_hmac_fallback_setkey(struct tegra_sha_ctx *ctx, const u8 *key, + unsigned int keylen) +{ + if (!ctx->fallback_tfm) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ctx->fallback = true; + return crypto_ahash_setkey(ctx->fallback_tfm, key, keylen); +} + +static int tegra_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) + return tegra_hmac_fallback_setkey(ctx, key, keylen); + + ctx->fallback = false; + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_sha_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_update(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_final(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_finup(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_digest(req); + + tegra_sha_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_export(req, out); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_sha_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_import(req, in); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_hash_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "tegra-se-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "tegra-se-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "tegra-se-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "tegra-se-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "tegra-se-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-224", + .cra_driver_name = "tegra-se-sha3-224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-256", + .cra_driver_name = "tegra-se-sha3-256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-384", + .cra_driver_name = "tegra-se-sha3-384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-512", + .cra_driver_name = "tegra-se-sha3-512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha224", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "tegra-se-hmac-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha256", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "tegra-se-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha384", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "tegra-se-hmac-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha512", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "tegra-se-hmac-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + } +}; + +static int tegra_hash_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + switch (alg) { + case SE_ALG_HMAC_SHA224: + case SE_ALG_HMAC_SHA256: + case SE_ALG_HMAC_SHA384: + case SE_ALG_HMAC_SHA512: + manifest |= SE_KAC_HMAC; + break; + default: + return -EINVAL; + } + + switch (keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + default: + manifest |= SE_KAC_SIZE_256; + break; + } + + return manifest; +} + +int tegra_init_hash(struct tegra_se *se) +{ + struct ahash_engine_alg *alg; + int i, ret; + + se->manifest = tegra_hash_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) { + tegra_hash_algs[i].se_dev = se; + alg = &tegra_hash_algs[i].alg.ahash; + + ret = crypto_engine_register_ahash(alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + alg->base.halg.base.cra_name); + goto sha_err; + } + } + + return 0; + +sha_err: + while (i--) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); + + return ret; +} + +void tegra_deinit_hash(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-key.c b/drivers/crypto/tegra/tegra-se-key.c new file mode 100644 index 000000000000..ac14678dbd30 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-key.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver file to manage keys of NVIDIA Security Engine. + */ + +#include <linux/bitops.h> +#include <linux/module.h> +#include <crypto/aes.h> + +#include "tegra-se.h" + +#define SE_KEY_FULL_MASK GENMASK(SE_MAX_KEYSLOT, 0) + +/* Reserve keyslot 0, 14, 15 */ +#define SE_KEY_RSVD_MASK (BIT(0) | BIT(14) | BIT(15)) +#define SE_KEY_VALID_MASK (SE_KEY_FULL_MASK & ~SE_KEY_RSVD_MASK) + +/* Mutex lock to guard keyslots */ +static DEFINE_MUTEX(kslt_lock); + +/* Keyslot bitmask (0 = available, 1 = in use/not available) */ +static u16 tegra_se_keyslots = SE_KEY_RSVD_MASK; + +static u16 tegra_keyslot_alloc(void) +{ + u16 keyid; + + mutex_lock(&kslt_lock); + /* Check if all key slots are full */ + if (tegra_se_keyslots == GENMASK(SE_MAX_KEYSLOT, 0)) { + mutex_unlock(&kslt_lock); + return 0; + } + + keyid = ffz(tegra_se_keyslots); + tegra_se_keyslots |= BIT(keyid); + + mutex_unlock(&kslt_lock); + + return keyid; +} + +static void tegra_keyslot_free(u16 slot) +{ + mutex_lock(&kslt_lock); + tegra_se_keyslots &= ~(BIT(slot)); + mutex_unlock(&kslt_lock); +} + +static unsigned int tegra_key_prep_ins_cmd(struct tegra_se *se, u32 *cpuvaddr, + const u32 *key, u32 keylen, u16 slot, u32 alg) +{ + int i = 0, j; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_DUMMY; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->manifest); + cpuvaddr[i++] = se->manifest(se->owner, alg, keylen); + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_dst); + + cpuvaddr[i++] = SE_AES_KEY_DST_INDEX(slot); + + for (j = 0; j < keylen / 4; j++) { + /* Set key address */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_addr); + cpuvaddr[i++] = j; + + /* Set key data */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_data); + cpuvaddr[i++] = key[j]; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->config); + cpuvaddr[i++] = SE_CFG_INS; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_START | + SE_AES_OP_LASTBUF; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "key-slot %u key-manifest %#x\n", + slot, se->manifest(se->owner, alg, keylen)); + + return i; +} + +static bool tegra_key_in_kslt(u32 keyid) +{ + bool ret; + + if (keyid > SE_MAX_KEYSLOT) + return false; + + mutex_lock(&kslt_lock); + ret = ((BIT(keyid) & SE_KEY_VALID_MASK) && + (BIT(keyid) & tegra_se_keyslots)); + mutex_unlock(&kslt_lock); + + return ret; +} + +static int tegra_key_insert(struct tegra_se *se, const u8 *key, + u32 keylen, u16 slot, u32 alg) +{ + const u32 *keyval = (u32 *)key; + u32 *addr = se->cmdbuf->addr, size; + + size = tegra_key_prep_ins_cmd(se, addr, keyval, keylen, slot, alg); + + return tegra_se_host1x_submit(se, size); +} + +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg) +{ + u8 zkey[AES_MAX_KEY_SIZE] = {0}; + + if (!keyid) + return; + + /* Overwrite the key with 0s */ + tegra_key_insert(se, zkey, AES_MAX_KEY_SIZE, keyid, alg); + + tegra_keyslot_free(keyid); +} + +int tegra_key_submit(struct tegra_se *se, const u8 *key, u32 keylen, u32 alg, u32 *keyid) +{ + int ret; + + /* Use the existing slot if it is already allocated */ + if (!tegra_key_in_kslt(*keyid)) { + *keyid = tegra_keyslot_alloc(); + if (!(*keyid)) { + dev_err(se->dev, "failed to allocate key slot\n"); + return -ENOMEM; + } + } + + ret = tegra_key_insert(se, key, keylen, *keyid, alg); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/crypto/tegra/tegra-se-main.c b/drivers/crypto/tegra/tegra-se-main.c new file mode 100644 index 000000000000..9955874b3dc3 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-main.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver for NVIDIA Security Engine in Tegra Chips + */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/mod_devicetable.h> + +#include <crypto/engine.h> + +#include "tegra-se.h" + +static struct host1x_bo *tegra_se_cmdbuf_get(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_get(&cmdbuf->ref); + + return host_bo; +} + +static void tegra_se_cmdbuf_release(struct kref *ref) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(ref, struct tegra_se_cmdbuf, ref); + + dma_free_attrs(cmdbuf->dev, cmdbuf->size, cmdbuf->addr, + cmdbuf->iova, 0); + + kfree(cmdbuf); +} + +static void tegra_se_cmdbuf_put(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_put(&cmdbuf->ref, tegra_se_cmdbuf_release); +} + +static struct host1x_bo_mapping * +tegra_se_cmdbuf_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(bo, struct tegra_se_cmdbuf, bo); + struct host1x_bo_mapping *map; + int err; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + kref_init(&map->ref); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; + } + + err = dma_get_sgtable(dev, map->sgt, cmdbuf->addr, + cmdbuf->iova, cmdbuf->words * 4); + if (err) + goto free_sgt; + + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + + map->phys = sg_dma_address(map->sgt->sgl); + map->size = cmdbuf->words * 4; + map->chunks = err; + + return map; + +free_sgt: + sg_free_table(map->sgt); + kfree(map->sgt); +free: + kfree(map); + return ERR_PTR(err); +} + +static void tegra_se_cmdbuf_unpin(struct host1x_bo_mapping *map) +{ + if (!map) + return; + + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); + host1x_bo_put(map->bo); + + kfree(map); +} + +static void *tegra_se_cmdbuf_mmap(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + return cmdbuf->addr; +} + +static void tegra_se_cmdbuf_munmap(struct host1x_bo *host_bo, void *addr) +{ +} + +static const struct host1x_bo_ops tegra_se_cmdbuf_ops = { + .get = tegra_se_cmdbuf_get, + .put = tegra_se_cmdbuf_put, + .pin = tegra_se_cmdbuf_pin, + .unpin = tegra_se_cmdbuf_unpin, + .mmap = tegra_se_cmdbuf_mmap, + .munmap = tegra_se_cmdbuf_munmap, +}; + +static struct tegra_se_cmdbuf *tegra_se_host1x_bo_alloc(struct tegra_se *se, ssize_t size) +{ + struct tegra_se_cmdbuf *cmdbuf; + struct device *dev = se->dev->parent; + + cmdbuf = kzalloc(sizeof(*cmdbuf), GFP_KERNEL); + if (!cmdbuf) + return NULL; + + cmdbuf->addr = dma_alloc_attrs(dev, size, &cmdbuf->iova, + GFP_KERNEL, 0); + if (!cmdbuf->addr) + return NULL; + + cmdbuf->size = size; + cmdbuf->dev = dev; + + host1x_bo_init(&cmdbuf->bo, &tegra_se_cmdbuf_ops); + kref_init(&cmdbuf->ref); + + return cmdbuf; +} + +int tegra_se_host1x_submit(struct tegra_se *se, u32 size) +{ + struct host1x_job *job; + int ret; + + job = host1x_job_alloc(se->channel, 1, 0, true); + if (!job) { + dev_err(se->dev, "failed to allocate host1x job\n"); + return -ENOMEM; + } + + job->syncpt = host1x_syncpt_get(se->syncpt); + job->syncpt_incrs = 1; + job->client = &se->client; + job->class = se->client.class; + job->serialize = true; + job->engine_fallback_streamid = se->stream_id; + job->engine_streamid_offset = SE_STREAM_ID; + + se->cmdbuf->words = size; + + host1x_job_add_gather(job, &se->cmdbuf->bo, size, 0); + + ret = host1x_job_pin(job, se->dev); + if (ret) { + dev_err(se->dev, "failed to pin host1x job\n"); + goto job_put; + } + + ret = host1x_job_submit(job); + if (ret) { + dev_err(se->dev, "failed to submit host1x job\n"); + goto job_unpin; + } + + ret = host1x_syncpt_wait(job->syncpt, job->syncpt_end, + MAX_SCHEDULE_TIMEOUT, NULL); + if (ret) { + dev_err(se->dev, "host1x job timed out\n"); + return ret; + } + + host1x_job_put(job); + return 0; + +job_unpin: + host1x_job_unpin(job); +job_put: + host1x_job_put(job); + + return ret; +} + +static int tegra_se_client_init(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + int ret; + + se->channel = host1x_channel_request(&se->client); + if (!se->channel) { + dev_err(se->dev, "host1x channel map failed\n"); + return -ENODEV; + } + + se->syncpt = host1x_syncpt_request(&se->client, 0); + if (!se->syncpt) { + dev_err(se->dev, "host1x syncpt allocation failed\n"); + ret = -EINVAL; + goto channel_put; + } + + se->syncpt_id = host1x_syncpt_id(se->syncpt); + + se->cmdbuf = tegra_se_host1x_bo_alloc(se, SZ_4K); + if (!se->cmdbuf) { + ret = -ENOMEM; + goto syncpt_put; + } + + ret = se->hw->init_alg(se); + if (ret) { + dev_err(se->dev, "failed to register algorithms\n"); + goto cmdbuf_put; + } + + return 0; + +cmdbuf_put: + tegra_se_cmdbuf_put(&se->cmdbuf->bo); +syncpt_put: + host1x_syncpt_put(se->syncpt); +channel_put: + host1x_channel_put(se->channel); + + return ret; +} + +static int tegra_se_client_deinit(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + + se->hw->deinit_alg(se); + tegra_se_cmdbuf_put(&se->cmdbuf->bo); + host1x_syncpt_put(se->syncpt); + host1x_channel_put(se->channel); + + return 0; +} + +static const struct host1x_client_ops tegra_se_client_ops = { + .init = tegra_se_client_init, + .exit = tegra_se_client_deinit, +}; + +static int tegra_se_host1x_register(struct tegra_se *se) +{ + INIT_LIST_HEAD(&se->client.list); + se->client.dev = se->dev; + se->client.ops = &tegra_se_client_ops; + se->client.class = se->hw->host1x_class; + se->client.num_syncpts = 1; + + host1x_client_register(&se->client); + + return 0; +} + +static int tegra_se_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct tegra_se *se; + int ret; + + se = devm_kzalloc(dev, sizeof(*se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->dev = dev; + se->owner = TEGRA_GPSE_ID; + se->hw = device_get_match_data(&pdev->dev); + + se->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(se->base)) + return PTR_ERR(se->base); + + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); + platform_set_drvdata(pdev, se); + + se->clk = devm_clk_get_enabled(se->dev, NULL); + if (IS_ERR(se->clk)) + return dev_err_probe(dev, PTR_ERR(se->clk), + "failed to enable clocks\n"); + + if (!tegra_dev_iommu_get_stream_id(dev, &se->stream_id)) + return dev_err_probe(dev, -ENODEV, + "failed to get IOMMU stream ID\n"); + + writel(se->stream_id, se->base + SE_STREAM_ID); + + se->engine = crypto_engine_alloc_init(dev, 0); + if (!se->engine) + return dev_err_probe(dev, -ENOMEM, "failed to init crypto engine\n"); + + ret = crypto_engine_start(se->engine); + if (ret) { + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to start crypto engine\n"); + } + + ret = tegra_se_host1x_register(se); + if (ret) { + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to init host1x params\n"); + } + + return 0; +} + +static void tegra_se_remove(struct platform_device *pdev) +{ + struct tegra_se *se = platform_get_drvdata(pdev); + + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + iommu_fwspec_free(se->dev); + host1x_client_unregister(&se->client); +} + +static const struct tegra_se_regs tegra234_aes1_regs = { + .config = SE_AES1_CFG, + .op = SE_AES1_OPERATION, + .last_blk = SE_AES1_LAST_BLOCK, + .linear_ctr = SE_AES1_LINEAR_CTR, + .aad_len = SE_AES1_AAD_LEN, + .cryp_msg_len = SE_AES1_CRYPTO_MSG_LEN, + .manifest = SE_AES1_KEYMANIFEST, + .key_addr = SE_AES1_KEY_ADDR, + .key_data = SE_AES1_KEY_DATA, + .key_dst = SE_AES1_KEY_DST, + .result = SE_AES1_CMAC_RESULT, +}; + +static const struct tegra_se_regs tegra234_hash_regs = { + .config = SE_SHA_CFG, + .op = SE_SHA_OPERATION, + .manifest = SE_SHA_KEYMANIFEST, + .key_addr = SE_SHA_KEY_ADDR, + .key_data = SE_SHA_KEY_DATA, + .key_dst = SE_SHA_KEY_DST, + .result = SE_SHA_HASH_RESULT, +}; + +static const struct tegra_se_hw tegra234_aes_hw = { + .regs = &tegra234_aes1_regs, + .kac_ver = 1, + .host1x_class = 0x3b, + .init_alg = tegra_init_aes, + .deinit_alg = tegra_deinit_aes, +}; + +static const struct tegra_se_hw tegra234_hash_hw = { + .regs = &tegra234_hash_regs, + .kac_ver = 1, + .host1x_class = 0x3d, + .init_alg = tegra_init_hash, + .deinit_alg = tegra_deinit_hash, +}; + +static const struct of_device_id tegra_se_of_match[] = { + { + .compatible = "nvidia,tegra234-se-aes", + .data = &tegra234_aes_hw + }, { + .compatible = "nvidia,tegra234-se-hash", + .data = &tegra234_hash_hw, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_se_of_match); + +static struct platform_driver tegra_se_driver = { + .driver = { + .name = "tegra-se", + .of_match_table = tegra_se_of_match, + }, + .probe = tegra_se_probe, + .remove_new = tegra_se_remove, +}; + +static int tegra_se_host1x_probe(struct host1x_device *dev) +{ + return host1x_device_init(dev); +} + +static int tegra_se_host1x_remove(struct host1x_device *dev) +{ + host1x_device_exit(dev); + + return 0; +} + +static struct host1x_driver tegra_se_host1x_driver = { + .driver = { + .name = "tegra-se-host1x", + }, + .probe = tegra_se_host1x_probe, + .remove = tegra_se_host1x_remove, + .subdevs = tegra_se_of_match, +}; + +static int __init tegra_se_module_init(void) +{ + int ret; + + ret = host1x_driver_register(&tegra_se_host1x_driver); + if (ret) + return ret; + + return platform_driver_register(&tegra_se_driver); +} + +static void __exit tegra_se_module_exit(void) +{ + host1x_driver_unregister(&tegra_se_host1x_driver); + platform_driver_unregister(&tegra_se_driver); +} + +module_init(tegra_se_module_init); +module_exit(tegra_se_module_exit); + +MODULE_DESCRIPTION("NVIDIA Tegra Security Engine Driver"); +MODULE_AUTHOR("Akhil R <akhilrajeev@nvidia.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/tegra/tegra-se.h b/drivers/crypto/tegra/tegra-se.h new file mode 100644 index 000000000000..b9dd7ceb8783 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se.h @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * Header file for NVIDIA Security Engine driver. + */ + +#ifndef _TEGRA_SE_H +#define _TEGRA_SE_H + +#include <linux/bitfield.h> +#include <linux/iommu.h> +#include <linux/host1x.h> +#include <crypto/aead.h> +#include <crypto/engine.h> +#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha3.h> +#include <crypto/skcipher.h> + +#define SE_OWNERSHIP 0x14 +#define SE_OWNERSHIP_UID(x) FIELD_GET(GENMASK(7, 0), x) +#define TEGRA_GPSE_ID 3 + +#define SE_STREAM_ID 0x90 + +#define SE_SHA_CFG 0x4004 +#define SE_SHA_KEY_ADDR 0x4094 +#define SE_SHA_KEY_DATA 0x4098 +#define SE_SHA_KEYMANIFEST 0x409c +#define SE_SHA_CRYPTO_CFG 0x40a4 +#define SE_SHA_KEY_DST 0x40a8 +#define SE_SHA_SRC_KSLT 0x4180 +#define SE_SHA_TGT_KSLT 0x4184 +#define SE_SHA_MSG_LENGTH 0x401c +#define SE_SHA_OPERATION 0x407c +#define SE_SHA_HASH_RESULT 0x40b0 + +#define SE_SHA_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_SHA_ENC_MODE_SHA1 SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_SHA224 SE_SHA_ENC_MODE(4) +#define SE_SHA_ENC_MODE_SHA256 SE_SHA_ENC_MODE(5) +#define SE_SHA_ENC_MODE_SHA384 SE_SHA_ENC_MODE(6) +#define SE_SHA_ENC_MODE_SHA512 SE_SHA_ENC_MODE(7) +#define SE_SHA_ENC_MODE_SHA_CTX_INTEGRITY SE_SHA_ENC_MODE(8) +#define SE_SHA_ENC_MODE_SHA3_224 SE_SHA_ENC_MODE(9) +#define SE_SHA_ENC_MODE_SHA3_256 SE_SHA_ENC_MODE(10) +#define SE_SHA_ENC_MODE_SHA3_384 SE_SHA_ENC_MODE(11) +#define SE_SHA_ENC_MODE_SHA3_512 SE_SHA_ENC_MODE(12) +#define SE_SHA_ENC_MODE_SHAKE128 SE_SHA_ENC_MODE(13) +#define SE_SHA_ENC_MODE_SHAKE256 SE_SHA_ENC_MODE(14) +#define SE_SHA_ENC_MODE_HMAC_SHA256_1KEY SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_HMAC_SHA256_2KEY SE_SHA_ENC_MODE(1) +#define SE_SHA_ENC_MODE_SM3_256 SE_SHA_ENC_MODE(0) + +#define SE_SHA_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_SHA_ENC_ALG_NOP SE_SHA_CFG_ENC_ALG(0) +#define SE_SHA_ENC_ALG_SHA_ENC SE_SHA_CFG_ENC_ALG(1) +#define SE_SHA_ENC_ALG_RNG SE_SHA_CFG_ENC_ALG(2) +#define SE_SHA_ENC_ALG_SHA SE_SHA_CFG_ENC_ALG(3) +#define SE_SHA_ENC_ALG_SM3 SE_SHA_CFG_ENC_ALG(4) +#define SE_SHA_ENC_ALG_HMAC SE_SHA_CFG_ENC_ALG(7) +#define SE_SHA_ENC_ALG_KDF SE_SHA_CFG_ENC_ALG(8) +#define SE_SHA_ENC_ALG_KEY_INVLD SE_SHA_CFG_ENC_ALG(10) +#define SE_SHA_ENC_ALG_KEY_INQUIRE SE_SHA_CFG_ENC_ALG(12) +#define SE_SHA_ENC_ALG_INS SE_SHA_CFG_ENC_ALG(13) + +#define SE_SHA_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_SHA_OP_WRSTALL FIELD_PREP(BIT(15), 1) + +#define SE_SHA_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_SHA_OP_START SE_SHA_OP_OP(1) +#define SE_SHA_OP_RESTART_OUT SE_SHA_OP_OP(2) +#define SE_SHA_OP_RESTART_IN SE_SHA_OP_OP(4) +#define SE_SHA_OP_RESTART_INOUT SE_SHA_OP_OP(5) +#define SE_SHA_OP_DUMMY SE_SHA_OP_OP(6) + +#define SE_SHA_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_SHA_DEC_ALG_NOP SE_SHA_CFG_DEC_ALG(0) +#define SE_SHA_DEC_ALG_AES_DEC SE_SHA_CFG_DEC_ALG(1) +#define SE_SHA_DEC_ALG_HMAC SE_SHA_CFG_DEC_ALG(7) +#define SE_SHA_DEC_ALG_HMAC_VERIFY SE_SHA_CFG_DEC_ALG(9) + +#define SE_SHA_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_SHA_DST_MEMORY SE_SHA_CFG_DST(0) +#define SE_SHA_DST_HASH_REG SE_SHA_CFG_DST(1) +#define SE_SHA_DST_KEYTABLE SE_SHA_CFG_DST(2) +#define SE_SHA_DST_SRK SE_SHA_CFG_DST(3) + +#define SE_SHA_TASK_HASH_INIT BIT(0) + +/* AES Configuration */ +#define SE_AES0_CFG 0x1004 +#define SE_AES0_CRYPTO_CONFIG 0x1008 +#define SE_AES0_KEY_DST 0x1030 +#define SE_AES0_OPERATION 0x1038 +#define SE_AES0_LINEAR_CTR 0x101c +#define SE_AES0_LAST_BLOCK 0x102c +#define SE_AES0_KEY_ADDR 0x10bc +#define SE_AES0_KEY_DATA 0x10c0 +#define SE_AES0_CMAC_RESULT 0x10c4 +#define SE_AES0_SRC_KSLT 0x1100 +#define SE_AES0_TGT_KSLT 0x1104 +#define SE_AES0_KEYMANIFEST 0x1114 +#define SE_AES0_AAD_LEN 0x112c +#define SE_AES0_CRYPTO_MSG_LEN 0x1134 + +#define SE_AES1_CFG 0x2004 +#define SE_AES1_CRYPTO_CONFIG 0x2008 +#define SE_AES1_KEY_DST 0x2030 +#define SE_AES1_OPERATION 0x2038 +#define SE_AES1_LINEAR_CTR 0x201c +#define SE_AES1_LAST_BLOCK 0x202c +#define SE_AES1_KEY_ADDR 0x20bc +#define SE_AES1_KEY_DATA 0x20c0 +#define SE_AES1_CMAC_RESULT 0x20c4 +#define SE_AES1_SRC_KSLT 0x2100 +#define SE_AES1_TGT_KSLT 0x2104 +#define SE_AES1_KEYMANIFEST 0x2114 +#define SE_AES1_AAD_LEN 0x212c +#define SE_AES1_CRYPTO_MSG_LEN 0x2134 + +#define SE_AES_CFG_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_AES_ENC_MODE_GMAC SE_AES_CFG_ENC_MODE(3) +#define SE_AES_ENC_MODE_GCM SE_AES_CFG_ENC_MODE(4) +#define SE_AES_ENC_MODE_GCM_FINAL SE_AES_CFG_ENC_MODE(5) +#define SE_AES_ENC_MODE_CMAC SE_AES_CFG_ENC_MODE(7) +#define SE_AES_ENC_MODE_CBC_MAC SE_AES_CFG_ENC_MODE(12) + +#define SE_AES_CFG_DEC_MODE(x) FIELD_PREP(GENMASK(23, 16), x) +#define SE_AES_DEC_MODE_GMAC SE_AES_CFG_DEC_MODE(3) +#define SE_AES_DEC_MODE_GCM SE_AES_CFG_DEC_MODE(4) +#define SE_AES_DEC_MODE_GCM_FINAL SE_AES_CFG_DEC_MODE(5) +#define SE_AES_DEC_MODE_CBC_MAC SE_AES_CFG_DEC_MODE(12) + +#define SE_AES_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_AES_ENC_ALG_NOP SE_AES_CFG_ENC_ALG(0) +#define SE_AES_ENC_ALG_AES_ENC SE_AES_CFG_ENC_ALG(1) +#define SE_AES_ENC_ALG_RNG SE_AES_CFG_ENC_ALG(2) +#define SE_AES_ENC_ALG_SHA SE_AES_CFG_ENC_ALG(3) +#define SE_AES_ENC_ALG_HMAC SE_AES_CFG_ENC_ALG(7) +#define SE_AES_ENC_ALG_KDF SE_AES_CFG_ENC_ALG(8) +#define SE_AES_ENC_ALG_INS SE_AES_CFG_ENC_ALG(13) + +#define SE_AES_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_AES_DEC_ALG_NOP SE_AES_CFG_DEC_ALG(0) +#define SE_AES_DEC_ALG_AES_DEC SE_AES_CFG_DEC_ALG(1) + +#define SE_AES_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_AES_DST_MEMORY SE_AES_CFG_DST(0) +#define SE_AES_DST_HASH_REG SE_AES_CFG_DST(1) +#define SE_AES_DST_KEYTABLE SE_AES_CFG_DST(2) +#define SE_AES_DST_SRK SE_AES_CFG_DST(3) + +/* AES Crypto Configuration */ +#define SE_AES_KEY2_INDEX(x) FIELD_PREP(GENMASK(31, 28), x) +#define SE_AES_KEY_INDEX(x) FIELD_PREP(GENMASK(27, 24), x) + +#define SE_AES_CRYPTO_CFG_SCC_DIS FIELD_PREP(BIT(20), 1) + +#define SE_AES_CRYPTO_CFG_CTR_CNTN(x) FIELD_PREP(GENMASK(18, 11), x) + +#define SE_AES_CRYPTO_CFG_IV_MODE(x) FIELD_PREP(BIT(10), x) +#define SE_AES_IV_MODE_SWIV SE_AES_CRYPTO_CFG_IV_MODE(0) +#define SE_AES_IV_MODE_HWIV SE_AES_CRYPTO_CFG_IV_MODE(1) + +#define SE_AES_CRYPTO_CFG_CORE_SEL(x) FIELD_PREP(BIT(9), x) +#define SE_AES_CORE_SEL_DECRYPT SE_AES_CRYPTO_CFG_CORE_SEL(0) +#define SE_AES_CORE_SEL_ENCRYPT SE_AES_CRYPTO_CFG_CORE_SEL(1) + +#define SE_AES_CRYPTO_CFG_IV_SEL(x) FIELD_PREP(GENMASK(8, 7), x) +#define SE_AES_IV_SEL_UPDATED SE_AES_CRYPTO_CFG_IV_SEL(1) +#define SE_AES_IV_SEL_REG SE_AES_CRYPTO_CFG_IV_SEL(2) +#define SE_AES_IV_SEL_RANDOM SE_AES_CRYPTO_CFG_IV_SEL(3) + +#define SE_AES_CRYPTO_CFG_VCTRAM_SEL(x) FIELD_PREP(GENMASK(6, 5), x) +#define SE_AES_VCTRAM_SEL_MEMORY SE_AES_CRYPTO_CFG_VCTRAM_SEL(0) +#define SE_AES_VCTRAM_SEL_TWEAK SE_AES_CRYPTO_CFG_VCTRAM_SEL(1) +#define SE_AES_VCTRAM_SEL_AESOUT SE_AES_CRYPTO_CFG_VCTRAM_SEL(2) +#define SE_AES_VCTRAM_SEL_PREV_MEM SE_AES_CRYPTO_CFG_VCTRAM_SEL(3) + +#define SE_AES_CRYPTO_CFG_INPUT_SEL(x) FIELD_PREP(GENMASK(4, 3), x) +#define SE_AES_INPUT_SEL_MEMORY SE_AES_CRYPTO_CFG_INPUT_SEL(0) +#define SE_AES_INPUT_SEL_RANDOM SE_AES_CRYPTO_CFG_INPUT_SEL(1) +#define SE_AES_INPUT_SEL_AESOUT SE_AES_CRYPTO_CFG_INPUT_SEL(2) +#define SE_AES_INPUT_SEL_LINEAR_CTR SE_AES_CRYPTO_CFG_INPUT_SEL(3) +#define SE_AES_INPUT_SEL_REG SE_AES_CRYPTO_CFG_INPUT_SEL(1) + +#define SE_AES_CRYPTO_CFG_XOR_POS(x) FIELD_PREP(GENMASK(2, 1), x) +#define SE_AES_XOR_POS_BYPASS SE_AES_CRYPTO_CFG_XOR_POS(0) +#define SE_AES_XOR_POS_BOTH SE_AES_CRYPTO_CFG_XOR_POS(1) +#define SE_AES_XOR_POS_TOP SE_AES_CRYPTO_CFG_XOR_POS(2) +#define SE_AES_XOR_POS_BOTTOM SE_AES_CRYPTO_CFG_XOR_POS(3) + +#define SE_AES_CRYPTO_CFG_HASH_EN(x) FIELD_PREP(BIT(0), x) +#define SE_AES_HASH_DISABLE SE_AES_CRYPTO_CFG_HASH_EN(0) +#define SE_AES_HASH_ENABLE SE_AES_CRYPTO_CFG_HASH_EN(1) + +#define SE_LAST_BLOCK_VAL(x) FIELD_PREP(GENMASK(19, 0), x) +#define SE_LAST_BLOCK_RES_BITS(x) FIELD_PREP(GENMASK(26, 20), x) + +#define SE_AES_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_AES_OP_WRSTALL FIELD_PREP(BIT(15), 1) +#define SE_AES_OP_FINAL FIELD_PREP(BIT(5), 1) +#define SE_AES_OP_INIT FIELD_PREP(BIT(4), 1) + +#define SE_AES_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_AES_OP_START SE_AES_OP_OP(1) +#define SE_AES_OP_RESTART_OUT SE_AES_OP_OP(2) +#define SE_AES_OP_RESTART_IN SE_AES_OP_OP(4) +#define SE_AES_OP_RESTART_INOUT SE_AES_OP_OP(5) +#define SE_AES_OP_DUMMY SE_AES_OP_OP(6) + +#define SE_KAC_SIZE(x) FIELD_PREP(GENMASK(15, 14), x) +#define SE_KAC_SIZE_128 SE_KAC_SIZE(0) +#define SE_KAC_SIZE_192 SE_KAC_SIZE(1) +#define SE_KAC_SIZE_256 SE_KAC_SIZE(2) + +#define SE_KAC_EXPORTABLE FIELD_PREP(BIT(12), 1) + +#define SE_KAC_PURPOSE(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_KAC_ENC SE_KAC_PURPOSE(0) +#define SE_KAC_CMAC SE_KAC_PURPOSE(1) +#define SE_KAC_HMAC SE_KAC_PURPOSE(2) +#define SE_KAC_GCM_KW SE_KAC_PURPOSE(3) +#define SE_KAC_HMAC_KDK SE_KAC_PURPOSE(6) +#define SE_KAC_HMAC_KDD SE_KAC_PURPOSE(7) +#define SE_KAC_HMAC_KDD_KUW SE_KAC_PURPOSE(8) +#define SE_KAC_XTS SE_KAC_PURPOSE(9) +#define SE_KAC_GCM SE_KAC_PURPOSE(10) + +#define SE_KAC_USER_NS FIELD_PREP(GENMASK(6, 4), 3) + +#define SE_AES_KEY_DST_INDEX(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_ADDR_HI_MSB(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_ADDR_HI_SZ(x) FIELD_PREP(GENMASK(23, 0), x) + +#define SE_CFG_AES_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_AES_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_CMAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CMAC | \ + SE_AES_DST_HASH_REG) + +#define SE_CFG_CBC_MAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CBC_MAC) + +#define SE_CFG_INS (SE_AES_ENC_ALG_INS | \ + SE_AES_DEC_ALG_NOP) + +#define SE_CRYPTO_CFG_ECB_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_ENCRYPT) + +#define SE_CRYPTO_CFG_ECB_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_DECRYPT) + +#define SE_CRYPTO_CFG_CBC_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_PREV_MEM | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CTR (SE_AES_INPUT_SEL_LINEAR_CTR | \ + SE_AES_VCTRAM_SEL_MEMORY | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_CRYPTO_CFG_CTR_CNTN(1) | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_MAC (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_HASH_ENABLE | \ + SE_AES_IV_SEL_REG) + +#define HASH_RESULT_REG_COUNT 50 +#define CMAC_RESULT_REG_COUNT 4 + +#define SE_CRYPTO_CTR_REG_COUNT 4 +#define SE_MAX_KEYSLOT 15 +#define SE_MAX_MEM_ALLOC SZ_4M +#define SE_AES_BUFLEN 0x8000 +#define SE_SHA_BUFLEN 0x2000 + +#define SHA_FIRST BIT(0) +#define SHA_UPDATE BIT(1) +#define SHA_FINAL BIT(2) + +/* Security Engine operation modes */ +enum se_aes_alg { + SE_ALG_CBC, /* Cipher Block Chaining (CBC) mode */ + SE_ALG_ECB, /* Electronic Codebook (ECB) mode */ + SE_ALG_CTR, /* Counter (CTR) mode */ + SE_ALG_XTS, /* XTS mode */ + SE_ALG_GMAC, /* GMAC mode */ + SE_ALG_GCM, /* GCM mode */ + SE_ALG_GCM_FINAL, /* GCM FINAL mode */ + SE_ALG_CMAC, /* Cipher-based MAC (CMAC) mode */ + SE_ALG_CBC_MAC, /* CBC MAC mode */ +}; + +enum se_hash_alg { + SE_ALG_RNG_DRBG, /* Deterministic Random Bit Generator */ + SE_ALG_SHA1, /* Secure Hash Algorithm-1 (SHA1) mode */ + SE_ALG_SHA224, /* Secure Hash Algorithm-224 (SHA224) mode */ + SE_ALG_SHA256, /* Secure Hash Algorithm-256 (SHA256) mode */ + SE_ALG_SHA384, /* Secure Hash Algorithm-384 (SHA384) mode */ + SE_ALG_SHA512, /* Secure Hash Algorithm-512 (SHA512) mode */ + SE_ALG_SHA3_224, /* Secure Hash Algorithm3-224 (SHA3-224) mode */ + SE_ALG_SHA3_256, /* Secure Hash Algorithm3-256 (SHA3-256) mode */ + SE_ALG_SHA3_384, /* Secure Hash Algorithm3-384 (SHA3-384) mode */ + SE_ALG_SHA3_512, /* Secure Hash Algorithm3-512 (SHA3-512) mode */ + SE_ALG_SHAKE128, /* Secure Hash Algorithm3 (SHAKE128) mode */ + SE_ALG_SHAKE256, /* Secure Hash Algorithm3 (SHAKE256) mode */ + SE_ALG_HMAC_SHA224, /* Hash based MAC (HMAC) - 224 */ + SE_ALG_HMAC_SHA256, /* Hash based MAC (HMAC) - 256 */ + SE_ALG_HMAC_SHA384, /* Hash based MAC (HMAC) - 384 */ + SE_ALG_HMAC_SHA512, /* Hash based MAC (HMAC) - 512 */ +}; + +struct tegra_se_alg { + struct tegra_se *se_dev; + const char *alg_base; + + union { + struct skcipher_engine_alg skcipher; + struct aead_engine_alg aead; + struct ahash_engine_alg ahash; + } alg; +}; + +struct tegra_se_regs { + u32 op; + u32 config; + u32 last_blk; + u32 linear_ctr; + u32 out_addr; + u32 aad_len; + u32 cryp_msg_len; + u32 manifest; + u32 key_addr; + u32 key_data; + u32 key_dst; + u32 result; +}; + +struct tegra_se_hw { + const struct tegra_se_regs *regs; + int (*init_alg)(struct tegra_se *se); + void (*deinit_alg)(struct tegra_se *se); + bool support_sm_alg; + u32 host1x_class; + u32 kac_ver; +}; + +struct tegra_se { + int (*manifest)(u32 user, u32 alg, u32 keylen); + const struct tegra_se_hw *hw; + struct host1x_client client; + struct host1x_channel *channel; + struct tegra_se_cmdbuf *cmdbuf; + struct crypto_engine *engine; + struct host1x_syncpt *syncpt; + struct device *dev; + struct clk *clk; + unsigned int opcode_addr; + unsigned int stream_id; + unsigned int syncpt_id; + void __iomem *base; + u32 owner; +}; + +struct tegra_se_cmdbuf { + dma_addr_t iova; + u32 *addr; + struct device *dev; + struct kref ref; + struct host1x_bo bo; + ssize_t size; + u32 words; +}; + +struct tegra_se_datbuf { + u8 *buf; + dma_addr_t addr; + ssize_t size; +}; + +static inline int se_algname_to_algid(const char *name) +{ + if (!strcmp(name, "cbc(aes)")) + return SE_ALG_CBC; + else if (!strcmp(name, "ecb(aes)")) + return SE_ALG_ECB; + else if (!strcmp(name, "ctr(aes)")) + return SE_ALG_CTR; + else if (!strcmp(name, "xts(aes)")) + return SE_ALG_XTS; + else if (!strcmp(name, "cmac(aes)")) + return SE_ALG_CMAC; + else if (!strcmp(name, "gcm(aes)")) + return SE_ALG_GCM; + else if (!strcmp(name, "ccm(aes)")) + return SE_ALG_CBC_MAC; + + else if (!strcmp(name, "sha1")) + return SE_ALG_SHA1; + else if (!strcmp(name, "sha224")) + return SE_ALG_SHA224; + else if (!strcmp(name, "sha256")) + return SE_ALG_SHA256; + else if (!strcmp(name, "sha384")) + return SE_ALG_SHA384; + else if (!strcmp(name, "sha512")) + return SE_ALG_SHA512; + else if (!strcmp(name, "sha3-224")) + return SE_ALG_SHA3_224; + else if (!strcmp(name, "sha3-256")) + return SE_ALG_SHA3_256; + else if (!strcmp(name, "sha3-384")) + return SE_ALG_SHA3_384; + else if (!strcmp(name, "sha3-512")) + return SE_ALG_SHA3_512; + else if (!strcmp(name, "hmac(sha224)")) + return SE_ALG_HMAC_SHA224; + else if (!strcmp(name, "hmac(sha256)")) + return SE_ALG_HMAC_SHA256; + else if (!strcmp(name, "hmac(sha384)")) + return SE_ALG_HMAC_SHA384; + else if (!strcmp(name, "hmac(sha512)")) + return SE_ALG_HMAC_SHA512; + else + return -EINVAL; +} + +/* Functions */ +int tegra_init_aes(struct tegra_se *se); +int tegra_init_hash(struct tegra_se *se); +void tegra_deinit_aes(struct tegra_se *se); +void tegra_deinit_hash(struct tegra_se *se); +int tegra_key_submit(struct tegra_se *se, const u8 *key, + u32 keylen, u32 alg, u32 *keyid); +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg); +int tegra_se_host1x_submit(struct tegra_se *se, u32 size); + +/* HOST1x OPCODES */ +static inline u32 host1x_opcode_setpayload(unsigned int payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_incr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (10 << 28) | offset; +} + +static inline u32 host1x_opcode_nonincr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (11 << 28) | offset; +} + +static inline u32 host1x_opcode_incr(unsigned int offset, unsigned int count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned int offset, unsigned int count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} + +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} + +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} + +#define se_host1x_opcode_incr_w(x) host1x_opcode_incr_w((x) / 4) +#define se_host1x_opcode_nonincr_w(x) host1x_opcode_nonincr_w((x) / 4) +#define se_host1x_opcode_incr(x, y) host1x_opcode_incr((x) / 4, y) +#define se_host1x_opcode_nonincr(x, y) host1x_opcode_nonincr((x) / 4, y) + +#endif /*_TEGRA_SE_H*/ diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 89983d7d73ca..3a0aaa68ac8d 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -216,6 +216,30 @@ static const struct host1x_info host1x07_info = { */ static const struct host1x_sid_entry tegra234_sid_table[] = { { + /* SE2 MMIO */ + .base = 0x1658, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE4 MMIO */ + .base = 0x1660, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE2 channel */ + .base = 0x1738, + .offset = 0x90, + .limit = 0x90 + }, + { + /* SE4 channel */ + .base = 0x1740, + .offset = 0x90, + .limit = 0x90 + }, + { /* VIC channel */ .base = 0x17b8, .offset = 0x30, diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 80e243611fe2..54937b615239 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -56,31 +56,7 @@ struct crypto_acomp { struct crypto_tfm base; }; -/* - * struct crypto_istat_compress - statistics for compress algorithm - * @compress_cnt: number of compress requests - * @compress_tlen: total data size handled by compress requests - * @decompress_cnt: number of decompress requests - * @decompress_tlen: total data size handled by decompress requests - * @err_cnt: number of error for compress requests - */ -struct crypto_istat_compress { - atomic64_t compress_cnt; - atomic64_t compress_tlen; - atomic64_t decompress_cnt; - atomic64_t decompress_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define COMP_ALG_COMMON_STATS struct crypto_istat_compress stat; -#else -#define COMP_ALG_COMMON_STATS -#endif - #define COMP_ALG_COMMON { \ - COMP_ALG_COMMON_STATS \ - \ struct crypto_alg base; \ } struct comp_alg_common COMP_ALG_COMMON; @@ -261,27 +237,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; } -static inline struct crypto_istat_compress *comp_get_stat( - struct comp_alg_common *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_comp_errstat(struct comp_alg_common *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&comp_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * @@ -293,19 +248,7 @@ static inline int crypto_comp_errstat(struct comp_alg_common *alg, int err) */ static inline int crypto_acomp_compress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct comp_alg_common *alg; - - alg = crypto_comp_alg_common(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_compress *istat = comp_get_stat(alg); - - atomic64_inc(&istat->compress_cnt); - atomic64_add(req->slen, &istat->compress_tlen); - } - - return crypto_comp_errstat(alg, tfm->compress(req)); + return crypto_acomp_reqtfm(req)->compress(req); } /** @@ -319,19 +262,7 @@ static inline int crypto_acomp_compress(struct acomp_req *req) */ static inline int crypto_acomp_decompress(struct acomp_req *req) { - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct comp_alg_common *alg; - - alg = crypto_comp_alg_common(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_compress *istat = comp_get_stat(alg); - - atomic64_inc(&istat->decompress_cnt); - atomic64_add(req->slen, &istat->decompress_tlen); - } - - return crypto_comp_errstat(alg, tfm->decompress(req)); + return crypto_acomp_reqtfm(req)->decompress(req); } #endif diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 51382befbe37..0e8a41638678 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -101,22 +101,6 @@ struct aead_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -/* - * struct crypto_istat_aead - statistics for AEAD algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @err_cnt: number of error for AEAD requests - */ -struct crypto_istat_aead { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t err_cnt; -}; - /** * struct aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the @@ -135,7 +119,6 @@ struct crypto_istat_aead { * @setkey: see struct skcipher_alg * @encrypt: see struct skcipher_alg * @decrypt: see struct skcipher_alg - * @stat: statistics for AEAD algorithm * @ivsize: see struct skcipher_alg * @chunksize: see struct skcipher_alg * @init: Initialize the cryptographic transformation object. This function @@ -162,10 +145,6 @@ struct aead_alg { int (*init)(struct crypto_aead *tfm); void (*exit)(struct crypto_aead *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_aead stat; -#endif - unsigned int ivsize; unsigned int maxauthsize; unsigned int chunksize; diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h index 31c111bebb68..18a10cad07aa 100644 --- a/include/crypto/akcipher.h +++ b/include/crypto/akcipher.h @@ -54,26 +54,6 @@ struct crypto_akcipher { struct crypto_tfm base; }; -/* - * struct crypto_istat_akcipher - statistics for akcipher algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @verify_cnt: number of verify operation - * @sign_cnt: number of sign requests - * @err_cnt: number of error for akcipher requests - */ -struct crypto_istat_akcipher { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t verify_cnt; - atomic64_t sign_cnt; - atomic64_t err_cnt; -}; - /** * struct akcipher_alg - generic public key algorithm * @@ -110,7 +90,6 @@ struct crypto_istat_akcipher { * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. - * @stat: Statistics for akcipher algorithm * * @base: Common crypto API algorithm data structure */ @@ -127,10 +106,6 @@ struct akcipher_alg { int (*init)(struct crypto_akcipher *tfm); void (*exit)(struct crypto_akcipher *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_akcipher stat; -#endif - struct crypto_alg base; }; @@ -302,27 +277,6 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm) return alg->max_size(tfm); } -static inline struct crypto_istat_akcipher *akcipher_get_stat( - struct akcipher_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_akcipher_errstat(struct akcipher_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&akcipher_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_akcipher_encrypt() - Invoke public key encrypt operation * @@ -336,16 +290,8 @@ static inline int crypto_akcipher_errstat(struct akcipher_alg *alg, int err) static inline int crypto_akcipher_encrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); - - atomic64_inc(&istat->encrypt_cnt); - atomic64_add(req->src_len, &istat->encrypt_tlen); - } - - return crypto_akcipher_errstat(alg, alg->encrypt(req)); + return crypto_akcipher_alg(tfm)->encrypt(req); } /** @@ -361,16 +307,8 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req) static inline int crypto_akcipher_decrypt(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_akcipher *istat = akcipher_get_stat(alg); - - atomic64_inc(&istat->decrypt_cnt); - atomic64_add(req->src_len, &istat->decrypt_tlen); - } - - return crypto_akcipher_errstat(alg, alg->decrypt(req)); + return crypto_akcipher_alg(tfm)->decrypt(req); } /** @@ -422,12 +360,8 @@ int crypto_akcipher_sync_decrypt(struct crypto_akcipher *tfm, static inline int crypto_akcipher_sign(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&akcipher_get_stat(alg)->sign_cnt); - - return crypto_akcipher_errstat(alg, alg->sign(req)); + return crypto_akcipher_alg(tfm)->sign(req); } /** @@ -447,12 +381,8 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req) static inline int crypto_akcipher_verify(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct akcipher_alg *alg = crypto_akcipher_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&akcipher_get_stat(alg)->verify_cnt); - return crypto_akcipher_errstat(alg, alg->verify(req)); + return crypto_akcipher_alg(tfm)->verify(req); } /** diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 7a4a71af653f..156de41ca760 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -61,9 +61,6 @@ struct crypto_type { void (*show)(struct seq_file *m, struct crypto_alg *alg); int (*report)(struct sk_buff *skb, struct crypto_alg *alg); void (*free)(struct crypto_instance *inst); -#ifdef CONFIG_CRYPTO_STATS - int (*report_stat)(struct sk_buff *skb, struct crypto_alg *alg); -#endif unsigned int type; unsigned int maskclear; diff --git a/include/crypto/ecc_curve.h b/include/crypto/ecc_curve.h index 70964781eb68..7d90c5e82266 100644 --- a/include/crypto/ecc_curve.h +++ b/include/crypto/ecc_curve.h @@ -23,6 +23,7 @@ struct ecc_point { * struct ecc_curve - definition of elliptic curve * * @name: Short name of the curve. + * @nbits: The number of bits of a curve. * @g: Generator point of the curve. * @p: Prime number, if Barrett's reduction is used for this curve * pre-calculated value 'mu' is appended to the @p after ndigits. @@ -34,6 +35,7 @@ struct ecc_point { */ struct ecc_curve { char *name; + u32 nbits; struct ecc_point g; u64 *p; u64 *n; diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h index a9f98078d29c..9784ecdd2fb4 100644 --- a/include/crypto/ecdh.h +++ b/include/crypto/ecdh.h @@ -26,6 +26,7 @@ #define ECC_CURVE_NIST_P192 0x0001 #define ECC_CURVE_NIST_P256 0x0002 #define ECC_CURVE_NIST_P384 0x0003 +#define ECC_CURVE_NIST_P521 0x0004 /** * struct ecdh - define an ECDH private key diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 5d61f576cfc8..0014bdd81ab7 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -24,26 +24,7 @@ struct crypto_ahash; */ /* - * struct crypto_istat_hash - statistics for has algorithm - * @hash_cnt: number of hash requests - * @hash_tlen: total data size hashed - * @err_cnt: number of error for hash requests - */ -struct crypto_istat_hash { - atomic64_t hash_cnt; - atomic64_t hash_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define HASH_ALG_COMMON_STAT struct crypto_istat_hash stat; -#else -#define HASH_ALG_COMMON_STAT -#endif - -/* * struct hash_alg_common - define properties of message digest - * @stat: Statistics for hash algorithm. * @digestsize: Size of the result of the transformation. A buffer of this size * must be available to the @final and @finup calls, so they can * store the resulting hash into it. For various predefined sizes, @@ -60,8 +41,6 @@ struct crypto_istat_hash { * information. */ #define HASH_ALG_COMMON { \ - HASH_ALG_COMMON_STAT \ - \ unsigned int digestsize; \ unsigned int statesize; \ \ @@ -243,7 +222,6 @@ struct shash_alg { }; }; #undef HASH_ALG_COMMON -#undef HASH_ALG_COMMON_STAT struct crypto_ahash { bool using_shash; /* Underlying algorithm is shash, not ahash */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 4ac46bafba9d..d00392d1988e 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -31,7 +31,6 @@ * @init. * * @reqsize: Context size for (de)compression requests - * @stat: Statistics for compress algorithm * @base: Common crypto API algorithm data structure * @calg: Cmonn algorithm data structure shared with scomp */ diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h deleted file mode 100644 index fd54074332f5..000000000000 --- a/include/crypto/internal/cryptouser.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/cryptouser.h> -#include <net/netlink.h> - -struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact); - -#ifdef CONFIG_CRYPTO_STATS -int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs); -#else -static inline int crypto_reportstat(struct sk_buff *in_skb, - struct nlmsghdr *in_nlh, - struct nlattr **attrs) -{ - return -ENOTSUPP; -} -#endif diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index 4f6c1a68882f..7ca1f463d1ec 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -33,7 +33,8 @@ #define ECC_CURVE_NIST_P192_DIGITS 3 #define ECC_CURVE_NIST_P256_DIGITS 4 #define ECC_CURVE_NIST_P384_DIGITS 6 -#define ECC_MAX_DIGITS (512 / 64) /* due to ecrdsa */ +#define ECC_CURVE_NIST_P521_DIGITS 9 +#define ECC_MAX_DIGITS DIV_ROUND_UP(521, 64) /* NIST P521 */ #define ECC_DIGITS_TO_BYTES_SHIFT 3 @@ -57,6 +58,27 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit } /** + * ecc_digits_from_bytes() - Create ndigits-sized digits array from byte array + * @in: Input byte array + * @nbytes Size of input byte array + * @out Output digits array + * @ndigits: Number of digits to create from byte array + */ +static inline void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, + u64 *out, unsigned int ndigits) +{ + unsigned int o = nbytes & 7; + __be64 msd = 0; + + if (o) { + memcpy((u8 *)&msd + sizeof(msd) - o, in, o); + out[--ndigits] = be64_to_cpu(msd); + in += o; + } + ecc_swap_digits(in, out, ndigits); +} + +/** * ecc_is_key_valid() - Validate a given ECDH private key * * @curve_id: id representing the curve to use @@ -81,7 +103,8 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, * Returns 0 if the private key was generated successfully, a negative value * if an error occurred. */ -int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey); +int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, + u64 *private_key); /** * ecc_make_pub_key() - Compute an ECC public key diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index 858fe3965ae3..07a10fd2d321 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -27,7 +27,6 @@ struct crypto_scomp { * @free_ctx: Function frees context allocated with alloc_ctx * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation - * @stat: Statistics for compress algorithm * @base: Common crypto API algorithm data structure * @calg: Cmonn algorithm data structure shared with acomp */ diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 1988e24a0d1d..2d9c4de57b69 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -51,20 +51,6 @@ struct crypto_kpp { struct crypto_tfm base; }; -/* - * struct crypto_istat_kpp - statistics for KPP algorithm - * @setsecret_cnt: number of setsecrey operation - * @generate_public_key_cnt: number of generate_public_key operation - * @compute_shared_secret_cnt: number of compute_shared_secret operation - * @err_cnt: number of error for KPP requests - */ -struct crypto_istat_kpp { - atomic64_t setsecret_cnt; - atomic64_t generate_public_key_cnt; - atomic64_t compute_shared_secret_cnt; - atomic64_t err_cnt; -}; - /** * struct kpp_alg - generic key-agreement protocol primitives * @@ -87,7 +73,6 @@ struct crypto_istat_kpp { * @exit: Undo everything @init did. * * @base: Common crypto API algorithm data structure - * @stat: Statistics for KPP algorithm */ struct kpp_alg { int (*set_secret)(struct crypto_kpp *tfm, const void *buffer, @@ -100,10 +85,6 @@ struct kpp_alg { int (*init)(struct crypto_kpp *tfm); void (*exit)(struct crypto_kpp *tfm); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_kpp stat; -#endif - struct crypto_alg base; }; @@ -291,26 +272,6 @@ struct kpp_secret { unsigned short len; }; -static inline struct crypto_istat_kpp *kpp_get_stat(struct kpp_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_kpp_errstat(struct kpp_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&kpp_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_kpp_set_secret() - Invoke kpp operation * @@ -329,12 +290,7 @@ static inline int crypto_kpp_errstat(struct kpp_alg *alg, int err) static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, const void *buffer, unsigned int len) { - struct kpp_alg *alg = crypto_kpp_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->setsecret_cnt); - - return crypto_kpp_errstat(alg, alg->set_secret(tfm, buffer, len)); + return crypto_kpp_alg(tfm)->set_secret(tfm, buffer, len); } /** @@ -353,12 +309,8 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, static inline int crypto_kpp_generate_public_key(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - struct kpp_alg *alg = crypto_kpp_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->generate_public_key_cnt); - return crypto_kpp_errstat(alg, alg->generate_public_key(req)); + return crypto_kpp_alg(tfm)->generate_public_key(req); } /** @@ -374,12 +326,8 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req) static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req) { struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - struct kpp_alg *alg = crypto_kpp_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) - atomic64_inc(&kpp_get_stat(alg)->compute_shared_secret_cnt); - return crypto_kpp_errstat(alg, alg->compute_shared_secret(req)); + return crypto_kpp_alg(tfm)->compute_shared_secret(req); } /** diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 6abe5102e5fb..5ac4388f50e1 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -15,20 +15,6 @@ struct crypto_rng; -/* - * struct crypto_istat_rng: statistics for RNG algorithm - * @generate_cnt: number of RNG generate requests - * @generate_tlen: total data size of generated data by the RNG - * @seed_cnt: number of times the RNG was seeded - * @err_cnt: number of error for RNG requests - */ -struct crypto_istat_rng { - atomic64_t generate_cnt; - atomic64_t generate_tlen; - atomic64_t seed_cnt; - atomic64_t err_cnt; -}; - /** * struct rng_alg - random number generator definition * @@ -46,7 +32,6 @@ struct crypto_istat_rng { * size of the seed is defined with @seedsize . * @set_ent: Set entropy that would otherwise be obtained from * entropy source. Internal use only. - * @stat: Statistics for rng algorithm * @seedsize: The seed size required for a random number generator * initialization defined with this variable. Some * random number generators does not require a seed @@ -63,10 +48,6 @@ struct rng_alg { void (*set_ent)(struct crypto_rng *tfm, const u8 *data, unsigned int len); -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_rng stat; -#endif - unsigned int seedsize; struct crypto_alg base; @@ -144,26 +125,6 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); } -static inline struct crypto_istat_rng *rng_get_stat(struct rng_alg *alg) -{ -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif -} - -static inline int crypto_rng_errstat(struct rng_alg *alg, int err) -{ - if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) - return err; - - if (err && err != -EINPROGRESS && err != -EBUSY) - atomic64_inc(&rng_get_stat(alg)->err_cnt); - - return err; -} - /** * crypto_rng_generate() - get random number * @tfm: cipher handle @@ -182,17 +143,7 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { - struct rng_alg *alg = crypto_rng_alg(tfm); - - if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { - struct crypto_istat_rng *istat = rng_get_stat(alg); - - atomic64_inc(&istat->generate_cnt); - atomic64_add(dlen, &istat->generate_tlen); - } - - return crypto_rng_errstat(alg, - alg->generate(tfm, src, slen, dst, dlen)); + return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); } /** diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index c8857d7bdb37..74d47e23374e 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -65,28 +65,6 @@ struct crypto_lskcipher { }; /* - * struct crypto_istat_cipher - statistics for cipher algorithm - * @encrypt_cnt: number of encrypt requests - * @encrypt_tlen: total data size handled by encrypt requests - * @decrypt_cnt: number of decrypt requests - * @decrypt_tlen: total data size handled by decrypt requests - * @err_cnt: number of error for cipher requests - */ -struct crypto_istat_cipher { - atomic64_t encrypt_cnt; - atomic64_t encrypt_tlen; - atomic64_t decrypt_cnt; - atomic64_t decrypt_tlen; - atomic64_t err_cnt; -}; - -#ifdef CONFIG_CRYPTO_STATS -#define SKCIPHER_ALG_COMMON_STAT struct crypto_istat_cipher stat; -#else -#define SKCIPHER_ALG_COMMON_STAT -#endif - -/* * struct skcipher_alg_common - common properties of skcipher_alg * @min_keysize: Minimum key size supported by the transformation. This is the * smallest key length supported by this transformation algorithm. @@ -103,7 +81,6 @@ struct crypto_istat_cipher { * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. * @statesize: Size of the internal state for the algorithm. - * @stat: Statistics for cipher algorithm * @base: Definition of a generic crypto algorithm. */ #define SKCIPHER_ALG_COMMON { \ @@ -113,8 +90,6 @@ struct crypto_istat_cipher { unsigned int chunksize; \ unsigned int statesize; \ \ - SKCIPHER_ALG_COMMON_STAT \ - \ struct crypto_alg base; \ } struct skcipher_alg_common SKCIPHER_ALG_COMMON; diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 51421fdbb0ba..6f9242259edc 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -69,6 +69,7 @@ enum OID { OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ + OID_id_ansip521r1, /* 1.3.132.0.35 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ OID_sha512, /* 2.16.840.1.101.3.4.2.3 */ diff --git a/include/linux/qat/qat_mig_dev.h b/include/linux/qat/qat_mig_dev.h new file mode 100644 index 000000000000..dbbb6a063dd2 --- /dev/null +++ b/include/linux/qat/qat_mig_dev.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef QAT_MIG_DEV_H_ +#define QAT_MIG_DEV_H_ + +struct pci_dev; + +struct qat_mig_dev { + void *parent_accel_dev; + u8 *state; + u32 setup_size; + u32 remote_setup_size; + u32 state_size; + s32 vf_id; +}; + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id); +int qat_vfmig_init(struct qat_mig_dev *mdev); +void qat_vfmig_cleanup(struct qat_mig_dev *mdev); +void qat_vfmig_reset(struct qat_mig_dev *mdev); +int qat_vfmig_open(struct qat_mig_dev *mdev); +void qat_vfmig_close(struct qat_mig_dev *mdev); +int qat_vfmig_suspend(struct qat_mig_dev *mdev); +int qat_vfmig_resume(struct qat_mig_dev *mdev); +int qat_vfmig_save_state(struct qat_mig_dev *mdev); +int qat_vfmig_save_setup(struct qat_mig_dev *mdev); +int qat_vfmig_load_state(struct qat_mig_dev *mdev); +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size); +void qat_vfmig_destroy(struct qat_mig_dev *mdev); + +#endif /*QAT_MIG_DEV_H_*/ diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 5730c67f0617..20a6c0fc149e 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -32,7 +32,7 @@ enum { CRYPTO_MSG_UPDATEALG, CRYPTO_MSG_GETALG, CRYPTO_MSG_DELRNG, - CRYPTO_MSG_GETSTAT, + CRYPTO_MSG_GETSTAT, /* No longer supported, do not use. */ __CRYPTO_MSG_MAX }; #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) @@ -54,16 +54,16 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ CRYPTOCFGA_REPORT_KPP, /* struct crypto_report_kpp */ CRYPTOCFGA_REPORT_ACOMP, /* struct crypto_report_acomp */ - CRYPTOCFGA_STAT_LARVAL, /* struct crypto_stat */ - CRYPTOCFGA_STAT_HASH, /* struct crypto_stat */ - CRYPTOCFGA_STAT_BLKCIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_AEAD, /* struct crypto_stat */ - CRYPTOCFGA_STAT_COMPRESS, /* struct crypto_stat */ - CRYPTOCFGA_STAT_RNG, /* struct crypto_stat */ - CRYPTOCFGA_STAT_CIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_AKCIPHER, /* struct crypto_stat */ - CRYPTOCFGA_STAT_KPP, /* struct crypto_stat */ - CRYPTOCFGA_STAT_ACOMP, /* struct crypto_stat */ + CRYPTOCFGA_STAT_LARVAL, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_HASH, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_BLKCIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_AEAD, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_COMPRESS, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_RNG, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_CIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_AKCIPHER, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_KPP, /* No longer supported, do not use. */ + CRYPTOCFGA_STAT_ACOMP, /* No longer supported, do not use. */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -79,6 +79,7 @@ struct crypto_user_alg { __u32 cru_flags; }; +/* No longer supported, do not use. */ struct crypto_stat_aead { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -88,6 +89,7 @@ struct crypto_stat_aead { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_akcipher { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -99,6 +101,7 @@ struct crypto_stat_akcipher { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_cipher { char type[CRYPTO_MAX_NAME]; __u64 stat_encrypt_cnt; @@ -108,6 +111,7 @@ struct crypto_stat_cipher { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_compress { char type[CRYPTO_MAX_NAME]; __u64 stat_compress_cnt; @@ -117,6 +121,7 @@ struct crypto_stat_compress { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_hash { char type[CRYPTO_MAX_NAME]; __u64 stat_hash_cnt; @@ -124,6 +129,7 @@ struct crypto_stat_hash { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_kpp { char type[CRYPTO_MAX_NAME]; __u64 stat_setsecret_cnt; @@ -132,6 +138,7 @@ struct crypto_stat_kpp { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_rng { char type[CRYPTO_MAX_NAME]; __u64 stat_generate_cnt; @@ -140,6 +147,7 @@ struct crypto_stat_rng { __u64 stat_err_cnt; }; +/* No longer supported, do not use. */ struct crypto_stat_larval { char type[CRYPTO_MAX_NAME]; }; diff --git a/kernel/padata.c b/kernel/padata.c index e3f639ff1670..53f4bc912712 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -106,7 +106,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data, { int i; - spin_lock(&padata_works_lock); + spin_lock_bh(&padata_works_lock); /* Start at 1 because the current task participates in the job. */ for (i = 1; i < nworks; ++i) { struct padata_work *pw = padata_work_alloc(); @@ -116,7 +116,7 @@ static int __init padata_work_alloc_mt(int nworks, void *data, padata_work_init(pw, padata_mt_helper, data, 0); list_add(&pw->pw_list, head); } - spin_unlock(&padata_works_lock); + spin_unlock_bh(&padata_works_lock); return i; } @@ -134,12 +134,12 @@ static void __init padata_works_free(struct list_head *works) if (list_empty(works)) return; - spin_lock(&padata_works_lock); + spin_lock_bh(&padata_works_lock); list_for_each_entry_safe(cur, next, works, pw_list) { list_del(&cur->pw_list); padata_work_free(cur); } - spin_unlock(&padata_works_lock); + spin_unlock_bh(&padata_works_lock); } static void padata_parallel_worker(struct work_struct *parallel_work) |