diff options
Diffstat (limited to 'crypto/gf128mul.c')
-rw-r--r-- | crypto/gf128mul.c | 111 |
1 files changed, 47 insertions, 64 deletions
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 72015fee533d..dc012129c063 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -44,7 +44,7 @@ --------------------------------------------------------------------------- Issue 31/01/2006 - This file provides fast multiplication in GF(128) as required by several + This file provides fast multiplication in GF(2^128) as required by several cryptographic authentication modes */ @@ -88,76 +88,59 @@ q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ } -/* Given the value i in 0..255 as the byte overflow when a field element - in GHASH is multiplied by x^8, this function will return the values that - are generated in the lo 16-bit word of the field value by applying the - modular polynomial. The values lo_byte and hi_byte are returned via the - macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into - memory as required by a suitable definition of this macro operating on - the table above -*/ - -#define xx(p, q) 0x##p##q +/* + * Given a value i in 0..255 as the byte overflow when a field element + * in GF(2^128) is multiplied by x^8, the following macro returns the + * 16-bit value that must be XOR-ed into the low-degree end of the + * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1. + * + * There are two versions of the macro, and hence two tables: one for + * the "be" convention where the highest-order bit is the coefficient of + * the highest-degree polynomial term, and one for the "le" convention + * where the highest-order bit is the coefficient of the lowest-degree + * polynomial term. In both cases the values are stored in CPU byte + * endianness such that the coefficients are ordered consistently across + * bytes, i.e. in the "be" table bits 15..0 of the stored value + * correspond to the coefficients of x^15..x^0, and in the "le" table + * bits 15..0 correspond to the coefficients of x^0..x^15. + * + * Therefore, provided that the appropriate byte endianness conversions + * are done by the multiplication functions (and these must be in place + * anyway to support both little endian and big endian CPUs), the "be" + * table can be used for multiplications of both "bbe" and "ble" + * elements, and the "le" table can be used for multiplications of both + * "lle" and "lbe" elements. + */ -#define xda_bbe(i) ( \ - (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ - (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \ - (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ - (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ +#define xda_be(i) ( \ + (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \ + (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \ + (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \ + (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \ ) -#define xda_lle(i) ( \ - (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ - (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ - (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ - (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ +#define xda_le(i) ( \ + (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \ + (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \ + (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \ + (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \ ) -static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); -static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); +static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le); +static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be); -/* These functions multiply a field element by x, by x^4 and by x^8 - * in the polynomial field representation. It uses 32-bit word operations - * to gain speed but compensates for machine endianess and hence works +/* + * The following functions multiply a field element by x^8 in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work * correctly on both styles of machine. */ -static void gf128mul_x_lle(be128 *r, const be128 *x) -{ - u64 a = be64_to_cpu(x->a); - u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; - - r->b = cpu_to_be64((b >> 1) | (a << 63)); - r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); -} - -static void gf128mul_x_bbe(be128 *r, const be128 *x) -{ - u64 a = be64_to_cpu(x->a); - u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[a >> 63]; - - r->a = cpu_to_be64((a << 1) | (b >> 63)); - r->b = cpu_to_be64((b << 1) ^ _tt); -} - -void gf128mul_x_ble(be128 *r, const be128 *x) -{ - u64 a = le64_to_cpu(x->a); - u64 b = le64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[b >> 63]; - - r->a = cpu_to_le64((a << 1) ^ _tt); - r->b = cpu_to_le64((b << 1) | (a >> 63)); -} -EXPORT_SYMBOL(gf128mul_x_ble); - static void gf128mul_x8_lle(be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_lle[b & 0xff]; + u64 _tt = gf128mul_table_le[b & 0xff]; x->b = cpu_to_be64((b >> 8) | (a << 56)); x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); @@ -167,7 +150,7 @@ static void gf128mul_x8_bbe(be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[a >> 56]; + u64 _tt = gf128mul_table_be[a >> 56]; x->a = cpu_to_be64((a << 8) | (b >> 56)); x->b = cpu_to_be64((b << 8) ^ _tt); @@ -251,7 +234,7 @@ EXPORT_SYMBOL(gf128mul_bbe); /* This version uses 64k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key - value in GF(128). If we consider a GF(128) value in + value in GF(2^128). If we consider a GF(2^128) value in the buffer's lowest byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. But we also @@ -315,7 +298,7 @@ void gf128mul_free_64k(struct gf128mul_64k *t) } EXPORT_SYMBOL(gf128mul_free_64k); -void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t) { u8 *ap = (u8 *)a; be128 r[1]; @@ -330,7 +313,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe); /* This version uses 4k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key - value in GF(128). If we consider a GF(128) value in a + value in GF(2^128). If we consider a GF(2^128) value in a single byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. If we take the highest byte in @@ -388,7 +371,7 @@ out: } EXPORT_SYMBOL(gf128mul_init_4k_bbe); -void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t) { u8 *ap = (u8 *)a; be128 r[1]; @@ -403,7 +386,7 @@ void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) } EXPORT_SYMBOL(gf128mul_4k_lle); -void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t) { u8 *ap = (u8 *)a; be128 r[1]; |