From eb366989aa42e688b525929a0ff67ac047df7ee3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 22 Apr 2018 18:23:47 +0200 Subject: dm/verity_fec: Use GFP aware reed solomon init Allocations from the rs_pool can invoke init_rs() from the mempool allocation callback. This is problematic in fec_alloc_bufs() which invokes mempool_alloc() with GFP_NOIO to prevent a swap deadlock because init_rs() uses GFP_KERNEL allocations. Switch it to init_rs_gfp() and invoke it with the gfp_t flags which are handed in from the allocator. Note: This is not a problem today because the rs control struct is shared between the instances and it is created when the mempool is initialized. But the upcoming changes which switch to a rs_control struct per instance to embed decoder buffers will trigger the swap vs. GFP_KERNEL issue. Signed-off-by: Thomas Gleixner Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Neil Brown Signed-off-by: Kees Cook --- drivers/md/dm-verity-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index e13f90832b6b..375453ef6f26 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -570,7 +570,7 @@ static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data) { struct dm_verity *v = (struct dm_verity *)pool_data; - return init_rs(8, 0x11d, 0, 1, v->fec->roots); + return init_rs_gfp(8, 0x11d, 0, 1, v->fec->roots, gfp_mask); } static void fec_rs_free(void *element, void *pool_data) -- cgit v1.2.3 From 2163398192f6a53e84765cfe3e5a2088437c3d2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 22 Apr 2018 18:23:53 +0200 Subject: rslib: Split rs control struct The decoder library uses variable length arrays on stack. To get rid of them it would be simple to allocate fixed length arrays on stack, but those might become rather large. 
The other solution is to allocate the buffers in the rs control structure, but this cannot be done as long as the structure can be shared by several users. Sharing is desired because the RS polynomial tables are large and initialization is time consuming. To solve this, split the codec information out of the control structure and have a pointer to a shared codec in it. Instantiate the control structure for each user, create a new codec if no shareable one is available yet. Adjust all affected usage sites to the new scheme. This allows adding per instance decoder buffers to the control structure later on. Signed-off-by: Thomas Gleixner Acked-by: Boris Brezillon Cc: Tony Luck Cc: Kees Cook Cc: Segher Boessenkool Cc: Kernel Hardening Cc: Richard Weinberger Cc: Mike Snitzer Cc: Anton Vorontsov Cc: Colin Cross Cc: Andrew Morton Cc: David Woodhouse Cc: Alasdair Kergon Signed-off-by: Kees Cook --- drivers/mtd/nand/raw/cafe_nand.c | 7 +- drivers/mtd/nand/raw/diskonchip.c | 7 +- include/linux/rslib.h | 19 ++++-- lib/reed_solomon/decode_rs.c | 1 + lib/reed_solomon/encode_rs.c | 1 + lib/reed_solomon/reed_solomon.c | 135 ++++++++++++++++++++++---------------- 6 files changed, 100 insertions(+), 70 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c index d8c8c9d1e640..d721f489b38b 100644 --- a/drivers/mtd/nand/raw/cafe_nand.c +++ b/drivers/mtd/nand/raw/cafe_nand.c @@ -394,12 +394,13 @@ static int cafe_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip, for (i=0; i<8; i+=2) { uint32_t tmp = cafe_readl(cafe, NAND_ECC_SYN01 + (i*2)); - syn[i] = cafe->rs->index_of[tmp & 0xfff]; - syn[i+1] = cafe->rs->index_of[(tmp >> 16) & 0xfff]; + + syn[i] = cafe->rs->codec->index_of[tmp & 0xfff]; + syn[i+1] = cafe->rs->codec->index_of[(tmp >> 16) & 0xfff]; } n = decode_rs16(cafe->rs, NULL, NULL, 1367, syn, 0, pos, 0, - pat); + pat); for (i = 0; i < n; i++) { int p = pos[i]; diff --git a/drivers/mtd/nand/raw/diskonchip.c 
b/drivers/mtd/nand/raw/diskonchip.c index 86a258de0b75..73308333e788 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -140,6 +140,7 @@ static int doc_ecc_decode(struct rs_control *rs, uint8_t *data, uint8_t *ecc) int i, j, nerr, errpos[8]; uint8_t parity; uint16_t ds[4], s[5], tmp, errval[8], syn[4]; + struct rs_codec *cd = rs->codec; memset(syn, 0, sizeof(syn)); /* Convert the ecc bytes into words */ @@ -160,15 +161,15 @@ static int doc_ecc_decode(struct rs_control *rs, uint8_t *data, uint8_t *ecc) for (j = 1; j < NROOTS; j++) { if (ds[j] == 0) continue; - tmp = rs->index_of[ds[j]]; + tmp = cd->index_of[ds[j]]; for (i = 0; i < NROOTS; i++) - s[i] ^= rs->alpha_to[rs_modnn(rs, tmp + (FCR + i) * j)]; + s[i] ^= cd->alpha_to[rs_modnn(cd, tmp + (FCR + i) * j)]; } /* Calc syn[i] = s[i] / alpha^(v + i) */ for (i = 0; i < NROOTS; i++) { if (s[i]) - syn[i] = rs_modnn(rs, rs->index_of[s[i]] + (NN - FCR - i)); + syn[i] = rs_modnn(cd, cd->index_of[s[i]] + (NN - FCR - i)); } /* Call the decoder library */ nerr = decode_rs16(rs, NULL, NULL, 1019, syn, 0, errpos, 0, errval); diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 27652c18bf8c..6703311beea3 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -15,7 +15,7 @@ #include /* for GFP_KERNEL */ /** - * struct rs_control - rs control structure + * struct rs_codec - rs codec data * * @mm: Bits per symbol * @nn: Symbols per block (= (1<= 3 * rs->nn */ -static inline int rs_modnn(struct rs_control *rs, int x) +static inline int rs_modnn(struct rs_codec *rs, int x) { while (x >= rs->nn) { x -= rs->nn; diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c index d61007ade4a0..794cced31c75 100644 --- a/lib/reed_solomon/decode_rs.c +++ b/lib/reed_solomon/decode_rs.c @@ -10,6 +10,7 @@ * Generic data width independent code which is included by the wrappers. 
*/ { + struct rs_codec *rs = rsc->codec; int deg_lambda, el, deg_omega; int i, j, r, k, pad; int nn = rs->nn; diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c index 10ca1ebb13dd..9112d46e869e 100644 --- a/lib/reed_solomon/encode_rs.c +++ b/lib/reed_solomon/encode_rs.c @@ -10,6 +10,7 @@ * Generic data width independent code which is included by the wrappers. */ { + struct rs_codec *rs = rsc->codec; int i, j, pad; int nn = rs->nn; int nroots = rs->nroots; diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index 02c19ecffc28..cb21e8b5a4e0 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -11,22 +11,23 @@ * * The generic Reed Solomon library provides runtime configurable * encoding / decoding of RS codes. - * Each user must call init_rs to get a pointer to a rs_control - * structure for the given rs parameters. This structure is either - * generated or a already available matching control structure is used. - * If a structure is generated then the polynomial arrays for - * fast encoding / decoding are built. This can take some time so - * make sure not to call this function from a time critical path. - * Usually a module / driver should initialize the necessary - * rs_control structure on module / driver init and release it - * on exit. - * The encoding puts the calculated syndrome into a given syndrome - * buffer. - * The decoding is a two step process. The first step calculates - * the syndrome over the received (data + syndrome) and calls the - * second stage, which does the decoding / error correction itself. - * Many hw encoders provide a syndrome calculation over the received - * data + syndrome and can call the second stage directly. + * + * Each user must call init_rs to get a pointer to a rs_control structure + * for the given rs parameters. The control struct is unique per instance. + * It points to a codec which can be shared by multiple control structures. 
+ * If a codec is newly allocated then the polynomial arrays for fast + * encoding / decoding are built. This can take some time so make sure not + * to call this function from a time critical path. Usually a module / + * driver should initialize the necessary rs_control structure on module / + * driver init and release it on exit. + * + * The encoding puts the calculated syndrome into a given syndrome buffer. + * + * The decoding is a two step process. The first step calculates the + * syndrome over the received (data + syndrome) and calls the second stage, + * which does the decoding / error correction itself. Many hw encoders + * provide a syndrome calculation over the received data + syndrome and can + * call the second stage directly. */ #include #include @@ -36,13 +37,13 @@ #include #include -/* This list holds all currently allocated rs control structures */ -static LIST_HEAD (rslist); +/* This list holds all currently allocated rs codec structures */ +static LIST_HEAD(codec_list); /* Protection for the list */ static DEFINE_MUTEX(rslistlock); /** - * rs_init - Initialize a Reed-Solomon codec + * codec_init - Initialize a Reed-Solomon codec * @symsize: symbol size, bits (1-8) * @gfpoly: Field generator polynomial coefficients * @gffunc: Field generator function @@ -51,14 +52,14 @@ static DEFINE_MUTEX(rslistlock); * @nroots: RS code generator polynomial degree (number of roots) * @gfp: GFP_ flags for allocations * - * Allocate a control structure and the polynom arrays for faster + * Allocate a codec structure and the polynom arrays for faster * en/decoding. Fill the arrays according to the given parameters. 
*/ -static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), - int fcr, int prim, int nroots, gfp_t gfp) +static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int), + int fcr, int prim, int nroots, gfp_t gfp) { - struct rs_control *rs; int i, j, sr, root, iprim; + struct rs_codec *rs; rs = kzalloc(sizeof(*rs), gfp); if (!rs) @@ -138,6 +139,9 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), /* convert rs->genpoly[] to index form for quicker encoding */ for (i = 0; i <= nroots; i++) rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; + + rs->users = 1; + list_add(&rs->list, &codec_list); return rs; err: @@ -150,27 +154,37 @@ err: /** - * free_rs - Free the rs control structure, if it is no longer used - * @rs: the control structure which is not longer used by the + * free_rs - Free the rs control structure + * @rs: The control structure which is not longer used by the * caller + * + * Free the control structure. If @rs is the last user of the associated + * codec, free the codec as well. */ void free_rs(struct rs_control *rs) { + struct rs_codec *cd; + + if (!rs) + return; + + cd = rs->codec; mutex_lock(&rslistlock); - rs->users--; - if(!rs->users) { - list_del(&rs->list); - kfree(rs->alpha_to); - kfree(rs->index_of); - kfree(rs->genpoly); - kfree(rs); + cd->users--; + if(!cd->users) { + list_del(&cd->list); + kfree(cd->alpha_to); + kfree(cd->index_of); + kfree(cd->genpoly); + kfree(cd); } mutex_unlock(&rslistlock); + kfree(rs); } EXPORT_SYMBOL_GPL(free_rs); /** - * init_rs_internal - Find a matching or allocate a new rs control structure + * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. 
The polynomial @@ -201,33 +215,39 @@ static struct rs_control *init_rs_internal(int symsize, int gfpoly, if (nroots < 0 || nroots >= (1<mm) + list_for_each(tmp, &codec_list) { + struct rs_codec *cd = list_entry(tmp, struct rs_codec, list); + + if (symsize != cd->mm) continue; - if (gfpoly != rs->gfpoly) + if (gfpoly != cd->gfpoly) continue; - if (gffunc != rs->gffunc) + if (gffunc != cd->gffunc) continue; - if (fcr != rs->fcr) + if (fcr != cd->fcr) continue; - if (prim != rs->prim) + if (prim != cd->prim) continue; - if (nroots != rs->nroots) + if (nroots != cd->nroots) continue; /* We have a matching one already */ - rs->users++; + cd->users++; + rs->codec = cd; goto out; } /* Create a new one */ - rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp); - if (rs) { - rs->users = 1; - list_add(&rs->list, &rslist); + rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp); + if (!rs->codec) { + kfree(rs); + rs = NULL; } out: mutex_unlock(&rslistlock); @@ -235,7 +255,7 @@ out: } /** - * init_rs_gfp - Find a matching or allocate a new rs control structure + * init_rs_gfp - Create a RS control struct and initialize it * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. The polynomial @@ -254,9 +274,8 @@ struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, EXPORT_SYMBOL_GPL(init_rs_gfp); /** - * init_rs_non_canonical - Find a matching or allocate a new rs control - * structure, for fields with non-canonical - * representation + * init_rs_non_canonical - Allocate rs control struct for fields with + * non-canonical representation * @symsize: the symbol size (number of bits) * @gffunc: pointer to function to generate the next field element, * or the multiplicative identity element if given 0. 
Used @@ -277,7 +296,7 @@ EXPORT_SYMBOL_GPL(init_rs_non_canonical); #ifdef CONFIG_REED_SOLOMON_ENC8 /** * encode_rs8 - Calculate the parity for data values (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -287,7 +306,7 @@ EXPORT_SYMBOL_GPL(init_rs_non_canonical); * symbol size > 8. The calling code must take care of encoding of the * syndrome result for storage itself. */ -int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par, +int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -298,7 +317,7 @@ EXPORT_SYMBOL_GPL(encode_rs8); #ifdef CONFIG_REED_SOLOMON_DEC8 /** * decode_rs8 - Decode codeword (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -313,7 +332,7 @@ EXPORT_SYMBOL_GPL(encode_rs8); * syndrome result and the received parity before calling this code. * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len, +int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { @@ -325,7 +344,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); #ifdef CONFIG_REED_SOLOMON_ENC16 /** * encode_rs16 - Calculate the parity for data values (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -333,7 +352,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); * * Each field in the data array contains up to symbol size bits of valid data. 
*/ -int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par, +int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -344,7 +363,7 @@ EXPORT_SYMBOL_GPL(encode_rs16); #ifdef CONFIG_REED_SOLOMON_DEC16 /** * decode_rs16 - Decode codeword (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -357,7 +376,7 @@ EXPORT_SYMBOL_GPL(encode_rs16); * Each field in the data array contains up to symbol size bits of valid data. * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, +int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { -- cgit v1.2.3 From 964dfce9c2b323a9a9d0bd6764e0f530b40104e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 22 Apr 2018 18:23:54 +0200 Subject: mtd: rawnand: diskonchip: Allocate rs control per instance The reed solomon library is moving the on stack decoder buffers into the rs control structure. That would break the DoC driver because multiple instances share the same control structure and can operate in parallel. At least in theory.... Instantiate a rs control instance per DoC device to avoid that. The per instance buffer is fine as the operation on a single DoC instance is serialized by the MTD/NAND core. 
Signed-off-by: Thomas Gleixner Acked-by: Boris Brezillon Cc: Tony Luck Cc: Kees Cook Cc: Segher Boessenkool Cc: Kernel Hardening Cc: Richard Weinberger Cc: Mike Snitzer Cc: Anton Vorontsov Cc: Colin Cross Cc: Andrew Morton Cc: David Woodhouse Cc: Alasdair Kergon Signed-off-by: Kees Cook --- drivers/mtd/nand/raw/diskonchip.c | 60 ++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 73308333e788..2b7b2b982b77 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -66,6 +66,7 @@ struct doc_priv { int curchip; int mh0_page; int mh1_page; + struct rs_control *rs_decoder; struct mtd_info *nextdoc; /* Handle the last stage of initialization (BBT scan, partitioning) */ @@ -123,9 +124,6 @@ MODULE_PARM_DESC(doc_config_location, "Physical memory address at which to probe /* Number of symbols */ #define NN 1023 -/* the Reed Solomon control structure */ -static struct rs_control *rs_decoder; - /* * The HW decoder in the DoC ASIC's provides us a error syndrome, * which we must convert to a standard syndrome usable by the generic @@ -931,7 +929,7 @@ static int doc200x_correct_data(struct mtd_info *mtd, u_char *dat, calc_ecc[i] = ReadDOC_(docptr, DoC_ECCSyndrome0 + i); } - ret = doc_ecc_decode(rs_decoder, dat, calc_ecc); + ret = doc_ecc_decode(doc->rs_decoder, dat, calc_ecc); if (ret > 0) pr_err("doc200x_correct_data corrected %d errors\n", ret); @@ -1422,10 +1420,10 @@ static inline int __init doc2001plus_init(struct mtd_info *mtd) static int __init doc_probe(unsigned long physadr) { + struct nand_chip *nand = NULL; + struct doc_priv *doc = NULL; unsigned char ChipID; struct mtd_info *mtd; - struct nand_chip *nand; - struct doc_priv *doc; void __iomem *virtadr; unsigned char save_control; unsigned char tmp, tmpb, tmpc; @@ -1562,8 +1560,25 @@ static int __init doc_probe(unsigned long physadr) goto 
fail; } + + /* + * Allocate a RS codec instance + * + * Symbolsize is 10 (bits) + * Primitve polynomial is x^10+x^3+1 + * First consecutive root is 510 + * Primitve element to generate roots = 1 + * Generator polinomial degree = 4 + */ + doc = (struct doc_priv *) (nand + 1); + doc->rs_decoder = init_rs(10, 0x409, FCR, 1, NROOTS); + if (!doc->rs_decoder) { + pr_err("DiskOnChip: Could not create a RS codec\n"); + ret = -ENOMEM; + goto fail; + } + mtd = nand_to_mtd(nand); - doc = (struct doc_priv *) (nand + 1); nand->bbt_td = (struct nand_bbt_descr *) (doc + 1); nand->bbt_md = nand->bbt_td + 1; @@ -1613,7 +1628,6 @@ static int __init doc_probe(unsigned long physadr) haven't yet added it. This is handled without incident by mtd_device_unregister, as far as I can tell. */ nand_release(mtd); - kfree(nand); goto fail; } @@ -1626,6 +1640,9 @@ static int __init doc_probe(unsigned long physadr) actually a DiskOnChip. */ WriteDOC(save_control, virtadr, DOCControl); fail: + if (doc) + free_rs(doc->rs_decoder); + kfree(nand); iounmap(virtadr); error_ioremap: @@ -1648,6 +1665,7 @@ static void release_nanddoc(void) nand_release(mtd); iounmap(doc->virtadr); release_mem_region(doc->physadr, DOC_IOREMAP_LEN); + free_rs(doc->rs_decoder); kfree(nand); } } @@ -1656,27 +1674,12 @@ static int __init init_nanddoc(void) { int i, ret = 0; - /* We could create the decoder on demand, if memory is a concern. 
- * This way we have it handy, if an error happens - * - * Symbolsize is 10 (bits) - * Primitve polynomial is x^10+x^3+1 - * first consecutive root is 510 - * primitve element to generate roots = 1 - * generator polinomial degree = 4 - */ - rs_decoder = init_rs(10, 0x409, FCR, 1, NROOTS); - if (!rs_decoder) { - pr_err("DiskOnChip: Could not create a RS decoder\n"); - return -ENOMEM; - } - if (doc_config_location) { pr_info("Using configured DiskOnChip probe address 0x%lx\n", doc_config_location); ret = doc_probe(doc_config_location); if (ret < 0) - goto outerr; + return ret; } else { for (i = 0; (doc_locations[i] != 0xffffffff); i++) { doc_probe(doc_locations[i]); @@ -1687,11 +1690,7 @@ static int __init init_nanddoc(void) if (!doclist) { pr_info("No valid DiskOnChip devices found\n"); ret = -ENODEV; - goto outerr; } - return 0; - outerr: - free_rs(rs_decoder); return ret; } @@ -1699,11 +1698,6 @@ static void __exit cleanup_nanddoc(void) { /* Cleanup the nand/DoC resources */ release_nanddoc(); - - /* Free the reed solomon resources */ - if (rs_decoder) { - free_rs(rs_decoder); - } } module_init(init_nanddoc); -- cgit v1.2.3