summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nvkm/subdev
AgeCommit message (Collapse)AuthorFilesLines
2018-07-20Merge branch 'linux-4.18' of git://github.com/skeggsb/linux into drm-fixesDave Airlie4-0/+15
- fix problem with pascal and large memory systems - fix a bunch of MST problems - fix a runtime PM interaction with MST Signed-off-by: Dave Airlie <airlied@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/CACAvsv79O8deSts2fxJ_oS6=q8yA+OgwBSEpp5R=BQBmWa+oyg@mail.gmail.com
2018-07-19drm/nouveau/fb/gp100-: disable address remapperBen Skeggs4-0/+15
This was causing problems on a system with a large amount of RAM, where display push buffers were being fetched incorrectly when placed in high system memory addresses. While this commit will resolve the issue on that particular system, the issue will be avoided completely with another patch to more fully solve problems with display and large amounts of system memory on Pascal. It's still probably a good idea to disable this to prevent weird issues in the future. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-06-13treewide: kvzalloc() -> kvcalloc()Kees Cook1-1/+1
The kvzalloc() function has a 2-factor argument form, kvcalloc(). This patch replaces cases of: kvzalloc(a * b, gfp) with: kvcalloc(a * b, gfp) as well as handling cases of: kvzalloc(a * b * c, gfp) with: kvzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kvcalloc(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kvzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kvzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kvzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kvzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kvzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kvzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kvzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kvzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kvzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kvzalloc( - sizeof(char) * COUNT + COUNT , ...) | kvzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kvzalloc + kvcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kvzalloc + kvcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kvzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kvzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kvzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kvzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kvzalloc(C1 * C2 * C3, ...) | kvzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kvzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kvzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kvzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kvzalloc(sizeof(THING) * C2, ...) | kvzalloc(sizeof(TYPE) * C2, ...) | kvzalloc(C1 * C2 * C3, ...) | kvzalloc(C1 * C2, ...) | - kvzalloc + kvcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kvzalloc + kvcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kvzalloc + kvcalloc ( - (E1) * E2 + E1, E2 , ...) | - kvzalloc + kvcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kvzalloc + kvcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13treewide: kvmalloc() -> kvmalloc_array()Kees Cook1-2/+2
The kvmalloc() function has a 2-factor argument form, kvmalloc_array(). This patch replaces cases of: kvmalloc(a * b, gfp) with: kvmalloc_array(a * b, gfp) as well as handling cases of: kvmalloc(a * b * c, gfp) with: kvmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kvmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kvmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kvmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kvmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kvmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kvmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kvmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kvmalloc( - sizeof(char) * COUNT + COUNT , ...) | kvmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kvmalloc + kvmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kvmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kvmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kvmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kvmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kvmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kvmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kvmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kvmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kvmalloc(C1 * C2 * C3, ...) | kvmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kvmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kvmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kvmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kvmalloc(sizeof(THING) * C2, ...) | kvmalloc(sizeof(TYPE) * C2, ...) | kvmalloc(C1 * C2 * C3, ...) | kvmalloc(C1 * C2, ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kvmalloc + kvmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kvmalloc + kvmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kvmalloc + kvmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13treewide: kmalloc() -> kmalloc_array()Kees Cook2-2/+3
The kmalloc() function has a 2-factor argument form, kmalloc_array(). This patch replaces cases of: kmalloc(a * b, gfp) with: kmalloc_array(a * b, gfp) as well as handling cases of: kmalloc(a * b * c, gfp) with: kmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The tools/ directory was manually excluded, since it has its own implementation of kmalloc(). The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(char) * COUNT + COUNT , ...) | kmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kmalloc + kmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kmalloc(C1 * C2 * C3, ...) | kmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kmalloc(sizeof(THING) * C2, ...) | kmalloc(sizeof(TYPE) * C2, ...) | kmalloc(C1 * C2 * C3, ...) | kmalloc(C1 * C2, ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kmalloc + kmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kmalloc + kmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-05-18drm/nouveau/clk: Use list_for_each_entry_from_reverseArushi Singhal1-6/+4
It's better to use "list_for_each_entry_from_reverse" for iterating list than "for loop" as it makes the code more clear to read. This patch replace "for loop" with "list_for_each_entry_from_reverse" and "start" variable with "cstate" which helps in refactoring the code and also "cstate" variable is more commonly used in the other functions. changes in v2: "start" variable is removed, before "cstate" variable was removed but "cstate" is more common so preferred "cstate" over "start". Signed-off-by: Arushi Singhal <arushisinghal19971997@gmail.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/gr/gv100: initial supportBen Skeggs1-0/+21
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fault/gv100: initial supportBen Skeggs2-0/+207
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/mmu/gv100: initial supportBen Skeggs4-0/+135
VEID support hacked in here, as it's the most convenient place for now. Will be refined once it's better understood. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fb/gv100: initial supportBen Skeggs4-1/+50
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/top/gv100: initial supportBen Skeggs1-1/+2
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/devinit/gv100: initial supportBen Skeggs4-1/+83
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/bios/pll: limits table 5.0Ben Skeggs1-1/+18
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/bios/gv100: initial supportBen Skeggs1-1/+4
No real surprises here so far. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/kms/nv50-: determine MST support from DP Info TableBen Skeggs1-1/+1
GV100 doesn't support MST, use the information provided in VBIOS tables to detect its presence instead. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/gr/gp102-: setup stencil zbcBen Skeggs5-3/+70
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fb/gm200-: fix overwriting of big page settingBen Skeggs1-2/+0
Likely a rebase bug. Should have no impact in default configuration due to using per-instance setting by default. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fb/gf100-: bump size of mmu debug buffers to match big page sizeBen Skeggs2-3/+3
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fault/gp100: implement replayable fault buffer initialisationBen Skeggs2-0/+70
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/fault: add infrastructure to support fault buffersBen Skeggs3-0/+214
GPU-specific support will be added separately. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/mc/gp100-: route fault buffer interrupts to FAULTBen Skeggs3-2/+22
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/core: define FAULT subdevBen Skeggs2-0/+1
This will be responsible for the handling of MMU fault buffers on GPUs that support them. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-05-18drm/nouveau/secboot: remove VLA usageGustavo A. R. Silva1-4/+3
In preparation to enabling -Wvla, remove VLA. In this particular case directly use macro NVKM_MSGQUEUE_CMDLINE_SIZE instead of local variable cmdline_size. Also, remove cmdline_size as it is not actually useful anymore. The use of stack Variable Length Arrays needs to be avoided, as they can be a vector for stack exhaustion, which can be both a runtime bug or a security flaw. Also, in general, as code evolves it is easy to lose track of how big a VLA can get. Thus, we can end up having runtime failures that are hard to debug. Also, fixed as part of the directive to remove all VLAs from the kernel: https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com> Reviewed-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-03-30Merge airlied/drm-next into drm-misc-nextSean Paul2-4/+4
Backmerging to pick up a fix from drm-misc-next-fixes. Signed-off-by: Sean Paul <seanpaul@chromium.org>
2018-03-26gpu: drm: nouveau: Use list_{next/prev}_entry instead of list_entryArushi Singhal1-1/+1
It's better to use list_entry instead of list_{next/prev}_entry as it makes the code more clear to read. This patch replace list_entry with list_{next/prev}_entry. Signed-off-by: Arushi Singhal <arushisinghal19971997@gmail.com> Acked-by: Ben Skeggs <bskeggs@redhat.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/1522000893-5331-3-git-send-email-arushisinghal19971997@gmail.com
2018-03-16Merge branch 'linux-4.16' of git://github.com/skeggsb/linux into drm-fixesDave Airlie1-1/+1
nouveau regression fixes. * 'linux-4.16' of git://github.com/skeggsb/linux: drm/nouveau/bl: fix backlight regression drm/nouveau/bl: Fix oops on driver unbind drm/nouveau/mmu: ALIGN_DOWN correct variable
2018-03-16drm/nouveau/mmu: ALIGN_DOWN correct variableMāris Nartišs1-1/+1
Commit 7110c89bb8852ff8b0f88ce05b332b3fe22bd11e ("mmu: swap out round for ALIGN") replaced two calls to round/rounddown with ALIGN/ALIGN_DOWN, but erroneously applied ALIGN_DOWN to a different variable (addr) and left intended variable (tail) not rounded/ALIGNed. As a result screen corruption, X lockups are observable. An example of kernel log of affected system with NV98 card where it was bisected: nouveau 0000:01:00.0: gr: TRAP_M2MF 00000002 [IN] nouveau 0000:01:00.0: gr: TRAP_M2MF 00320951 400007c0 00000000 04000000 nouveau 0000:01:00.0: gr: 00200000 [] ch 1 [000fbbe000 DRM] subc 4 class 5039 mthd 0100 data 00000000 nouveau 0000:01:00.0: fb: trapped read at 0040000000 on channel 1 [0fbbe000 DRM] engine 00 [PGRAPH] client 03 [DISPATCH] subclient 04 [M2M_IN] reason 00000006 [NULL_DMAOBJ] Fixes bug 105173 ("[MCP79][Regression] Unhandled NULL pointer dereference in nvkm_object_unmap since kernel 4.15") https://bugs.freedesktop.org/show_bug.cgi?id=105173 Fixes: 7110c89bb885 ("mmu: swap out round for ALIGN ") Tested-by: Pierre Moreau <pierre.morrow@free.fr> Reviewed-by: Pierre Moreau <pierre.morrow@free.fr> Signed-off-by: Maris Nartiss <maris.nartiss@gmail.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Cc: stable@vger.kernel.org # v4.15+
2018-02-16Merge branch 'linux-4.16' of git://github.com/skeggsb/linux into drm-fixesDave Airlie1-3/+3
single fix for older gpus. * 'linux-4.16' of git://github.com/skeggsb/linux: drm/nouveau: Make clock gate support conditional
2018-02-16drm/nouveau: Make clock gate support conditionalThierry Reding1-3/+3
The recently introduced clock gate support breaks on Tegra chips because no thermal support is enabled for those devices. Conditionalize the code on the existence of thermal support to fix this. Fixes: b138eca661cc ("drm/nouveau: Add support for basic clockgating on Kepler1") Cc: Martin Peres <martin.peres@free.fr> Cc: Lyude Paul <lyude@redhat.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Reviewed-by: Lyude Paul <lyude@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-08Merge tag 'drm-for-v4.16-part2-fixes' of ↵Linus Torvalds30-1399/+2101
git://people.freedesktop.org/~airlied/linux Pull more drm updates from Dave Airlie: "Ben missed sending his nouveau tree, but he really didn't have much stuff in it: - GP108 acceleration support is enabled by "secure boot" support - some clockgating work on Kepler, and bunch of fixes - the bulk of the diff is regenerated firmware files, the change to them really isn't that large. Otherwise this contains regular Intel and AMDGPU fixes" * tag 'drm-for-v4.16-part2-fixes' of git://people.freedesktop.org/~airlied/linux: (59 commits) drm/i915/bios: add DP max link rate to VBT child device struct drm/i915/cnp: Properly handle VBT ddc pin out of bounds. drm/i915/cnp: Ignore VBT request for know invalid DDC pin. drm/i915/cmdparser: Do not check past the cmd length. drm/i915/cmdparser: Check reg_table_count before derefencing. drm/i915/bxt, glk: Increase PCODE timeouts during CDCLK freq changing drm/i915/gvt: Use KVM r/w to access guest opregion drm/i915/gvt: Fix aperture read/write emulation when enable x-no-mmap=on drm/i915/gvt: only reset execlist state of one engine during VM engine reset drm/i915/gvt: refine intel_vgpu_submission_ops as per engine ops drm/amdgpu: re-enable CGCG on CZ and disable on ST drm/nouveau/clk: fix gcc-7 -Wint-in-bool-context warning drm/nouveau/mmu: Fix trailing semicolon drm/nouveau: Introduce NvPmEnableGating option drm/nouveau: Add support for SLCG for Kepler2 drm/nouveau: Add support for BLCG on Kepler2 drm/nouveau: Add support for BLCG on Kepler1 drm/nouveau: Add support for basic clockgating on Kepler1 drm/nouveau/kms/nv50: fix handling of gamma since atomic conversion drm/nouveau/kms/nv50: use INTERPOLATE_257_UNITY_RANGE LUT on newer chipsets ...
2018-02-06Merge tag 'pci-v4.16-changes' of ↵Linus Torvalds1-1/+9
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci Pull PCI updates from Bjorn Helgaas: - skip AER driver error recovery callbacks for correctable errors reported via ACPI APEI, as we already do for errors reported via the native path (Tyler Baicar) - fix DPC shared interrupt handling (Alex Williamson) - print full DPC interrupt number (Keith Busch) - enable DPC only if AER is available (Keith Busch) - simplify DPC code (Bjorn Helgaas) - calculate ASPM L1 substate parameter instead of hardcoding it (Bjorn Helgaas) - enable Latency Tolerance Reporting for ASPM L1 substates (Bjorn Helgaas) - move ASPM internal interfaces out of public header (Bjorn Helgaas) - allow hot-removal of VGA devices (Mika Westerberg) - speed up unplug and shutdown by assuming Thunderbolt controllers don't support Command Completed events (Lukas Wunner) - add AtomicOps support for GPU and Infiniband drivers (Felix Kuehling, Jay Cornwall) - expose "ari_enabled" in sysfs to help NIC naming (Stuart Hayes) - clean up PCI DMA interface usage (Christoph Hellwig) - remove PCI pool API (replaced with DMA pool) (Romain Perier) - deprecate pci_get_bus_and_slot(), which assumed PCI domain 0 (Sinan Kaya) - move DT PCI code from drivers/of/ to drivers/pci/ (Rob Herring) - add PCI-specific wrappers for dev_info(), etc (Frederick Lawler) - remove warnings on sysfs mmap failure (Bjorn Helgaas) - quiet ROM validation messages (Alex Deucher) - remove redundant memory alloc failure messages (Markus Elfring) - fill in types for compile-time VGA and other I/O port resources (Bjorn Helgaas) - make "pci=pcie_scan_all" work for Root Ports as well as Downstream Ports to help AmigaOne X1000 (Bjorn Helgaas) - add SPDX tags to all PCI files (Bjorn Helgaas) - quirk Marvell 9128 DMA aliases (Alex Williamson) - quirk broken INTx disable on Ceton InfiniTV4 (Bjorn Helgaas) - fix CONFIG_PCI=n build by adding dummy pci_irqd_intx_xlate() (Niklas Cassel) - use DMA API to get MSI address for DesignWare IP (Niklas Cassel) - fix endpoint-mode DMA mask configuration (Kishon Vijay Abraham I) - fix ARTPEC-6 incorrect IS_ERR() usage (Wei Yongjun) - add support for ARTPEC-7 SoC (Niklas Cassel) - add endpoint-mode support for ARTPEC (Niklas Cassel) - add Cadence PCIe host and endpoint controller driver (Cyrille Pitchen) - handle multiple INTx status bits being set in dra7xx (Vignesh R) - translate dra7xx hwirq range to fix INTD handling (Vignesh R) - remove deprecated Exynos PHY initialization code (Jaehoon Chung) - fix MSI erratum workaround for HiSilicon Hip06/Hip07 (Dongdong Liu) - fix NULL pointer dereference in iProc BCMA driver (Ray Jui) - fix Keystone interrupt-controller-node lookup (Johan Hovold) - constify qcom driver structures (Julia Lawall) - rework Tegra config space mapping to increase space available for endpoints (Vidya Sagar) - simplify Tegra driver by using bus->sysdata (Manikanta Maddireddy) - remove PCI_REASSIGN_ALL_BUS usage on Tegra (Manikanta Maddireddy) - add support for Global Fabric Manager Server (GFMS) event to Microsemi Switchtec switch driver (Logan Gunthorpe) - add IDs for Switchtec PSX 24xG3 and PSX 48xG3 (Kelvin Cao) * tag 'pci-v4.16-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (140 commits) PCI: cadence: Add EndPoint Controller driver for Cadence PCIe controller dt-bindings: PCI: cadence: Add DT bindings for Cadence PCIe endpoint controller PCI: endpoint: Fix EPF device name to support multi-function devices PCI: endpoint: Add the function number as argument to EPC ops PCI: cadence: Add host driver for Cadence PCIe controller dt-bindings: PCI: cadence: Add DT bindings for Cadence PCIe host controller PCI: Add vendor ID for Cadence PCI: Add generic function to probe PCI host controllers PCI: generic: fix missing call of pci_free_resource_list() PCI: OF: Add generic function to parse and allocate PCI resources PCI: Regroup all PCI related entries into drivers/pci/Makefile PCI/DPC: Reformat DPC register definitions PCI/DPC: Add and use DPC Status register field definitions PCI/DPC: Squash dpc_rp_pio_get_info() into dpc_process_rp_pio_error() PCI/DPC: Remove unnecessary RP PIO register structs PCI/DPC: Push dpc->rp_pio_status assignment into dpc_rp_pio_get_info() PCI/DPC: Squash dpc_rp_pio_print_error() into dpc_rp_pio_get_info() PCI/DPC: Make RP PIO log size check more generic PCI/DPC: Rename local "status" to "dpc_status" PCI/DPC: Squash dpc_rp_pio_print_tlp_header() into dpc_rp_pio_print_error() ...
2018-02-02drm/nouveau/clk: fix gcc-7 -Wint-in-bool-context warningArnd Bergmann1-3/+6
gcc thinks that interpreting a multiplication result as a bool is confusing: drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c: In function 'read_pll': drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c:133:8: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] Adding a temporary variable to contain the divisor helps make it clear what is going on and avoids that warning. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau/mmu: Fix trailing semicolonLuis de Bethencourt1-1/+1
The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt <luisbg@kernel.org> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau: Introduce NvPmEnableGating optionLyude Paul1-1/+3
This adds the NvPmEnableGating config option to nouveau, which can be used to enable or disable clockgating for supported chipsets. Enabling can be done by passing config=NvPmEnableGating=1 To nouveau. If your chipset supports it, you'll see a message in your kernel log indicating that clockgating is enabled. Since clockgating has only had limited testing thus far, we leave this option disabled by default for now. Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Martin Peres <martin.peres@free.fr> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau: Add support for BLCG on Kepler2Lyude Paul2-0/+72
Same as the previous patch, but for Kepler2 now Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Martin Peres <martin.peres@free.fr> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau: Add support for BLCG on Kepler1Lyude Paul10-1/+167
This enables BLCG optimization for kepler1. When using clockgating, nvidia's firmware has a set of registers which are initially programmed by the vbios with various engine delays and other mysterious settings that are safe enough to bring up the GPU. However, the values used by the vbios are more power hungry then they need to be, so the nvidia driver writes it's own more optimized set of BLCG settings before enabling CG_CTRL. This adds support for programming the optimized BLCG values during engine/subdev init, which enables rather significant power savings. This introduces the nvkm_therm_clkgate_init() helper, which we use to program the optimized BLCG settings before enabling clockgating with nvkm_therm_clkgate_enable. As well, this commit shares a lot more code with Fermi since BLCG is mostly the same there as far as we can tell. In the future, it's likely we'll reformat the clkgate_packs for kepler1 so that they share a list of mmio packs with Fermi. Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Martin Peres <martin.peres@free.fr> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau: Add support for basic clockgating on Kepler1Lyude Paul7-14/+286
This adds support for enabling automatic clockgating on nvidia GPUs for Kepler1. While this is not technically a clockgating level, it does enable clockgating using the clockgating values initially set by the vbios (which should be safe to use). This introduces two therm helpers for controlling basic clockgating: nvkm_therm_clkgate_enable() - enables clockgating through CG_CTRL, done after initializing the GPU fully nvkm_therm_clkgate_fini() - prepares clockgating for suspend or driver unload A lot of this code was originally going to be based off of fermi; however it turns out that while Fermi's the first line of GPUs that introduced this kind of power saving, Fermi requires more fine tuned control of the CG_CTRL registers from the driver while reclocking that we don't entirely understand yet. For the simple parts we will be sharing with Fermi for certain however, we at least add those into a new subdev/therm/gf100.h header. Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Martin Peres <martin.peres@free.fr> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-02-02drm/nouveau/secboot/gp108: implement on top of acr_r370Ben Skeggs4-1/+71
Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Gourav Samaiya <gsamaiya@nvidia.com>
2018-02-02drm/nouveau/secboot/r370: implement support for booting LS SEC2 ucodeBen Skeggs1-0/+39
Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Gourav Samaiya <gsamaiya@nvidia.com>
2018-02-02drm/nouveau/secboot/r370: move a bunch of r375 stuff to a new implementationBen Skeggs5-86/+163
It's entirely possibly that the other r375 code is relevant to r370 too, but I've not confirmed this, so I'll leave it where it is for now. NVIDIA's copyright headers maintained, as it's still all their code. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Gourav Samaiya <gsamaiya@nvidia.com>
2018-02-02drm/nouveau/drm/nouveau/mmu: fix odd_ptr_err.cocci warningsChristoph Böhmwalder1-1/+2
The kbuild test bot complained about a new coccinelle warning nearby, which sparked a discussion about the assignment to 'memory' inside of the conditional expression. See Link below for the original post. Fix the assignment to silence the coccinelle warning and also make the code look a little nicer. Link: https://lists.freedesktop.org/archives/nouveau/2017-November/029242.html Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at> Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com>
2018-02-02drm/nouveau/pmu/fuc: don't use movw directly anymoreKarol Herbst4-1292/+1292
Fixes failure to compile with recent envyas as a result of the 'movw' alias being removed for v5. A bit of history: v3 only has a 16-bit sign-extended immediate mov op. In order to set the high bits, there's a separate 'sethi' op. envyas validates that the value passed to mov(imm) is between -0x8000 and 0x7fff. In order to simplify macros that load both the low and high word, a 'movw' alias was added which takes an unsigned 16-bit immediate. However the actual hardware op still sign extends. v5 has a full 32-bit immediate mov op. The v3 16-bit immediate mov op is gone (loads 0 into the dst reg). However due to a bug in envyas, the movw alias still existed, and selected the no-longer-present v3 16-bit immediate mov op. As a result usage of movw on v5 is the same as mov with a 0x0 argument. The proper fix throughout is to only ever use the 'movw' alias in combination with 'sethi'. Anything else should get the sign-extended validation to ensure that the intended value ends up in the destination register. Changes in fuc3 binaries is the result of a different encoding being selected for a mov with an 8-bit value. v2: added commit message written by Ilia, thanks for that! v3: messed up rebasing, now it should apply Signed-off-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-01-26Merge branch 'linux-4.15' of git://github.com/skeggsb/linux into drm-fixesDave Airlie1-15/+31
Single irq regression fix * 'linux-4.15' of git://github.com/skeggsb/linux: drm/nouveau: Move irq setup/teardown to pci ctor/dtor
2018-01-26drm/nouveau: Move irq setup/teardown to pci ctor/dtorLyude Paul1-15/+31
For a while we've been having issues with seemingly random interrupts coming from nvidia cards when resuming them. Originally the fix for this was thought to be just re-arming the MSI interrupt registers right after re-allocating our IRQs, however it seems a lot of what we do is both wrong and not even nessecary. This was made apparent by what appeared to be a regression in the mainline kernel that started introducing suspend/resume issues for nouveau: a0c9259dc4e1 (irq/matrix: Spread interrupts on allocation) After this commit was introduced, we started getting interrupts from the GPU before we actually re-allocated our own IRQ (see references below) and assigned the IRQ handler. Investigating this turned out that the problem was not with the commit, but the fact that nouveau even free/allocates it's irqs before and after suspend/resume. For starters: drivers in the linux kernel haven't had to handle freeing/re-allocating their IRQs during suspend/resume cycles for quite a while now. Nouveau seems to be one of the few drivers left that still does this, despite the fact there's no reason we actually need to since disabling interrupts from the device side should be enough, as the kernel is already smart enough to know to disable host-side interrupts for us before going into suspend. Since we were tearing down our IRQs by hand however, that means there was a short period during resume where interrupts could be received before we re-allocated our IRQ which would lead to us getting an unhandled IRQ. Since we never handle said IRQ and re-arm the interrupt registers, this would cause us to miss all of the interrupts from the GPU and cause our init process to start timing out on anything requiring interrupts. So, since this whole setup/teardown every suspend/resume cycle is useless anyway, move irq setup/teardown into the pci subdev's ctor/dtor functions instead so they're only called at driver load and driver unload. This should fix most of the issues with pending interrupts on resume, along with getting suspend/resume for nouveau to work again. As well, this probably means we can also just remove the msi rearm call inside nvkm_pci_init(). But since our main focus here is to fix suspend/resume before 4.15, we'll save that for a later patch. Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: Karol Herbst <kherbst@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Mike Galbraith <efault@gmx.de> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-01-19Merge branch 'linux-4.15' of git://github.com/skeggsb/linux into drm-fixesDave Airlie7-10/+108
Thought I'd try my luck getting one more in: - Two fixes for Tegra (one is to common code, but our userspace doesn't hit it). - One for NV5x-class MCPs * 'linux-4.15' of git://github.com/skeggsb/linux: drm/nouveau/mmu/mcp77: fix regressions in stolen memory handling drm/nouveau/bar/gk20a: Avoid bar teardown during init drm/nouveau/drm/nouveau: Pass the proper arguments to nvif_object_map_handle()
2018-01-19drm/nouveau/mmu/mcp77: fix regressions in stolen memory handlingBen Skeggs5-8/+106
- Fixes addition of stolen memory base address to PTEs. - Removes support for compression. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Tested-by: Pierre Moreau <pierre.morrow@free.fr>
2018-01-19drm/nouveau/bar/gk20a: Avoid bar teardown during initJon Hunter2-2/+2
Commit bbb163e18960 ("drm/nouveau/bar: implement bar1 teardown") introduced add a teardown helper function for BAR1. During initialisation of the Nouveau, initially all the teardown helpers are called once, before calling their init counterparts. For gk20a, after the BAR1 teardown function is called, the device is hanging during the initialisation of the FB sub-device. At this point it is unclear why this is happening and this is still under investigation. However, this change is preventing Tegra124 devices from booting when Nouveau is enabled. To allow Tegra124 to boot, remove the teardown helper for gk20a. This is based upon a previous patch by Guillaume Tucker but limits the workaround to only gk20a GPUs. Fixes: bbb163e18960 ("drm/nouveau/bar: implement bar1 teardown") Reported-by: Guillaume Tucker <guillaume.tucker@collabora.com> Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-01-12drm/nouveau: deprecate pci_get_bus_and_slot()Sinan Kaya1-1/+9
pci_get_bus_and_slot() is restrictive such that it assumes domain=0 as where a PCI device is present. This restricts the device drivers to be reused for other domain numbers. Getting ready to remove pci_get_bus_and_slot() function in favor of pci_get_domain_bus_and_slot(). Replace pci_get_bus_and_slot() with pci_get_domain_bus_and_slot() and extract the domain number from 1. struct pci_dev 2. struct pci_dev through drm_device->pdev 3. struct pci_dev through fb->subdev->drm_device->pdev Signed-off-by: Sinan Kaya <okaya@codeaurora.org> Signed-off-by: Bjorn Helgaas <helgaas@kernel.org>
2017-12-19Merge branch 'linux-4.15' of git://github.com/skeggsb/linux into drm-fixesDave Airlie3-2/+16
nouveau regression fixes, and some minor fixes. * 'linux-4.15' of git://github.com/skeggsb/linux: drm/nouveau: use alternate memory type for system-memory buffers with kind != 0 drm/nouveau: avoid GPU page sizes > PAGE_SIZE for buffer objects in host memory drm/nouveau/mmu/gp10b: use correct implementation drm/nouveau/pci: do a msi rearm on init drm/nouveau/imem/nv50: fix refcount_t warning drm/nouveau/bios/dp: support DP Info Table 2.0 drm/nouveau/fbcon: fix NULL pointer access in nouveau_fbcon_destroy
2017-12-19drm/nouveau/pci: do a msi rearm on initKarol Herbst1-0/+7
On my GP107 when I load nouveau after unloading it, for some reason the GPU stopped sending or the CPU stopped receiving interrupts if MSI was enabled. Doing a rearm once before getting any interrupts fixes this. Signed-off-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>