5 files changed, 125 insertions, 36 deletions
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 7da8f0402af5..403a7abfc2bc 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -235,7 +235,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd)
 		<programlisting>
 static void board_hwcontrol(struct mtd_info *mtd, int cmd)
 {
-	struct nand_chip *this = (struct nand_chip *) mtd->priv;
+	struct nand_chip *this = mtd_to_nand(mtd);
 	switch(cmd){
 		case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT;  break;
 		case NAND_CTL_CLRCLE: this->IO_ADDR_W &amp;= ~CLE_ADRR_BIT; break;
@@ -399,7 +399,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 		<programlisting>
 static void board_select_chip (struct mtd_info *mtd, int chip)
 {
-	struct nand_chip *this = (struct nand_chip *) mtd->priv;
+	struct nand_chip *this = mtd_to_nand(mtd);
 	
 	/* Deselect all chips */
 	this->IO_ADDR_R &amp;= ~BOARD_NAND_ADDR_MASK;
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 2bee68103b01..2c91c03e7eb0 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -1,15 +1,61 @@
-* MTD SPI driver for ST M25Pxx (and similar) serial flash chips
+* SPI NOR flash: ST M25Pxx (and similar) serial flash chips
 
 Required properties:
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
 - compatible : May include a device-specific string consisting of the
-               manufacturer and name of the chip. Bear in mind the DT binding
-               is not Linux-only, but in case of Linux, see the "m25p_ids"
-               table in drivers/mtd/devices/m25p80.c for the list of supported
-               chips.
+               manufacturer and name of the chip. A list of supported chip
+               names follows.
                Must also include "jedec,spi-nor" for any SPI NOR flash that can
                be identified by the JEDEC READ ID opcode (0x9F).
+
+               Supported chip names:
+                 at25df321a
+                 at25df641
+                 at26df081a
+                 mr25h256
+                 mx25l4005a
+                 mx25l1606e
+                 mx25l6405d
+                 mx25l12805d
+                 mx25l25635e
+                 n25q064
+                 n25q128a11
+                 n25q128a13
+                 n25q512a
+                 s25fl256s1
+                 s25fl512s
+                 s25sl12801
+                 s25fl008k
+                 s25fl064k
+                 sst25vf040b
+                 m25p40
+                 m25p80
+                 m25p16
+                 m25p32
+                 m25p64
+                 m25p128
+                 w25x80
+                 w25x32
+                 w25q32
+                 w25q32dw
+                 w25q80bl
+                 w25q128
+                 w25q256
+
+               The following chip names have been used historically to
+               designate quirky versions of flash chips that do not support the
+               JEDEC READ ID opcode (0x9F):
+                 m25p05-nonjedec
+                 m25p10-nonjedec
+                 m25p20-nonjedec
+                 m25p40-nonjedec
+                 m25p80-nonjedec
+                 m25p16-nonjedec
+                 m25p32-nonjedec
+                 m25p64-nonjedec
+                 m25p128-nonjedec
+
 - reg : Chip-Select number
 - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
 
diff --git a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
new file mode 100644
index 000000000000..fb314f09861b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt
@@ -0,0 +1,41 @@
+* Serial NOR flash controller for MTK MT81xx (and similar)
+
+Required properties:
+- compatible: 	  should be "mediatek,mt8173-nor";
+- reg: 		  physical base address and length of the controller's register
+- clocks: 	  the phandle of the clocks needed by the nor controller
+- clock-names: 	  the names of the clocks
+		  the clocks should be named "spi" and "sf". "spi" is used for spi bus,
+		  and "sf" is used for controller, these are the clocks witch
+		  hardware needs to enabling nor flash and nor flash controller.
+		  See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
+- #address-cells: should be <1>
+- #size-cells:	  should be <0>
+
+The SPI flash must be a child of the nor_flash node and must have a
+compatible property. Also see jedec,spi-nor.txt.
+
+Required properties:
+- compatible:	  May include a device-specific string consisting of the manufacturer
+		  and name of the chip. Must also include "jedec,spi-nor" for any
+		  SPI NOR flash that can be identified by the JEDEC READ ID opcode (0x9F).
+- reg :		  Chip-Select number
+
+Example:
+
+nor_flash: spi@1100d000 {
+	compatible = "mediatek,mt8173-nor";
+	reg = <0 0x1100d000 0 0xe0>;
+	clocks = <&pericfg CLK_PERI_SPI>,
+		 <&topckgen CLK_TOP_SPINFI_IFR_SEL>;
+	clock-names = "spi", "sf";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "disabled";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+	};
+};
+
diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
index 1c63e40659fc..81a224da63be 100644
--- a/Documentation/devicetree/bindings/mtd/partition.txt
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -32,6 +32,8 @@ Optional properties:
   partition should only be mounted read-only. This is usually used for flash
   partitions containing early-boot firmware images or data which should not be
   clobbered.
+- lock : Do not unlock the partition at initialization time (not supported on
+  all devices)
 
 Examples:
 
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index e129b2479ea8..f8c3284bf6a7 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -107,7 +107,7 @@ for (i = 0; i < 256; i++)
     if (i & 0x01)
        rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
     else
-       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
     if (i & 0x02)
        rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
     else
@@ -127,7 +127,7 @@ for (i = 0; i < 256; i++)
     if (i & 0x20)
       rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
     else
-    rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
     if (i & 0x40)
       rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
     else
@@ -158,7 +158,7 @@ the values in any order. So instead of calculating all the bits
 individually, let us try to rearrange things.
 For the column parity this is easy. We can just xor the bytes and in the
 end filter out the relevant bits. This is pretty nice as it will bring
-all cp calculation out of the if loop.
+all cp calculation out of the for loop.
 
 Similarly we can first xor the bytes for the various rows.
 This leads to:
@@ -271,11 +271,11 @@ to write our code in such a way that we process data in 32 bit chunks.
 Of course this means some modification as the row parity is byte by
 byte. A quick analysis:
 for the column parity we use the par variable. When extending to 32 bits
-we can in the end easily calculate p0 and p1 from it.
+we can in the end easily calculate rp0 and rp1 from it.
 (because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
-respectively)
+respectively, from MSB to LSB)
 also rp2 and rp3 can be easily retrieved from par as rp3 covers the
-first two bytes and rp2 the last two bytes.
+first two MSBs and rp2 covers the last two LSBs.
 
 Note that of course now the loop is executed only 64 times (256/4).
 And note that care must taken wrt byte ordering. The way bytes are
@@ -387,11 +387,11 @@ Analysis 2
 
 The code (of course) works, and hurray: we are a little bit faster than
 the linux driver code (about 15%). But wait, don't cheer too quickly.
-THere is more to be gained.
+There is more to be gained.
 If we look at e.g. rp14 and rp15 we see that we either xor our data with
 rp14 or with rp15. However we also have par which goes over all data.
 This means there is no need to calculate rp14 as it can be calculated from
-rp15 through rp14 = par ^ rp15;
+rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
 (or if desired we can avoid calculating rp15 and calculate it from
 rp14).  That is why some places refer to inverse parity.
 Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
@@ -419,12 +419,12 @@ with
         if (i & 0x20) rp15 ^= cur;
 
         and outside the loop added:
-    rp4  = par ^ rp5;
-    rp6  = par ^ rp7;
-    rp8  = par ^ rp9;
-    rp10  = par ^ rp11;
-    rp12  = par ^ rp13;
-    rp14  = par ^ rp15;
+        rp4  = par ^ rp5;
+        rp6  = par ^ rp7;
+        rp8  = par ^ rp9;
+        rp10  = par ^ rp11;
+        rp12  = par ^ rp13;
+        rp14  = par ^ rp15;
 
 And after that the code takes about 30% more time, although the number of
 statements is reduced. This is also reflected in the assembly code.
@@ -524,12 +524,12 @@ THe code within the for loop was changed to:
 
         cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
         cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
 
-	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
         cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
         cur = *bp++; tmppar ^= cur; rp8 ^= cur;
 
         cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
@@ -537,7 +537,7 @@ THe code within the for loop was changed to:
         cur = *bp++; tmppar ^= cur; rp4 ^= cur;
         cur = *bp++; tmppar ^= cur;
 
-	    par ^= tmppar;
+        par ^= tmppar;
         if ((i & 0x1) == 0) rp12 ^= tmppar;
         if ((i & 0x2) == 0) rp14 ^= tmppar;
     }
@@ -548,8 +548,8 @@ to rp12 and rp14.
 
 While making the changes I also found that I could exploit that tmppar
 contains the running parity for this iteration. So instead of having:
-rp4 ^= cur; rp6 = cur;
-I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
 statement. A similar change was done for rp8 and rp10
 
 
@@ -593,22 +593,22 @@ The new code now looks like:
 
         cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
         cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
 
-	    notrp8 = tmppar;
-	    cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        notrp8 = tmppar;
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
         cur = *bp++; tmppar ^= cur; rp6 ^= cur;
-	    cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
         cur = *bp++; tmppar ^= cur;
-	    rp8 = rp8 ^ tmppar ^ notrp8;
+        rp8 = rp8 ^ tmppar ^ notrp8;
 
         cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
         cur = *bp++; tmppar ^= cur; rp6 ^= cur;
         cur = *bp++; tmppar ^= cur; rp4 ^= cur;
         cur = *bp++; tmppar ^= cur;
 
-	    par ^= tmppar;
+        par ^= tmppar;
         if ((i & 0x1) == 0) rp12 ^= tmppar;
         if ((i & 0x2) == 0) rp14 ^= tmppar;
     }
@@ -700,7 +700,7 @@ Conclusion
 The gain when calculating the ecc is tremendous. Om my development hardware
 a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
 embedded system with a MIPS core a factor 7 was obtained.
-On  a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
 5 (big endian mode, gcc 4.1.2, -O3)
 For correction not much gain could be obtained (as bitflips are rare). Then
 again there are also much less cycles spent there.