243 files changed, 14277 insertions, 6002 deletions
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
new file mode 100644
index 000000000000..727e270b11e4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
@@ -0,0 +1,474 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:28:20 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="9.1in"
+   height="8.9in"
+   viewBox="-66 -66 10932 10707"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCU.fig">
+  <metadata
+     id="metadata106">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs104">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3864"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="973"
+     inkscape:window-height="1137"
+     id="namedview102"
+     showgrid="false"
+     inkscape:zoom="0.9743589"
+     inkscape:cx="409.50003"
+     inkscape:cy="400.49997"
+     inkscape:window-x="915"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="10800"
+       height="5625"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="3825"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="6525"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line -->
+    <polyline
+       points="3375,6525 3375,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline14" />
+    <!-- Arrowhead on XXXpoint 3375 6525 - 3375 4860-->
+    <!-- Circle -->
+    <circle
+       cx="7425"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle18" />
+    <!-- Circle -->
+    <circle
+       cx="7875"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle20" />
+    <!-- Circle -->
+    <circle
+       cx="8325"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle22" />
+    <!-- Circle -->
+    <circle
+       cx="2025"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle24" />
+    <!-- Circle -->
+    <circle
+       cx="2475"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="2925"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="4725"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Circle -->
+    <circle
+       cx="5175"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle32" />
+    <!-- Circle -->
+    <circle
+       cx="5625"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle34" />
+    <!-- Line: box -->
+    <rect
+       x="2025"
+       y="6525"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect36" />
+    <!-- Line -->
+    <polyline
+       points="2475,3600 3975,2310 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 2475 3600 - 4116 2190-->
+    <!-- Line -->
+    <polyline
+       points="7875,3600 6372,2310 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline42" />
+    <!-- Arrowhead on XXXpoint 7875 3600 - 6231 2190-->
+    <!-- Line -->
+    <polyline
+       points="6975,8775 6975,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 6975 8775 - 6975 4860-->
+    <!-- Line -->
+    <polyline
+       points="1575,8775 1575,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 1575 8775 - 1575 4860-->
+    <!-- Line -->
+    <polyline
+       points="8775,6525 8775,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 8775 6525 - 8775 4860-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="9225"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text58">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text60">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="10350"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text62">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="6975"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text64">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="7425"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text66">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="8100"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text68">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="9225"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text70">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text72">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="10350"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text74">CPU 1007</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="6930"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text76">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="7380"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text78">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="8055"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text80">CPU 1023</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text82">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2475"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text84">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2475"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text86">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7875"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text88">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7875"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text90">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5175"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text92">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5175"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text94">rcu_node</text>
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="8775"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="5625"
+       y="8775"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="7380"
+       y="6480"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect100" />
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
new file mode 100644
index 000000000000..9bbb1944f962
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
@@ -0,0 +1,499 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:26:09 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="5.7in"
+   height="6.6in"
+   viewBox="-44 -44 6838 7888"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCUBH.fig">
+  <metadata
+     id="metadata110">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs108">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3868"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Mend"
+       style="overflow:visible;">
+      <path
+         id="path3886"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(0.6) rotate(180) translate(0,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="878"
+     inkscape:window-height="1148"
+     id="namedview106"
+     showgrid="false"
+     inkscape:zoom="1.3547758"
+     inkscape:cx="256.5"
+     inkscape:cy="297"
+     inkscape:window-x="45"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle16" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle18" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle20" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle22" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle24" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle30" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle32" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect42" />
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect48" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect50" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect52" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect54" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect56" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect58" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text60">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text62">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text64">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text66">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text68">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text70">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text72">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text74">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text76">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text78">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text80">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text82">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text84">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text86">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text88">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text90">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text92">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline94" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline98" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline102" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
new file mode 100644
index 000000000000..21ba7823479d
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
@@ -0,0 +1,695 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:20:02 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="5.7in"
+   height="8.6in"
+   viewBox="-44 -44 6838 10288"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCUBHdyntick.fig">
+  <metadata
+     id="metadata166">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs164">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3924"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3936"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="845"
+     inkscape:window-height="988"
+     id="namedview162"
+     showgrid="false"
+     inkscape:zoom="1.0452196"
+     inkscape:cx="256.5"
+     inkscape:cy="387.00003"
+     inkscape:window-x="356"
+     inkscape:window-y="61"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5688,5912 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline12" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+    <polyline
+       points="5714 6068 5704 5822 5598 6044 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline14" />
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4486,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline16" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+    <polyline
+       points="4514 7418 4506 7172 4396 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline18" />
+    <!-- Line -->
+    <polyline
+       points="1040,9300 1476,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+    <polyline
+       points="1504 7418 1496 7172 1386 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline22" />
+    <!-- Line -->
+    <polyline
+       points="2240,8100 2676,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+    <polyline
+       points="2704 6218 2696 5972 2586 6194 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline26" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline48" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,8100 2250,6364 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+    <!-- Line -->
+    <polyline
+       points="1050,9300 1050,7564 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4050,7564 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5250,6364 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline64" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle80" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect106" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text108">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text110">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text112">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text114">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text116">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text118">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text120">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text122">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text124">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text126">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text128">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text130">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text132">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text134">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text138">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text152">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text156">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline158" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
new file mode 100644
index 000000000000..15adcac036c7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
@@ -0,0 +1,741 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:32:59 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="6.1in"
+   height="8.9in"
+   viewBox="-44 -44 7288 10738"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreePreemptRCUBHdyntick.fig">
+  <metadata
+     id="metadata182">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs180">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3940"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="874"
+     inkscape:window-height="1148"
+     id="namedview178"
+     showgrid="false"
+     inkscape:zoom="1.2097379"
+     inkscape:cx="274.5"
+     inkscape:cy="400.49997"
+     inkscape:window-x="946"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1200"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect16" />
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5688,6362 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+    <polyline
+       points="5714 6518 5704 6272 5598 6494 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline20" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4486,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+    <polyline
+       points="4514 7868 4506 7622 4396 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline24" />
+    <!-- Line -->
+    <polyline
+       points="1040,9750 1476,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline26" />
+    <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+    <polyline
+       points="1504 7868 1496 7622 1386 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline28" />
+    <!-- Line -->
+    <polyline
+       points="2240,8550 2676,6512 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline30" />
+    <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+    <polyline
+       points="2704 6668 2696 6422 2586 6644 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline32" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 5682,6360 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 5736 6246-->
+    <polyline
+       points="5672 6518 5722 6276 5562 6466 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline36" />
+    <!-- Line -->
+    <polyline
+       points="1010,9750 2642,6360 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 1010 9750 - 2696 6246-->
+    <polyline
+       points="2632 6518 2682 6276 2522 6466 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline40" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="900"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect42" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1500"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect44" />
+    <!-- Line -->
+    <polyline
+       points="1350,3900 2350,3040 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+    <!-- Line -->
+    <polyline
+       points="4950,3900 3948,3040 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+    <!-- Line -->
+    <polyline
+       points="4050,7050 4050,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+    <!-- Line -->
+    <polyline
+       points="1050,7050 1050,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline58" />
+    <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,5850 2250,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline62" />
+    <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,8550 2250,6814 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline66" />
+    <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+    <!-- Line -->
+    <polyline
+       points="1050,9750 1050,8014 "
+       style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline70" />
+    <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4050,8014 "
+       style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline74" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5250,6814 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline78" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle86" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle88" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle90" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle92" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle94" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle96" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle98" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect106" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect108" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="2100"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect110" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect112" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect114" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect116" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect118" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect120" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text122">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text124">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text126">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text128">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text130">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text132">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text134">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text138">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text152">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text160">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6900"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text170">rcu_preempt</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="1200"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text172">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5850 5250,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline174" />
+    <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
new file mode 100644
index 000000000000..bbc3801470d0
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
@@ -0,0 +1,858 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:29:48 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="7.4in"
+   height="9.9in"
+   viewBox="-44 -44 8938 11938"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg">
+  <metadata
+     id="metadata212">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs210">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3970"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="881"
+     inkscape:window-height="1128"
+     id="namedview208"
+     showgrid="false"
+     inkscape:zoom="1.0195195"
+     inkscape:cx="333"
+     inkscape:cy="445.49997"
+     inkscape:window-x="936"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1200"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect16" />
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5688,6362 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+    <polyline
+       points="5714 6518 5704 6272 5598 6494 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline20" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4486,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+    <polyline
+       points="4514 7868 4506 7622 4396 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline24" />
+    <!-- Line -->
+    <polyline
+       points="1040,9750 1476,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline26" />
+    <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+    <polyline
+       points="1504 7868 1496 7622 1386 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline28" />
+    <!-- Line -->
+    <polyline
+       points="2240,8550 2676,6512 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline30" />
+    <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+    <polyline
+       points="2704 6668 2696 6422 2586 6644 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline32" />
+    <!-- Line -->
+    <polyline
+       points="4050,9600 5692,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 4050 9600 - 5744 5948-->
+    <polyline
+       points="5682 6220 5730 5978 5574 6170 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline36" />
+    <!-- Line -->
+    <polyline
+       points="1086,9600 2728,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 1086 9600 - 2780 5948-->
+    <polyline
+       points="2718 6220 2766 5978 2610 6170 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline40" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="900"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect42" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1500"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect44" />
+    <!-- Line -->
+    <polyline
+       points="1350,3900 2350,3040 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+    <!-- Line -->
+    <polyline
+       points="4950,3900 3948,3040 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+    <!-- Line -->
+    <polyline
+       points="4050,7050 4050,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+    <!-- Line -->
+    <polyline
+       points="1050,7050 1050,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline58" />
+    <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,5850 2250,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline62" />
+    <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,8550 2250,6814 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline66" />
+    <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+    <!-- Line -->
+    <polyline
+       points="1050,9750 1050,8014 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline70" />
+    <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4050,8014 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline74" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5250,6814 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline78" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+    <!-- Line -->
+    <polyline
+       points="6000,6300 8048,7910 "
+       style="stroke:#87cfff;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline82" />
+    <!-- Arrowhead on XXXpoint 6000 6300 - 8146 7986-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle86" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle88" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle90" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle92" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle94" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle96" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle98" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle100" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle102" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="7950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="9450"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect106" />
+    <!-- Line -->
+    <polyline
+       points="8100,8850 8100,9384 "
+       style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 8100 8850 - 8100 9510-->
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="10950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect112" />
+    <!-- Line -->
+    <polyline
+       points="8100,10350 8100,10884 "
+       style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 8100 10350 - 8100 11010-->
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect118" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect120" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect122" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect124" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect126" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="2100"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect128" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect130" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect132" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect134" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect136" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect138" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="8250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="8550"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="9750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="11250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="11550"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="1200"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text152">rcu_sched</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text154">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text160">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text170">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text172">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text174">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text176">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text178">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text180">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text182">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text184">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text186">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text188">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text192">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text194">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text196">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text198">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text200">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6900"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text202">rcu_preempt</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5850 5250,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline204" />
+    <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
new file mode 100644
index 000000000000..7eb47ac25ad7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -0,0 +1,1333 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
+        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+
+           <p>January 27, 2016</p>
+           <p>This article was contributed by Paul E.&nbsp;McKenney</p>
+
+<h3>Introduction</h3>
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+<ol>
+<li>	<a href="#Data-Structure Relationships">
+	Data-Structure Relationships</a>
+<li>	<a href="#The rcu_state Structure">
+	The <tt>rcu_state</tt> Structure</a>
+<li>	<a href="#The rcu_node Structure">
+	The <tt>rcu_node</tt> Structure</a>
+<li>	<a href="#The rcu_data Structure">
+	The <tt>rcu_data</tt> Structure</a>
+<li>	<a href="#The rcu_dynticks Structure">
+	The <tt>rcu_dynticks</tt> Structure</a>
+<li>	<a href="#The rcu_head Structure">
+	The <tt>rcu_head</tt> Structure</a>
+<li>	<a href="#RCU-Specific Fields in the task_struct Structure">
+	RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
+<li>	<a href="#Accessor Functions">
+	Accessor Functions</a>
+</ol>
+
+At the end we have the
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
+
+<p>RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
+
+</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
+containing a tree of <tt>rcu_node</tt> structures.
+Each leaf node of the <tt>rcu_node</tt> tree has up to 16
+<tt>rcu_data</tt> structures associated with it, so that there
+are <tt>NR_CPUS</tt> number of <tt>rcu_data</tt> structures,
+one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the
+common case where <tt>nr_cpu_ids</tt> is much less than
+<tt>NR_CPUs</tt>.
+For example, a number of Linux distributions set <tt>NR_CPUs=4096</tt>,
+which results in a three-level <tt>rcu_node</tt> tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
+
+</p><p>The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
+and other events are recorded by the leaf-level <tt>rcu_node</tt>
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root <tt>rcu_node</tt>
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+</p><p>As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why isn't the fanout at the leaves also 64?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Because there are more types of events that affect the leaf-level
+	<tt>rcu_node</tt> structures than further up the tree.
+	Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
+	64, the contention on these structures' <tt>-&gt;structures</tt>
+	becomes excessive.
+	Experimentation on a wide variety of systems has shown that a fanout
+	of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
+	</font>
+
+	<p><font color="ffffff">Of course, further experience with
+	systems having hundreds or thousands of CPUs may demonstrate
+	that the fanout for the non-leaf <tt>rcu_node</tt> structures
+	must also be reduced.
+	Such reduction can be easily carried out when and if it proves
+	necessary.
+	In the meantime, if you are using such a system and running into
+	contention problems on the non-leaf <tt>rcu_node</tt> structures,
+	you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
+	parameter to reduce the non-leaf fanout as needed.
+	</font>
+
+	<p><font color="ffffff">Kernels built for systems with
+	strong NUMA characteristics might also need to adjust
+	<tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
+	<tt>rcu_node</tt> structures align with hardware boundaries.
+	However, there has thus far been no need for this.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
+a 32-bit system), then RCU will automatically add more levels to the
+tree.
+For example, if you are crazy enough to build a 64-bit system with 65,536
+CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
+
+</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
+
+</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
+32-bit systems.
+On the other hand, you can set <tt>CONFIG_RCU_FANOUT</tt> to be
+as small as 2 if you wish, which would permit only 16 CPUs, which
+is useful for testing.
+
+</p><p>This multi-level combining tree allows us to get most of the
+performance and scalability
+benefits of partitioning, even though RCU grace-period detection is
+inherently a global operation.
+The trick here is that only the last CPU to report a quiescent state
+into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
+structure at the next level up the tree.
+This means that at the leaf-level <tt>rcu_node</tt> structure, only
+one access out of sixteen will progress up the tree.
+For the internal <tt>rcu_node</tt> structures, the situation is even
+more extreme:  Only one access out of sixty-four will progress up
+the tree.
+Because the vast majority of the CPUs do not progress up the tree,
+the lock contention remains roughly constant up the tree.
+No matter how many CPUs there are in the system, at most 64 quiescent-state
+reports per grace period will progress all the way to the root
+<tt>rcu_node</tt> structure, thus ensuring that the lock contention
+on that root <tt>rcu_node</tt> structure remains acceptably low.
+
+</p><p>In effect, the combining tree acts like a big shock absorber,
+keeping lock contention under control at all tree levels regardless
+of the level of loading on the system.
+
+</p><p>The Linux kernel actually supports multiple flavors of RCU
+running concurrently, so RCU builds separate data structures for each
+flavor.
+For example, for <tt>CONFIG_TREE_RCU=y</tt> kernels, RCU provides
+rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBH.svg" alt="BigTreeClassicRCUBH.svg" width="33%">
+
+</p><p>Energy efficiency is increasingly important, and for that
+reason the Linux kernel provides <tt>CONFIG_NO_HZ_IDLE</tt>, which
+turns off the scheduling-clock interrupts on idle CPUs, which in
+turn allows those CPUs to attain deeper sleep states and to consume
+less energy.
+CPUs whose scheduling-clock interrupts have been turned off are
+said to be in <i>dyntick-idle mode</i>.
+RCU must handle dyntick-idle CPUs specially
+because RCU would otherwise wake up each CPU on every grace period,
+which would defeat the whole purpose of <tt>CONFIG_NO_HZ_IDLE</tt>.
+RCU uses the <tt>rcu_dynticks</tt> structure to track
+which CPUs are in dyntick idle mode, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBHdyntick.svg" alt="BigTreeClassicRCUBHdyntick.svg" width="33%">
+
+</p><p>However, if a CPU is in dyntick-idle mode, it is in that mode
+for all flavors of RCU.
+Therefore, a single <tt>rcu_dynticks</tt> structure is allocated per
+CPU, and all of a given CPU's <tt>rcu_data</tt> structures share
+that <tt>rcu_dynticks</tt>, as shown in the figure.
+
+</p><p>Kernels built with <tt>CONFIG_PREEMPT_RCU</tt> support
+rcu_preempt in addition to rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreePreemptRCUBHdyntick.svg" alt="BigTreePreemptRCUBHdyntick.svg" width="35%">
+
+</p><p>RCU updaters wait for normal grace periods by registering
+RCU callbacks, either directly via <tt>call_rcu()</tt> and
+friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>),
+there being a separate interface per flavor of RCU)
+or indirectly via <tt>synchronize_rcu()</tt> and friends.
+RCU callbacks are represented by <tt>rcu_head</tt> structures,
+which are queued on <tt>rcu_data</tt> structures while they are
+waiting for a grace period to elapse, as shown in the following figure:
+
+</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
+
+</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
+<tt>PREEMPT_RCU</tt>'s major data structures are related.
+Lesser data structures will be introduced with the algorithms that
+make use of them.
+
+</p><p>Note that each of the data structures in the above figure has
+its own synchronization:
+
+<p><ol>
+<li>	Each <tt>rcu_state</tt> structures has a lock and a mutex,
+	and some fields are protected by the corresponding root
+	<tt>rcu_node</tt> structure's lock.
+<li>	Each <tt>rcu_node</tt> structure has a spinlock.
+<li>	The fields in <tt>rcu_data</tt> are private to the corresponding
+	CPU, although a few can be read and written by other CPUs.
+<li>	Similarly, the fields in <tt>rcu_dynticks</tt> are private
+	to the corresponding CPU, although a few can be read by
+	other CPUs.
+</ol>
+
+<p>It is important to note that different data structures can have
+very different ideas about the state of RCU at any given time.
+For but one example, awareness of the start or end of a given RCU
+grace period propagates slowly through the data structures.
+This slow propagation is absolutely necessary for RCU to have good
+read-side performance.
+If this balkanized implementation seems foreign to you, one useful
+trick is to consider each instance of these data structures to be
+a different person, each having the usual slightly different
+view of reality.
+
+</p><p>The general role of each of these data structures is as
+follows:
+
+</p><ol>
+<li>	<tt>rcu_state</tt>:
+	This structure forms the interconnection between the
+	<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+	tracks grace periods, serves as short-term repository
+	for callbacks orphaned by CPU-hotplug events,
+	maintains <tt>rcu_barrier()</tt> state,
+	tracks expedited grace-period state,
+	and maintains state used to force quiescent states when
+	grace periods extend too long,
+<li>	<tt>rcu_node</tt>: This structure forms the combining
+	tree that propagates quiescent-state
+	information from the leaves to the root, and also propagates
+	grace-period information from the root to the leaves.
+	It provides local copies of the grace-period state in order
+	to allow this information to be accessed in a synchronized
+	manner without suffering the scalability limitations that
+	would otherwise be imposed by global locking.
+	In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
+	of tasks that have blocked while in their current
+	RCU read-side critical section.
+	In <tt>CONFIG_PREEMPT_RCU</tt> with
+	<tt>CONFIG_RCU_BOOST</tt>, it manages the
+	per-<tt>rcu_node</tt> priority-boosting
+	kernel threads (kthreads) and state.
+	Finally, it records CPU-hotplug state in order to determine
+	which CPUs should be ignored during a given grace period.
+<li>	<tt>rcu_data</tt>: This per-CPU structure is the
+	focus of quiescent-state detection and RCU callback queuing.
+	It also tracks its relationship to the corresponding leaf
+	<tt>rcu_node</tt> structure to allow more-efficient
+	propagation of quiescent states up the <tt>rcu_node</tt>
+	combining tree.
+	Like the <tt>rcu_node</tt> structure, it provides a local
+	copy of the grace-period information to allow for-free
+	synchronized
+	access to this information from the corresponding CPU.
+	Finally, this structure records past dyntick-idle state
+	for the corresponding CPU and also tracks statistics.
+<li>	<tt>rcu_dynticks</tt>:
+	This per-CPU structure tracks the current dyntick-idle
+	state for the corresponding CPU.
+	Unlike the other three structures, the <tt>rcu_dynticks</tt>
+	structure is not replicated per RCU flavor.
+<li>	<tt>rcu_head</tt>:
+	This structure represents RCU callbacks, and is the
+	only structure allocated and managed by RCU users.
+	The <tt>rcu_head</tt> structure is normally embedded
+	within the RCU-protected data structure.
+</ol>
+
+<p>If all you wanted from this article was a general notion of how
+RCU's data structures are related, you are done.
+Otherwise, each of the following sections give more details on
+the <tt>rcu_state</tt>, <tt>rcu_node</tt>, <tt>rcu_data</tt>,
+and <tt>rcu_dynticks</tt> data structures.
+
+<h3><a name="The rcu_state Structure">
+The <tt>rcu_state</tt> Structure</a></h3>
+
+<p>The <tt>rcu_state</tt> structure is the base structure that
+represents a flavor of RCU.
+This structure forms the interconnection between the
+<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+tracks grace periods, contains the lock used to
+synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when
+grace periods extend too long,
+
+</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+The more specialized fields are covered in the discussion of their
+use.
+
+<h5>Relationship to rcu_node and rcu_data Structures</h5>
+
+This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   struct rcu_node node[NUM_RCU_NODES];
+  2   struct rcu_node *level[NUM_RCU_LVLS + 1];
+  3   struct rcu_data __percpu *rda;
+</pre>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	You said that the <tt>rcu_node</tt> structures formed a tree,
+	but they are declared as a flat array!
+	What gives?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	The tree is laid out in the array.
+	The first node In the array is the head, the next set of nodes in the
+	array are children of the head node, and so on until the last set of
+	nodes in the array are the leaves.
+	</font>
+
+	<p><font color="ffffff">See the following diagrams to see how
+	this works.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>rcu_node</tt> tree is embedded into the
+<tt>-&gt;node[]</tt> array as shown in the following figure:
+
+</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
+
+</p><p>One interesting consequence of this mapping is that a
+breadth-first traversal of the tree is implemented as a simple
+linear scan of the array, which is in fact what the
+<tt>rcu_for_each_node_breadth_first()</tt> macro does.
+This macro is used at the beginning and ends of grace periods.
+
+</p><p>Each entry of the <tt>-&gt;level</tt> array references
+the first <tt>rcu_node</tt> structure on the corresponding level
+of the tree, for example, as shown below:
+
+</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
+
+</p><p>The zero<sup>th</sup> element of the array references the root
+<tt>rcu_node</tt> structure, the first element references the
+first child of the root <tt>rcu_node</tt>, and finally the second
+element references the first leaf <tt>rcu_node</tt> structure.
+
+</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
+rather than array-shaped, it is easy to draw a planar representation:
+
+</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
+
+</p><p>Finally, the <tt>-&gt;rda</tt> field references a per-CPU
+pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
+
+</p><p>All of these fields are constant once initialization is complete,
+and therefore need no protection.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gpnum;
+  2   unsigned long completed;
+</pre>
+
+<p>RCU grace periods are numbered, and
+the <tt>-&gt;gpnum</tt> field contains the number of the grace
+period that started most recently.
+The <tt>-&gt;completed</tt> field contains the number of the
+grace period that completed most recently.
+If the two fields are equal, the RCU grace period that most recently
+started has already completed, and therefore the corresponding
+flavor of RCU is idle.
+If <tt>-&gt;gpnum</tt> is one greater than <tt>-&gt;completed</tt>,
+then <tt>-&gt;gpnum</tt> gives the number of the current RCU
+grace period, which has not yet completed.
+Any other combination of values indicates that something is broken.
+These two fields are protected by the root <tt>rcu_node</tt>'s
+<tt>-&gt;lock</tt> field.
+
+</p><p>There are <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt> fields
+in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
+as well.
+The fields in the <tt>rcu_state</tt> structure represent the
+most current values, and those of the other structures are compared
+in order to detect the start of a new grace period in a distributed
+fashion.
+The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
+(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
+
+<h5>Miscellaneous</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gp_max;
+  2   char abbr;
+  3   char *name;
+</pre>
+
+<p>The <tt>-&gt;gp_max</tt> field tracks the duration of the longest
+grace period in jiffies.
+It is protected by the root <tt>rcu_node</tt>'s <tt>-&gt;lock</tt>.
+
+<p>The <tt>-&gt;name</tt> field points to the name of the RCU flavor
+(for example, &ldquo;rcu_sched&rdquo;), and is constant.
+The <tt>-&gt;abbr</tt> field contains a one-character abbreviation,
+for example, &ldquo;s&rdquo; for RCU-sched.
+
+<h3><a name="The rcu_node Structure">
+The <tt>rcu_node</tt> Structure</a></h3>
+
+<p>The <tt>rcu_node</tt> structures form the combining
+tree that propagates quiescent-state
+information from the leaves to the root and also that propagates
+grace-period information from the root down to the leaves.
+They provides local copies of the grace-period state in order
+to allow this information to be accessed in a synchronized
+manner without suffering the scalability limitations that
+would otherwise be imposed by global locking.
+In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
+of tasks that have blocked while in their current
+RCU read-side critical section.
+In <tt>CONFIG_PREEMPT_RCU</tt> with
+<tt>CONFIG_RCU_BOOST</tt>, they manage the
+per-<tt>rcu_node</tt> priority-boosting
+kernel threads (kthreads) and state.
+Finally, they record CPU-hotplug state in order to determine
+which CPUs should be ignored during a given grace period.
+
+</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Combining Tree</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   struct rcu_node *parent;
+  2   u8 level;
+  3   u8 grpnum;
+  4   unsigned long grpmask;
+  5   int grplo;
+  6   int grphi;
+</pre>
+
+<p>The <tt>-&gt;parent</tt> pointer references the <tt>rcu_node</tt>
+one level up in the tree, and is <tt>NULL</tt> for the root
+<tt>rcu_node</tt>.
+The RCU implementation makes heavy use of this field to push quiescent
+states up the tree.
+The <tt>-&gt;level</tt> field gives the level in the tree, with
+the root being at level zero, its children at level one, and so on.
+The <tt>-&gt;grpnum</tt> field gives this node's position within
+the children of its parent, so this number can range between 0 and 31
+on 32-bit systems and between 0 and 63 on 64-bit systems.
+The <tt>-&gt;level</tt> and <tt>-&gt;grpnum</tt> fields are
+used only during initialization and for tracing.
+The <tt>-&gt;grpmask</tt> field is the bitmask counterpart of
+<tt>-&gt;grpnum</tt>, and therefore always has exactly one bit set.
+This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
+structure in its parent's bitmasks, which are described later.
+Finally, the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt> fields
+contain the lowest and highest numbered CPU served by this
+<tt>rcu_node</tt> structure, respectively.
+
+</p><p>All of these fields are constant, and thus do not require any
+synchronization.
+
+<h5>Synchronization</h5>
+
+<p>This field of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   raw_spinlock_t lock;
+</pre>
+
+<p>This field is used to protect the remaining fields in this structure,
+unless otherwise stated.
+That said, all of the fields in this structure can be accessed without
+locking for tracing purposes.
+Yes, this can result in confusing traces, but better some tracing confusion
+than to be heisenbugged out of existence.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gpnum;
+  2   unsigned long completed;
+</pre>
+
+<p>These fields are the counterparts of the fields of the same name in
+the <tt>rcu_state</tt> structure.
+They each may lag up to one behind their <tt>rcu_state</tt>
+counterparts.
+If a given <tt>rcu_node</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;complete</tt> fields are equal, then this <tt>rcu_node</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> structure,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;complete</tt> fields, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_node</tt> believes
+is still being waited for.
+
+</p><p>The <tt>&gt;gpnum</tt> field of each <tt>rcu_node</tt>
+structure is updated at the beginning
+of each grace period, and the <tt>-&gt;completed</tt> fields are
+updated at the end of each grace period.
+
+<h5>Quiescent-State Tracking</h5>
+
+<p>These fields manage the propagation of quiescent states up the
+combining tree.
+
+</p><p>This portion of the <tt>rcu_node</tt> structure has fields
+as follows:
+
+<pre>
+  1   unsigned long qsmask;
+  2   unsigned long expmask;
+  3   unsigned long qsmaskinit;
+  4   unsigned long expmaskinit;
+</pre>
+
+<p>The <tt>-&gt;qsmask</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current normal grace period.
+Such children will have a value of 1 in their corresponding bit.
+Note that the leaf <tt>rcu_node</tt> structures should be
+thought of as having <tt>rcu_data</tt> structures as their
+children.
+Similarly, the <tt>-&gt;expmask</tt> field tracks which
+of this <tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current expedited grace period.
+An expedited grace period has
+the same conceptual properties as a normal grace period, but the
+expedited implementation accepts extreme CPU overhead to obtain
+much lower grace-period latency, for example, consuming a few
+tens of microseconds worth of CPU time to reduce grace-period
+duration from milliseconds to tens of microseconds.
+The <tt>-&gt;qsmaskinit</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children cover for at least
+one online CPU.
+This mask is used to initialize <tt>-&gt;qsmask</tt>,
+and <tt>-&gt;expmaskinit</tt> is used to initialize
+<tt>-&gt;expmask</tt> and the beginning of the
+normal and expedited grace periods, respectively.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why are these bitmasks protected by locking?
+	Come on, haven't you heard of atomic instructions???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Lockless grace-period computation!  Such a tantalizing possibility!
+	</font>
+
+	<p><font color="ffffff">But consider the following sequence of events:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">CPU&nbsp;0 has been in dyntick-idle
+		mode for quite some time.
+		When it wakes up, it notices that the current RCU
+		grace period needs it to report in, so it sets a
+		flag where the scheduling clock interrupt will find it.
+		</font><p>
+	<li>	<font color="ffffff">Meanwhile, CPU&nbsp;1 is running
+		<tt>force_quiescent_state()</tt>,
+		and notices that CPU&nbsp;0 has been in dyntick idle mode,
+		which qualifies as an extended quiescent state.
+		</font><p>
+	<li>	<font color="ffffff">CPU&nbsp;0's scheduling clock
+		interrupt fires in the
+		middle of an RCU read-side critical section, and notices
+		that the RCU core needs something, so commences RCU softirq
+		processing.
+		</font>
+		<p>
+	<li>	<font color="ffffff">CPU&nbsp;0's softirq handler
+		executes and is just about ready
+		to report its quiescent state up the <tt>rcu_node</tt>
+		tree.
+		</font><p>
+	<li>	<font color="ffffff">But CPU&nbsp;1 beats it to the punch,
+		completing the current
+		grace period and starting a new one.
+		</font><p>
+	<li>	<font color="ffffff">CPU&nbsp;0 now reports its quiescent
+		state for the wrong
+		grace period.
+		That grace period might now end before the RCU read-side
+		critical section.
+		If that happens, disaster will ensue.
+		</font>
+	</ol>
+
+	<p><font color="ffffff">So the locking is absolutely required in
+	order to coordinate
+	clearing of the bits with the grace-period numbers in
+	<tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h5>Blocked-Task Management</h5>
+
+<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
+midst of their RCU read-side critical sections, and these tasks
+must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered
+in a separate article on RCU read-side processing.
+For now, it is enough to know that the <tt>rcu_node</tt>
+structure tracks them.
+
+<pre>
+  1   struct list_head blkd_tasks;
+  2   struct list_head *gp_tasks;
+  3   struct list_head *exp_tasks;
+  4   bool wait_blkd_tasks;
+</pre>
+
+<p>The <tt>-&gt;blkd_tasks</tt> field is a list header for
+the list of blocked and preempted tasks.
+As tasks undergo context switches within RCU read-side critical
+sections, their <tt>task_struct</tt> structures are enqueued
+(via the <tt>task_struct</tt>'s <tt>-&gt;rcu_node_entry</tt>
+field) onto the head of the <tt>-&gt;blkd_tasks</tt> list for the
+leaf <tt>rcu_node</tt> structure corresponding to the CPU
+on which the outgoing context switch executed.
+As these tasks later exit their RCU read-side critical sections,
+they remove themselves from the list.
+This list is therefore in reverse time order, so that if one of the tasks
+is blocking the current grace period, all subsequent tasks must
+also be blocking that same grace period.
+Therefore, a single pointer into this list suffices to track
+all tasks blocking a given grace period.
+That pointer is stored in <tt>-&gt;gp_tasks</tt> for normal
+grace periods and in <tt>-&gt;exp_tasks</tt> for expedited
+grace periods.
+These last two fields are <tt>NULL</tt> if either there is
+no grace period in flight or if there are no blocked tasks
+preventing that grace period from completing.
+If either of these two pointers is referencing a task that
+removes itself from the <tt>-&gt;blkd_tasks</tt> list,
+then that task must advance the pointer to the next task on
+the list, or set the pointer to <tt>NULL</tt> if there
+are no subsequent tasks on the list.
+
+</p><p>For example, suppose that tasks&nbsp;T1, T2, and&nbsp;T3 are
+all hard-affinitied to the largest-numbered CPU in the system.
+Then if task&nbsp;T1 blocked in an RCU read-side
+critical section, then an expedited grace period started,
+then task&nbsp;T2 blocked in an RCU read-side critical section,
+then a normal grace period started, and finally task&nbsp;3 blocked
+in an RCU read-side critical section, then the state of the
+last leaf <tt>rcu_node</tt> structure's blocked-task list
+would be as shown below:
+
+</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
+
+</p><p>Task&nbsp;T1 is blocking both grace periods, task&nbsp;T2 is
+blocking only the normal grace period, and task&nbsp;T3 is blocking
+neither grace period.
+Note that these tasks will not remove themselves from this list
+immediately upon resuming execution.
+They will instead remain on the list until they execute the outermost
+<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
+section.
+
+<p>
+The <tt>-&gt;wait_blkd_tasks</tt> field indicates whether or not
+the current grace period is waiting on a blocked task.
+
+<h5>Sizing the <tt>rcu_node</tt> Array</h5>
+
+<p>The <tt>rcu_node</tt> array is sized via a series of
+C-preprocessor expressions as follows:
+
+<pre>
+ 1 #ifdef CONFIG_RCU_FANOUT
+ 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+ 3 #else
+ 4 # ifdef CONFIG_64BIT
+ 5 # define RCU_FANOUT 64
+ 6 # else
+ 7 # define RCU_FANOUT 32
+ 8 # endif
+ 9 #endif
+10
+11 #ifdef CONFIG_RCU_FANOUT_LEAF
+12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+13 #else
+14 # ifdef CONFIG_64BIT
+15 # define RCU_FANOUT_LEAF 64
+16 # else
+17 # define RCU_FANOUT_LEAF 32
+18 # endif
+19 #endif
+20
+21 #define RCU_FANOUT_1        (RCU_FANOUT_LEAF)
+22 #define RCU_FANOUT_2        (RCU_FANOUT_1 * RCU_FANOUT)
+23 #define RCU_FANOUT_3        (RCU_FANOUT_2 * RCU_FANOUT)
+24 #define RCU_FANOUT_4        (RCU_FANOUT_3 * RCU_FANOUT)
+25
+26 #if NR_CPUS &lt;= RCU_FANOUT_1
+27 #  define RCU_NUM_LVLS        1
+28 #  define NUM_RCU_LVL_0        1
+29 #  define NUM_RCU_NODES        NUM_RCU_LVL_0
+30 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+31 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+32 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+33 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
+34 #elif NR_CPUS &lt;= RCU_FANOUT_2
+35 #  define RCU_NUM_LVLS        2
+36 #  define NUM_RCU_LVL_0        1
+37 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+38 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+39 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+40 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+41 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+42 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
+43 #elif NR_CPUS &lt;= RCU_FANOUT_3
+44 #  define RCU_NUM_LVLS        3
+45 #  define NUM_RCU_LVL_0        1
+46 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+47 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+48 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+49 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+50 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+51 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+52 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+53 #elif NR_CPUS &lt;= RCU_FANOUT_4
+54 #  define RCU_NUM_LVLS        4
+55 #  define NUM_RCU_LVL_0        1
+56 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+57 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+58 #  define NUM_RCU_LVL_3        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+59 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+60 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+61 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+62 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+63 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+64 #else
+65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+66 #endif
+</pre>
+
+<p>The maximum number of levels in the <tt>rcu_node</tt> structure
+is currently limited to four, as specified by lines&nbsp;21-24
+and the structure of the subsequent &ldquo;if&rdquo; statement.
+For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
+should be sufficient for the next few years at least.
+For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
+should see us through the next decade or so.
+This four-level tree also allows kernels built with
+<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
+which might be useful in very large systems having eight CPUs per
+socket (but please note that no one has yet shown any measurable
+performance degradation due to misaligned socket and <tt>rcu_node</tt>
+boundaries).
+In addition, building kernels with a full four levels of <tt>rcu_node</tt>
+tree permits better testing of RCU's combining-tree code.
+
+</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
+are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
+If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
+it is set based on the word size of the system, which is also
+the Kconfig default.
+
+</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
+handled by each leaf <tt>rcu_node</tt> structure.
+Experience has shown that allowing a given leaf <tt>rcu_node</tt>
+structure to handle 64 CPUs, as permitted by the number of bits in
+the <tt>-&gt;qsmask</tt> field on a 64-bit system, results in
+excessive contention for the leaf <tt>rcu_node</tt> structures'
+<tt>-&gt;lock</tt> fields.
+The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
+limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
+If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
+selected is based on the word size of the system, just as for
+<tt>CONFIG_RCU_FANOUT</tt>.
+Lines&nbsp;11-19 perform this computation.
+
+</p><p>Lines&nbsp;21-24 compute the maximum number of CPUs supported by
+a single-level (which contains a single <tt>rcu_node</tt> structure),
+two-level, three-level, and four-level <tt>rcu_node</tt> tree,
+respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
+and <tt>RCU_FANOUT_LEAF</tt>.
+These numbers of CPUs are retained in the
+<tt>RCU_FANOUT_1</tt>,
+<tt>RCU_FANOUT_2</tt>,
+<tt>RCU_FANOUT_3</tt>, and
+<tt>RCU_FANOUT_4</tt>
+C-preprocessor variables, respectively.
+
+</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
+statement spanning lines&nbsp;26-66 that computes the number of
+<tt>rcu_node</tt> structures required for each level of the tree,
+as well as the number of levels required.
+The number of levels is placed in the <tt>NUM_RCU_LVLS</tt>
+C-preprocessor variable by lines&nbsp;27, 35, 44, and&nbsp;54.
+The number of <tt>rcu_node</tt> structures for the topmost level
+of the tree is always exactly one, and this value is unconditionally
+placed into <tt>NUM_RCU_LVL_0</tt> by lines&nbsp;28, 36, 45, and&nbsp;55.
+The rest of the levels (if any) of the <tt>rcu_node</tt> tree
+are computed by dividing the maximum number of CPUs by the
+fanout supported by the number of levels from the current level down,
+rounding up.  This computation is performed by lines&nbsp;37,
+46-47, and&nbsp;56-58.
+Lines&nbsp;31-33, 40-42, 50-52, and&nbsp;62-63 create initializers
+for lockdep lock-class names.
+Finally, lines&nbsp;64-66 produce an error if the maximum number of
+CPUs is too large for the specified fanout.
+
+<h3><a name="The rcu_data Structure">
+The <tt>rcu_data</tt> Structure</a></h3>
+
+<p>The <tt>rcu_data</tt> maintains the per-CPU state for the
+corresponding flavor of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+This structure is the
+focus of quiescent-state detection and RCU callback queuing.
+It also tracks its relationship to the corresponding leaf
+<tt>rcu_node</tt> structure to allow more-efficient
+propagation of quiescent states up the <tt>rcu_node</tt>
+combining tree.
+Like the <tt>rcu_node</tt> structure, it provides a local
+copy of the grace-period information to allow for-free
+synchronized
+access to this information from the corresponding CPU.
+Finally, this structure records past dyntick-idle state
+for the corresponding CPU and also tracks statistics.
+
+</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Other Data Structures</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   int cpu;
+  2   struct rcu_state *rsp;
+  3   struct rcu_node *mynode;
+  4   struct rcu_dynticks *dynticks;
+  5   unsigned long grpmask;
+  6   bool beenonline;
+</pre>
+
+<p>The <tt>-&gt;cpu</tt> field contains the number of the
+corresponding CPU, the <tt>-&gt;rsp</tt> pointer references
+the corresponding <tt>rcu_state</tt> structure (and is most frequently
+used to locate the name of the corresponding flavor of RCU for tracing),
+and the <tt>-&gt;mynode</tt> field references the corresponding
+<tt>rcu_node</tt> structure.
+The <tt>-&gt;mynode</tt> is used to propagate quiescent states
+up the combining tree.
+<p>The <tt>-&gt;dynticks</tt> pointer references the
+<tt>rcu_dynticks</tt> structure corresponding to this
+CPU.
+Recall that a single per-CPU instance of the <tt>rcu_dynticks</tt>
+structure is shared among all flavors of RCU.
+These first four fields are constant and therefore require not
+synchronization.
+
+</p><p>The <tt>-&gt;grpmask</tt> field indicates the bit in
+the <tt>-&gt;mynode-&gt;qsmask</tt> corresponding to this
+<tt>rcu_data</tt> structure, and is also used when propagating
+quiescent states.
+The <tt>-&gt;beenonline</tt> flag is set whenever the corresponding
+CPU comes online, which means that the debugfs tracing need not dump
+out any <tt>rcu_data</tt> structure for which this flag is not set.
+
+<h5>Quiescent-State and Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long completed;
+  2   unsigned long gpnum;
+  3   bool cpu_no_qs;
+  4   bool core_needs_qs;
+  5   bool gpwrap;
+  6   unsigned long rcu_qs_ctr_snap;
+</pre>
+
+<p>The <tt>completed</tt> and <tt>gpnum</tt>
+fields are the counterparts of the fields of the same name
+in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
+They may each lag up to one behind their <tt>rcu_node</tt>
+counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
+<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
+arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
+will catch up upon exit from dyntick-idle mode).
+If a given <tt>rcu_data</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;complete</tt> fields are equal, then this <tt>rcu_data</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> and <tt>rcu_node</tt>
+structure,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;complete</tt> fields, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_data</tt> believes
+is still being waited for.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	All this replication of the grace period numbers can only cause
+	massive confusion.
+	Why not just keep a global pair of counters and be done with it???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Because if there was only a single global pair of grace-period
+	numbers, there would need to be a single global lock to allow
+	safely accessing and updating them.
+	And if we are not going to have a single global lock, we need
+	to carefully manage the numbers on a per-node basis.
+	Recall from the answer to a previous Quick Quiz that the consequences
+	of applying a previously sampled quiescent state to the wrong
+	grace period are quite severe.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>-&gt;cpu_no_qs</tt> flag indicates that the
+CPU has not yet passed through a quiescent state,
+while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
+RCU core needs a quiescent state from the corresponding CPU.
+The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
+CPU has remained idle for so long that the <tt>completed</tt>
+and <tt>gpnum</tt> counters are in danger of overflow, which
+will cause the CPU to disregard the values of its counters on
+its next exit from idle.
+Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect
+cases where a given operation has resulted in a quiescent state
+for all flavors of RCU, for example, <tt>cond_resched_rcu_qs()</tt>.
+
+<h5>RCU Callback Handling</h5>
+
+<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
+the same CPU that registered them.
+This is strictly a cache-locality optimization: callbacks can and
+do get invoked on CPUs other than the one that registered them.
+After all, if the CPU that registered a given callback has gone
+offline before the callback can be invoked, there really is no other
+choice.
+
+</p><p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_head *nxtlist;
+ 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
+ 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
+ 4 long qlen_lazy;
+ 5 long qlen;
+ 6 long qlen_last_fqs_check;
+ 7 unsigned long n_force_qs_snap;
+ 8 unsigned long n_cbs_invoked;
+ 9 unsigned long n_cbs_orphaned;
+10 unsigned long n_cbs_adopted;
+11 long blimit;
+</pre>
+
+<p>The <tt>-&gt;nxtlist</tt> pointer and the
+<tt>-&gt;nxttail[]</tt> array form a four-segment list with
+older callbacks near the head and newer ones near the tail.
+Each segment contains callbacks with the corresponding relationship
+to the current grace period.
+The pointer out of the end of each of the four segments is referenced
+by the element of the <tt>-&gt;nxttail[]</tt> array indexed by
+<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
+<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
+<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
+grace period), and
+<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
+with a specific grace period)
+respectively, as shown in the following figure.
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>-&gt;nxtlist</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>-&gt;nxttail[RCU_DONE_TAIL]</tt> array element references
+the <tt>-&gt;nxtlist</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt> array element references callback
+CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
+CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element references
+CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>-&gt;next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>-&gt;nxtlist</tt> pointer.
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+The CPU advances the callbacks in its <tt>rcu_data</tt> structure
+whenever it notices that another RCU grace period has completed.
+The CPU detects the completion of an RCU grace period by noticing
+that the value of its <tt>rcu_data</tt> structure's
+<tt>-&gt;completed</tt> field differs from that of its leaf
+<tt>rcu_node</tt> structure.
+Recall that each <tt>rcu_node</tt> structure's
+<tt>-&gt;completed</tt> field is updated at the end of each
+grace period.
+
+</p><p>The <tt>-&gt;nxtcompleted[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This allows CPUs that go idle for extended periods to determine
+which of their callbacks are ready to be invoked after reawakening.
+
+</p><p>The <tt>-&gt;qlen</tt> counter contains the number of
+callbacks in <tt>-&gt;nxtlist</tt>, and the
+<tt>-&gt;qlen_lazy</tt> contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+The <tt>-&gt;qlen_last_fqs_check</tt> and
+<tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
+states from <tt>call_rcu()</tt> and friends when callback
+lists grow excessively long.
+
+</p><p>The <tt>-&gt;n_cbs_invoked</tt>,
+<tt>-&gt;n_cbs_orphaned</tt>, and <tt>-&gt;n_cbs_adopted</tt>
+fields count the number of callbacks invoked,
+sent to other CPUs when this CPU goes offline,
+and received from other CPUs when those other CPUs go offline.
+Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
+RCU callbacks that may be invoked at a given time.
+
+<h5>Dyntick-Idle Handling</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   int dynticks_snap;
+  2   unsigned long dynticks_fqs;
+</pre>
+
+The <tt>-&gt;dynticks_snap</tt> field is used to take a snapshot
+of the corresponding CPU's dyntick-idle state when forcing
+quiescent states, and is therefore accessed from other CPUs.
+Finally, the <tt>-&gt;dynticks_fqs</tt> field is used to
+count the number of times this CPU is determined to be in
+dyntick-idle state, and is used for tracing and debugging purposes.
+
+<h3><a name="The rcu_dynticks Structure">
+The <tt>rcu_dynticks</tt> Structure</a></h3>
+
+<p>The <tt>rcu_dynticks</tt> maintains the per-CPU dyntick-idle state
+for the corresponding CPU.
+Unlike the other structures, <tt>rcu_dynticks</tt> is not
+replicated over the different flavors of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+Its fields are as follows:
+
+<pre>
+  1   int dynticks_nesting;
+  2   int dynticks_nmi_nesting;
+  3   atomic_t dynticks;
+</pre>
+
+<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
+nesting depth of normal interrupts.
+In addition, this counter is incremented when exiting dyntick-idle
+mode and decremented when entering it.
+This counter can therefore be thought of as counting the number
+of reasons why this CPU cannot be permitted to enter dyntick-idle
+mode, aside from non-maskable interrupts (NMIs).
+NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
+field, except that NMIs that interrupt non-dyntick-idle execution
+are not counted.
+
+</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+CPU's transitions to and from dyntick-idle mode, so that this counter
+has an even value when the CPU is in dyntick-idle mode and an odd
+value otherwise.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why not just count all NMIs?
+	Wouldn't that be simpler and less error prone?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	It seems simpler only until you think hard about how to go about
+	updating the <tt>rcu_dynticks</tt> structure's
+	<tt>-&gt;dynticks</tt> field.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>Additional fields are present for some special-purpose
+builds, and are discussed separately.
+
+<h3><a name="The rcu_head Structure">
+The <tt>rcu_head</tt> Structure</a></h3>
+
+<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
+These structures are normally embedded within RCU-protected data
+structures whose algorithms use asynchronous grace periods.
+In contrast, when using algorithms that block waiting for RCU grace periods,
+RCU users need not provide <tt>rcu_head</tt> structures.
+
+</p><p>The <tt>rcu_head</tt> structure has fields as follows:
+
+<pre>
+  1   struct rcu_head *next;
+  2   void (*func)(struct rcu_head *head);
+</pre>
+
+<p>The <tt>-&gt;next</tt> field is used
+to link the <tt>rcu_head</tt> structures together in the
+lists within the <tt>rcu_data</tt> structures.
+The <tt>-&gt;func</tt> field is a pointer to the function
+to be called when the callback is ready to be invoked, and
+this function is passed a pointer to the <tt>rcu_head</tt>
+structure.
+However, <tt>kfree_rcu()</tt> uses the <tt>-&gt;func</tt>
+field to record the offset of the <tt>rcu_head</tt>
+structure within the enclosing RCU-protected data structure.
+
+</p><p>Both of these fields are used internally by RCU.
+From the viewpoint of RCU users, this structure is an
+opaque &ldquo;cookie&rdquo;.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Given that the callback function <tt>-&gt;func</tt>
+	is passed a pointer to the <tt>rcu_head</tt> structure,
+	how is that function supposed to find the beginning of the
+	enclosing RCU-protected data structure?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In actual practice, there is a separate callback function per
+	type of RCU-protected data structure.
+	The callback function can therefore use the <tt>container_of()</tt>
+	macro in the Linux kernel (or other pointer-manipulation facilities
+	in other software environments) to find the beginning of the
+	enclosing structure.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h3><a name="RCU-Specific Fields in the task_struct Structure">
+RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
+
+<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
+additional fields in the <tt>task_struct</tt> structure:
+
+<pre>
+ 1 #ifdef CONFIG_PREEMPT_RCU
+ 2   int rcu_read_lock_nesting;
+ 3   union rcu_special rcu_read_unlock_special;
+ 4   struct list_head rcu_node_entry;
+ 5   struct rcu_node *rcu_blocked_node;
+ 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+ 7 #ifdef CONFIG_TASKS_RCU
+ 8   unsigned long rcu_tasks_nvcsw;
+ 9   bool rcu_tasks_holdout;
+10   struct list_head rcu_tasks_holdout_list;
+11   int rcu_tasks_idle_cpu;
+12 #endif /* #ifdef CONFIG_TASKS_RCU */
+</pre>
+
+<p>The <tt>-&gt;rcu_read_lock_nesting</tt> field records the
+nesting level for RCU read-side critical sections, and
+the <tt>-&gt;rcu_read_unlock_special</tt> field is a bitmask
+that records special conditions that require <tt>rcu_read_unlock()</tt>
+to do additional work.
+The <tt>-&gt;rcu_node_entry</tt> field is used to form lists of
+tasks that have blocked within preemptible-RCU read-side critical
+sections and the <tt>-&gt;rcu_blocked_node</tt> field references
+the <tt>rcu_node</tt> structure whose list this task is a member of,
+or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
+read-side critical section.
+
+<p>The <tt>-&gt;rcu_tasks_nvcsw</tt> field tracks the number of
+voluntary context switches that this task had undergone at the
+beginning of the current tasks-RCU grace period,
+<tt>-&gt;rcu_tasks_holdout</tt> is set if the current tasks-RCU
+grace period is waiting on this task, <tt>-&gt;rcu_tasks_holdout_list</tt>
+is a list element enqueuing this task on the holdout list,
+and <tt>-&gt;rcu_tasks_idle_cpu</tt> tracks which CPU this
+idle task is running, but only if the task is currently running,
+that is, if the CPU is currently idle.
+
+<h3><a name="Accessor Functions">
+Accessor Functions</a></h3>
+
+<p>The following listing shows the
+<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first</tt>,
+<tt>rcu_for_each_nonleaf_node_breadth_first()</tt>, and
+<tt>rcu_for_each_leaf_node()</tt> function and macros:
+
+<pre>
+  1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+  2 {
+  3   return &amp;rsp-&gt;node[0];
+  4 }
+  5
+  6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+  7   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+  8        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+  9
+ 10 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+ 11   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+ 12        (rnp) &lt; (rsp)-&gt;level[NUM_RCU_LVLS - 1]; (rnp)++)
+ 13
+ 14 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 15   for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
+ 16        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+</pre>
+
+<p>The <tt>rcu_get_root()</tt> simply returns a pointer to the
+first element of the specified <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, which is the root <tt>rcu_node</tt>
+structure.
+
+</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
+macro takes advantage of the layout of the <tt>rcu_node</tt>
+structures in the <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
+simply traversing the array in order.
+The <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> macro operates
+similarly, but traverses only the first part of the array, thus excluding
+the leaf <tt>rcu_node</tt> structures.
+Finally, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
+the last part of the array, thus traversing only the leaf
+<tt>rcu_node</tt> structures.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	What do <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> and
+	<tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
+	contains only a single node?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In the single-node case,
+	<tt>rcu_for_each_nonleaf_node_breadth_first()</tt> is a no-op
+	and <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h3><a name="Summary">
+Summary</a></h3>
+
+So each flavor of RCU is represented by an <tt>rcu_state</tt> structure,
+which contains a combining tree of <tt>rcu_node</tt> and
+<tt>rcu_data</tt> structures.
+Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
+state is tracked by an <tt>rcu_dynticks</tt> structure.
+
+If you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+<h3><a name="Acknowledgments">
+Acknowledgments</a></h3>
+
+I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
+for helping me get this document into a more human-readable state.
+
+<h3><a name="Legal Statement">
+Legal Statement</a></h3>
+
+<p>This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+</p><p>Linux is a registered trademark of Linus Torvalds.
+
+</p><p>Other company, product, and service names may be trademarks or
+service marks of others.
+
+</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
new file mode 100644
index 000000000000..2bf12b468206
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
@@ -0,0 +1,939 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:37:22 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="15.1in"
+   height="11.2in"
+   viewBox="-66 -66 18087 13407"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="HugeTreeClassicRCU.fig">
+  <metadata
+     id="metadata224">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs222">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3982"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1134"
+     inkscape:window-height="789"
+     id="namedview220"
+     showgrid="false"
+     inkscape:zoom="0.60515873"
+     inkscape:cx="679.5"
+     inkscape:cy="504"
+     inkscape:window-x="786"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="17100"
+       height="8325"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="11025"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="4275"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="9900"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="14400"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line: box -->
+    <rect
+       x="7650"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect20" />
+    <!-- Line -->
+    <polyline
+       points="3150,9225 3150,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 3150 9225 - 3150 7560-->
+    <!-- Circle -->
+    <circle
+       cx="8550"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="9000"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="9450"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Line -->
+    <polyline
+       points="6750,6300 8250,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 6750 6300 - 8391 4890-->
+    <!-- Line -->
+    <polyline
+       points="11250,6300 9747,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 11250 6300 - 9606 4890-->
+    <!-- Circle -->
+    <circle
+       cx="13950"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle40" />
+    <!-- Circle -->
+    <circle
+       cx="13500"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle42" />
+    <!-- Circle -->
+    <circle
+       cx="13050"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle44" />
+    <!-- Circle -->
+    <circle
+       cx="9450"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle46" />
+    <!-- Circle -->
+    <circle
+       cx="9000"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle48" />
+    <!-- Circle -->
+    <circle
+       cx="8550"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle50" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle52" />
+    <!-- Circle -->
+    <circle
+       cx="4500"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle54" />
+    <!-- Circle -->
+    <circle
+       cx="4050"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle56" />
+    <!-- Circle -->
+    <circle
+       cx="1800"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle58" />
+    <!-- Circle -->
+    <circle
+       cx="2250"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle60" />
+    <!-- Circle -->
+    <circle
+       cx="2700"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle62" />
+    <!-- Circle -->
+    <circle
+       cx="15300"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle64" />
+    <!-- Circle -->
+    <circle
+       cx="15750"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle66" />
+    <!-- Circle -->
+    <circle
+       cx="16200"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="10800"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="11250"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="11700"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="6300"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="6750"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="7200"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle80" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect82" />
+    <!-- Line: box -->
+    <rect
+       x="1800"
+       y="9225"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect84" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="6300"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="8955"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="10755"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="13455"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="15255"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect96" />
+    <!-- Line -->
+    <polyline
+       points="11700,3600 10197,2310 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline98" />
+    <!-- Arrowhead on XXXpoint 11700 3600 - 10056 2190-->
+    <!-- Line -->
+    <polyline
+       points="6300,3600 7800,2310 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline102" />
+    <!-- Arrowhead on XXXpoint 6300 3600 - 7941 2190-->
+    <!-- Line -->
+    <polyline
+       points="3150,6300 4650,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline106" />
+    <!-- Arrowhead on XXXpoint 3150 6300 - 4791 4890-->
+    <!-- Line -->
+    <polyline
+       points="14850,6300 13347,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline110" />
+    <!-- Arrowhead on XXXpoint 14850 6300 - 13206 4890-->
+    <!-- Line -->
+    <polyline
+       points="1350,11475 1350,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 1350 11475 - 1350 7560-->
+    <!-- Line -->
+    <polyline
+       points="16650,9225 16650,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline118" />
+    <!-- Arrowhead on XXXpoint 16650 9225 - 16650 7560-->
+    <!-- Line -->
+    <polyline
+       points="14850,11475 14850,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline122" />
+    <!-- Arrowhead on XXXpoint 14850 11475 - 14850 7560-->
+    <!-- Line -->
+    <polyline
+       points="12150,9225 12150,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline126" />
+    <!-- Arrowhead on XXXpoint 12150 9225 - 12150 7560-->
+    <!-- Line -->
+    <polyline
+       points="10350,11475 10350,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline130" />
+    <!-- Arrowhead on XXXpoint 10350 11475 - 10350 7560-->
+    <!-- Line -->
+    <polyline
+       points="7650,9225 7650,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline134" />
+    <!-- Arrowhead on XXXpoint 7650 9225 - 7650 7560-->
+    <!-- Line -->
+    <polyline
+       points="5850,11475 5850,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline138" />
+    <!-- Arrowhead on XXXpoint 5850 11475 - 5850 7560-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12375"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text142">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12375"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5625"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text146">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5625"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text148">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6750"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text150">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6750"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text152">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11250"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text154">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11250"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15750"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text158">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15750"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text160">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text164">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text166">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text170">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="10800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text172">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="10125"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text176">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text178">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text180">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text182">CPU 21823</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text184">CPU 21839</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text186">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text188">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text192">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text194">CPU 43679</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text196">CPU 43695</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text198">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text200">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text202">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text204">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text206">CPU 65519</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text208">CPU 65535</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text210">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text212">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="675"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text214">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9000"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text216">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9000"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text218">rcu_node</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
new file mode 100644
index 000000000000..7a7eb3bac95c
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
@@ -0,0 +1,828 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:41:29 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="17.7in"
+   height="10.4in"
+   viewBox="-66 -66 21237 12507"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeLevel.fig">
+  <metadata
+     id="metadata216">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs214">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3974"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1023"
+     inkscape:window-height="1148"
+     id="namedview212"
+     showgrid="false"
+     inkscape:zoom="0.55869424"
+     inkscape:cx="796.50006"
+     inkscape:cy="467.99997"
+     inkscape:window-x="897"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="20655"
+       height="8325"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="14130"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="7380"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="8505"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="13005"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="17505"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="4005"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line: box -->
+    <rect
+       x="10755"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect20" />
+    <!-- Line -->
+    <polyline
+       points="6255,9225 6255,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 6255 9225 - 6255 7560-->
+    <!-- Circle -->
+    <circle
+       cx="11655"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="12105"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="12555"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Line -->
+    <polyline
+       points="9855,6300 11355,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 9855 6300 - 11496 4890-->
+    <!-- Line -->
+    <polyline
+       points="14355,6300 12852,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 14355 6300 - 12711 4890-->
+    <!-- Circle -->
+    <circle
+       cx="17055"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle40" />
+    <!-- Circle -->
+    <circle
+       cx="16605"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle42" />
+    <!-- Circle -->
+    <circle
+       cx="16155"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle44" />
+    <!-- Circle -->
+    <circle
+       cx="12555"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle46" />
+    <!-- Circle -->
+    <circle
+       cx="12105"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle48" />
+    <!-- Circle -->
+    <circle
+       cx="11655"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle50" />
+    <!-- Circle -->
+    <circle
+       cx="8055"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle52" />
+    <!-- Circle -->
+    <circle
+       cx="7605"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle54" />
+    <!-- Circle -->
+    <circle
+       cx="7155"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle56" />
+    <!-- Circle -->
+    <circle
+       cx="4905"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle58" />
+    <!-- Circle -->
+    <circle
+       cx="5355"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle60" />
+    <!-- Circle -->
+    <circle
+       cx="5805"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle62" />
+    <!-- Circle -->
+    <circle
+       cx="18405"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle64" />
+    <!-- Circle -->
+    <circle
+       cx="18855"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle66" />
+    <!-- Circle -->
+    <circle
+       cx="19305"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="13905"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="14355"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="14805"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="9405"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="9855"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="10305"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle80" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="1125"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect82" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="2250"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect84" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="3375"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect86" />
+    <!-- Line -->
+    <polyline
+       points="14805,3600 13302,2310 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline88" />
+    <!-- Arrowhead on XXXpoint 14805 3600 - 13161 2190-->
+    <!-- Line -->
+    <polyline
+       points="9405,3600 10905,2310 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline92" />
+    <!-- Arrowhead on XXXpoint 9405 3600 - 11046 2190-->
+    <!-- Line -->
+    <polyline
+       points="6255,6300 7755,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline96" />
+    <!-- Arrowhead on XXXpoint 6255 6300 - 7896 4890-->
+    <!-- Line -->
+    <polyline
+       points="17955,6300 16452,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline100" />
+    <!-- Arrowhead on XXXpoint 17955 6300 - 16311 4890-->
+    <!-- Line -->
+    <polyline
+       points="4455,11025 4455,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline104" />
+    <!-- Arrowhead on XXXpoint 4455 11025 - 4455 7560-->
+    <!-- Line -->
+    <polyline
+       points="19755,9225 19755,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 19755 9225 - 19755 7560-->
+    <!-- Line -->
+    <polyline
+       points="17955,11025 17955,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline112" />
+    <!-- Arrowhead on XXXpoint 17955 11025 - 17955 7560-->
+    <!-- Line -->
+    <polyline
+       points="15255,9225 15255,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline116" />
+    <!-- Arrowhead on XXXpoint 15255 9225 - 15255 7560-->
+    <!-- Line -->
+    <polyline
+       points="13455,11025 13455,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline120" />
+    <!-- Arrowhead on XXXpoint 13455 11025 - 13455 7560-->
+    <!-- Line -->
+    <polyline
+       points="10755,9225 10755,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline124" />
+    <!-- Arrowhead on XXXpoint 10755 9225 - 10755 7560-->
+    <!-- Line -->
+    <polyline
+       points="8955,11025 8955,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline128" />
+    <!-- Arrowhead on XXXpoint 8955 11025 - 8955 7560-->
+    <!-- Line: box -->
+    <rect
+       x="12105"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect132" />
+    <!-- Line: box -->
+    <rect
+       x="13905"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect134" />
+    <!-- Line: box -->
+    <rect
+       x="16605"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect136" />
+    <!-- Line: box -->
+    <rect
+       x="18405"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect138" />
+    <!-- Line: box -->
+    <rect
+       x="9405"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect140" />
+    <!-- Line: box -->
+    <rect
+       x="7605"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect142" />
+    <!-- Line: box -->
+    <rect
+       x="4905"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect144" />
+    <!-- Line: box -->
+    <rect
+       x="3105"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect146" />
+    <!-- Line -->
+    <polyline
+       points="3375,1575 10701,1575 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline148" />
+    <!-- Arrowhead on XXXpoint 3375 1575 - 10890 1575-->
+    <!-- Line -->
+    <polyline
+       points="3375,3825 4050,3825 4050,5400 2700,5400 2700,6975 3951,6975 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline152" />
+    <!-- Arrowhead on XXXpoint 2700 6975 - 4140 6975-->
+    <!-- Line -->
+    <polyline
+       points="3375,2700 5175,2700 5175,4275 7326,4275 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline156" />
+    <!-- Arrowhead on XXXpoint 5175 4275 - 7515 4275-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15480"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text160">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15480"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text164">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text166">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9855"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9855"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text170">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14355"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text172">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14355"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="18855"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text176">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="18855"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text178">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5355"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text180">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5355"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text182">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text184">-&gt;level[0]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="2925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text186">-&gt;level[1]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text188">-&gt;level[2]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text192">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6255"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text194">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4455"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text196">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="19755"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text198">CPU 65535</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="17955"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text200">CPU 65519</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15255"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text202">CPU 43695</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="13455"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text204">CPU 43679</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10755"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text206">CPU 21839</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8955"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text208">CPU 21823</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text210">struct rcu_state</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMapping.svg b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
new file mode 100644
index 000000000000..729cfa9e6cdb
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
@@ -0,0 +1,305 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:43:22 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="3.1in"
+   height="0.9in"
+   viewBox="-12 -12 3699 1074"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeMapping.fig">
+  <metadata
+     id="metadata66">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs64">
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3836"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Mend"
+       style="overflow:visible;">
+      <path
+         id="path3842"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(0.6) rotate(180) translate(0,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3824"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="991"
+     inkscape:window-height="606"
+     id="namedview62"
+     showgrid="false"
+     inkscape:zoom="3.0752688"
+     inkscape:cx="139.5"
+     inkscape:cy="40.5"
+     inkscape:window-x="891"
+     inkscape:window-y="177"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="3675"
+       height="1050"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="600"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="1650"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="2175"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="3225"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line -->
+    <polyline
+       points="675,375 675,150 300,150 300,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+    <!-- Line -->
+    <polyline
+       points="1200,675 1200,900 300,900 300,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 300 900 - 300 660-->
+    <!-- Line -->
+    <polyline
+       points="1725,375 1725,150 900,150 900,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline28" />
+    <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+    <!-- Line -->
+    <polyline
+       points="2250,375 2250,75 825,75 825,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+    <!-- Line -->
+    <polyline
+       points="2775,675 2775,900 1425,900 1425,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 1425 900 - 1425 660-->
+    <!-- Line -->
+    <polyline
+       points="3300,675 3300,975 1350,975 1350,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 1350 975 - 1350 660-->
+    <!-- Line: box -->
+    <rect
+       x="2700"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect44" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="300"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text46">0:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text48">4:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1875"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text50">0:1  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text52">2:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2925"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text54">4:5  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3450"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text56">6:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="825"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text58">0:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3600"
+       y="150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="end"
+       id="text60">struct rcu_state</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
new file mode 100644
index 000000000000..5b416a4b8453
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
@@ -0,0 +1,380 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:45:19 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="3.1in"
+   height="1.8in"
+   viewBox="-12 -12 3699 2124"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeMappingLevel.svg">
+  <metadata
+     id="metadata98">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs96">
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3868"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1598"
+     inkscape:window-height="1211"
+     id="namedview94"
+     showgrid="false"
+     inkscape:zoom="5.2508961"
+     inkscape:cx="139.5"
+     inkscape:cy="81"
+     inkscape:window-x="840"
+     inkscape:window-y="122"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="3675"
+       height="2100"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1350"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1575"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1800"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Arc -->
+    <path
+       style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+       d="M 1800,900 A 118 118  0  0  0  1800  1125 "
+       id="path14" />
+    <!-- Arc -->
+    <path
+       style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+       d="M 750,900 A 75 75  0  0  0  750  1050 "
+       id="path16" />
+    <!-- Line -->
+    <polyline
+       points="750,900 750,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 750 900 - 750 660-->
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect22" />
+    <!-- Line: box -->
+    <rect
+       x="600"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect24" />
+    <!-- Line: box -->
+    <rect
+       x="1650"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect26" />
+    <!-- Line: box -->
+    <rect
+       x="2175"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="3225"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="675,375 675,150 300,150 300,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+    <!-- Line -->
+    <polyline
+       points="1725,375 1725,150 900,150 900,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+    <!-- Line -->
+    <polyline
+       points="2250,375 2250,75 825,75 825,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+    <!-- Line -->
+    <polyline
+       points="2775,675 2775,975 1425,975 1425,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1425 975 - 1425 660-->
+    <!-- Line: box -->
+    <rect
+       x="2700"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect48" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect50" />
+    <!-- Line -->
+    <polyline
+       points="3300,675 3300,1050 1350,1050 1350,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 1350 1050 - 1350 660-->
+    <!-- Line -->
+    <polyline
+       points="825,1425 975,1425 975,1200 225,1200 225,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 225 1200 - 225 660-->
+    <!-- Line -->
+    <polyline
+       points="1200,675 1200,975 300,975 300,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 300 975 - 300 660-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1500"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text64">-&gt;level[0]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1725"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text66">-&gt;level[1]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1950"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text68">-&gt;level[2]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="300"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text70">0:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text72">4:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1875"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text74">0:1  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text76">2:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2925"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text78">4:5  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3450"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text80">6:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="825"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text82">0:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3600"
+       y="150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="end"
+       id="text84">struct rcu_state</text>
+    <!-- Line -->
+    <polyline
+       points="825,1875 1800,1875 1800,1125 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:none"
+       id="polyline86" />
+    <!-- Line -->
+    <polyline
+       points="1800,900 1800,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline88" />
+    <!-- Arrowhead on XXXpoint 1800 900 - 1800 660-->
+    <!-- Line -->
+    <polyline
+       points="825,1650 1200,1650 1200,1125 750,1125 750,1050 "
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline92" />
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
new file mode 100644
index 000000000000..00e810bb8419
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
@@ -0,0 +1,843 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:35:03 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="10.1in"
+   height="8.6in"
+   viewBox="-44 -44 12088 10288"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="blkd_task.fig">
+  <metadata
+     id="metadata212">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs210">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3970"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1087"
+     inkscape:window-height="1144"
+     id="namedview208"
+     showgrid="false"
+     inkscape:zoom="1.0495049"
+     inkscape:cx="454.50003"
+     inkscape:cy="387.00003"
+     inkscape:window-x="833"
+     inkscape:window-y="28"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5688,5912 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline12" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+    <polyline
+       points="5714 6068 5704 5822 5598 6044 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline14" />
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4486,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline16" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+    <polyline
+       points="4514 7418 4506 7172 4396 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline18" />
+    <!-- Line -->
+    <polyline
+       points="1040,9300 1476,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+    <polyline
+       points="1504 7418 1496 7172 1386 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline22" />
+    <!-- Line -->
+    <polyline
+       points="2240,8100 2676,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+    <polyline
+       points="2704 6218 2696 5972 2586 6194 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline26" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline48" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,8100 2250,6364 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+    <!-- Line -->
+    <polyline
+       points="1050,9300 1050,7564 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4050,7564 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5250,6364 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline64" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle80" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect104" />
+    <!-- Line -->
+    <polygon
+       points="5550,3450 7350,2850 7350,5100 5550,4350 5550,3450 "
+       style="stroke:#000000;stroke-width:14; stroke-linejoin:miter; stroke-linecap:butt; stroke-dasharray:120 120;fill:#ffbfbf; "
+       id="polygon106" />
+    <!-- Line -->
+    <polyline
+       points="9300,3150 10734,3150 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 9300 3150 - 10860 3150-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="2850"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect112" />
+    <!-- Line -->
+    <polyline
+       points="11400,3600 11400,4284 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 11400 3600 - 11400 4410-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="4350"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect118" />
+    <!-- Line -->
+    <polyline
+       points="11400,5100 11400,5784 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline120" />
+    <!-- Arrowhead on XXXpoint 11400 5100 - 11400 5910-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="5850"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect124" />
+    <!-- Line -->
+    <polyline
+       points="9300,3900 9900,3900 9900,4650 10734,4650 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline126" />
+    <!-- Arrowhead on XXXpoint 9900 4650 - 10860 4650-->
+    <!-- Line -->
+    <polyline
+       points="9300,4650 9600,4650 9600,6150 10734,6150 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline130" />
+    <!-- Arrowhead on XXXpoint 9600 6150 - 10860 6150-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text134">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text138">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text152">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text160">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text170">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text172">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text176">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text178">rcu_sched</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="3300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text180">T3</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="4800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text182">T2</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="6300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text184">T1</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline186" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect190" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="2850"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect192" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="3600"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect194" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="4350"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect196" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text198">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text200">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="3300"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text202">blkd_tasks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text204">gp_tasks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="4800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text206">exp_tasks</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
new file mode 100644
index 000000000000..abc4cc73a097
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -0,0 +1,396 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:39:46 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="10.4in"
+   height="10.4in"
+   viewBox="-66 -66 12507 12507"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="nxtlist.fig">
+  <metadata
+     id="metadata94">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs92">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3852"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="925"
+     inkscape:window-height="928"
+     id="namedview90"
+     showgrid="false"
+     inkscape:zoom="0.80021373"
+     inkscape:cx="467.99997"
+     inkscape:cy="467.99997"
+     inkscape:window-x="948"
+     inkscape:window-y="73"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="1125"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="2250"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="3375"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="4500"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="0"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="1125"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect18" />
+    <!-- Line -->
+    <polyline
+       points="11475,2250 11475,3276 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 11475 2250 - 11475 3465-->
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="6750"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect24" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="7875"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect26" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="10125"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="11250"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect30" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="3375"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect32" />
+    <!-- Line -->
+    <polyline
+       points="11475,5625 11475,6651 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 11475 5625 - 11475 6840-->
+    <!-- Line -->
+    <polyline
+       points="7875,225 10476,225 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 7875 225 - 10665 225-->
+    <!-- Line -->
+    <polyline
+       points="7875,1350 9675,1350 9675,675 7971,675 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline42" />
+    <!-- Arrowhead on XXXpoint 9675 675 - 7785 675-->
+    <!-- Line -->
+    <polyline
+       points="7875,2475 9675,2475 9675,4725 10476,4725 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 9675 4725 - 10665 4725-->
+    <!-- Line -->
+    <polyline
+       points="7875,3600 9225,3600 9225,5175 10476,5175 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 9225 5175 - 10665 5175-->
+    <!-- Line -->
+    <polyline
+       points="7875,4725 8775,4725 8775,11475 10476,11475 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 8775 11475 - 10665 11475-->
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="4500"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect58" />
+    <!-- Line -->
+    <polyline
+       points="11475,9000 11475,10026 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 11475 9000 - 11475 10215-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text64">nxtlist</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text66">nxttail[RCU_DONE_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="2925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text68">nxttail[RCU_WAIT_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="5175"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text72">nxttail[RCU_NEXT_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="675"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text74">CB 1</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="1800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text76">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="7425"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text78">CB 3</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="8550"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text80">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="10800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text82">CB 4</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text84">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="4050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text86">CB 2</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="5175"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text88">next</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
deleted file mode 100644
index 7496a55e4e7b..000000000000
--- a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
+++ /dev/null
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
deleted file mode 100644
index ebcbeee391ed..000000000000
--- a/Documentation/RCU/Design/Requirements/RCUApplicability.svg
+++ /dev/null
@@ -1,237 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
-
-<!-- CreationDate: Tue Mar  4 18:34:25 2014 -->
-
-<!-- Magnification: 3.000 -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="1089.1382"
-   height="668.21368"
-   viewBox="-2121 -36 14554.634 8876.4061"
-   id="svg2"
-   version="1.1"
-   inkscape:version="0.48.3.1 r9886"
-   sodipodi:docname="RCUApplicability.svg">
-  <metadata
-     id="metadata40">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title />
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <defs
-     id="defs38" />
-  <sodipodi:namedview
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1"
-     objecttolerance="10"
-     gridtolerance="10"
-     guidetolerance="10"
-     inkscape:pageopacity="0"
-     inkscape:pageshadow="2"
-     inkscape:window-width="849"
-     inkscape:window-height="639"
-     id="namedview36"
-     showgrid="false"
-     inkscape:zoom="0.51326165"
-     inkscape:cx="544.56912"
-     inkscape:cy="334.10686"
-     inkscape:window-x="149"
-     inkscape:window-y="448"
-     inkscape:window-maximized="0"
-     inkscape:current-layer="g4"
-     fit-margin-top="5"
-     fit-margin-left="5"
-     fit-margin-right="5"
-     fit-margin-bottom="5" />
-  <g
-     style="fill:none;stroke-width:0.025in"
-     id="g4"
-     transform="translate(-2043.6828,14.791398)">
-    <!-- Line: box -->
-    <rect
-       x="0"
-       y="0"
-       width="14400"
-       height="8775"
-       rx="0"
-       style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect6" />
-    <!-- Line: box -->
-    <rect
-       x="1350"
-       y="0"
-       width="11700"
-       height="6075"
-       rx="0"
-       style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect8" />
-    <!-- Line: box -->
-    <rect
-       x="2700"
-       y="0"
-       width="9000"
-       height="4275"
-       rx="0"
-       style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect10" />
-    <!-- Line: box -->
-    <rect
-       x="4050"
-       y="0"
-       width="6300"
-       height="2475"
-       rx="0"
-       style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect12" />
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="900"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text14"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3017">Read-Mostly, Stale &amp;</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="1350"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text16"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3019">Inconsistent Data OK</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="1800"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text18"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3021">(RCU Works Great!!!)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="3825"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text20"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3023">(RCU Works Well)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="3375"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text22"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="5175"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text24"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="6975"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text26"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="5625"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text28"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3029">(RCU Might Be OK...)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="7875"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text30"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="8325"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text32"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="7425"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text34"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
-  </g>
-</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index a725f9900ec8..e7e24b3e86e2 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -1,5 +1,3 @@
-<!-- DO NOT HAND EDIT. -->
-<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
         "http://www.w3.org/TR/html4/loose.dtd">
         <html>
@@ -65,8 +63,8 @@ All that aside, here are the categories of currently known RCU requirements:
 
 <p>
 This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+however, the answers to each quick quiz immediately follows the quiz.
+Select the big white space with your mouse to see the answer.
 
 <h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
 
@@ -153,13 +151,27 @@ Therefore, the outcome:
 </blockquote>
 cannot happen.
 
-<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<br><a href="#qq1answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	You said that updaters can make useful forward progress concurrently
+	with readers, but pre-existing readers will block
+	<tt>synchronize_rcu()</tt>!!!
+	Just who are you trying to fool???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	First, if updaters do not wish to be blocked by readers, they can use
+	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+	be discussed later.
+	Second, even when using <tt>synchronize_rcu()</tt>, the other
+	update-side code does run concurrently with readers, whether
+	pre-existing or not.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 This scenario resembles one of the first uses of RCU in
@@ -210,9 +222,20 @@ to guarantee that <tt>do_something()</tt> never runs concurrently
 with <tt>recovery()</tt>, but with little or no synchronization
 overhead in <tt>do_something_dlm()</tt>.
 
-<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<br><a href="#qq2answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Without that extra grace period, memory reordering could result in
+	<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+	concurrently with the last bits of <tt>recovery()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 In order to avoid fatal problems such as deadlocks,
@@ -332,12 +355,27 @@ It also prevents any number of &ldquo;interesting&rdquo; compiler
 optimizations, for example, the use of <tt>gp</tt> as a scratch
 location immediately preceding the assignment.
 
-<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<br><a href="#qq3answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+	two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+	from being reordered.
+	Can't that also cause problems?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	No, it cannot.
+	The readers cannot see either of these two fields until
+	the assignment to <tt>gp</tt>, by which time both fields are
+	fully initialized.
+	So reordering the assignments
+	to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+	cause any problems.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 It is tempting to assume that the reader need not do anything special
@@ -494,11 +532,42 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
 	code protected by the corresponding update-side lock.
 </ol>
 
-<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<br><a href="#qq4answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Without the <tt>rcu_dereference()</tt> or the
+	<tt>rcu_access_pointer()</tt>, what destructive optimizations
+	might the compiler make use of?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Let's start with what happens to <tt>do_something_gp()</tt>
+	if it fails to use <tt>rcu_dereference()</tt>.
+	It could reuse a value formerly fetched from this same pointer.
+	It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+	manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
+	mash-up of two distince pointer values.
+	It might even use value-speculation optimizations, where it makes
+	a wrong guess, but by the time it gets around to checking the
+	value, an update has changed the pointer to match the wrong guess.
+	Too bad about any dereferences that returned pre-initialization garbage
+	in the meantime!
+	</font>
+
+	<p><font color="ffffff">
+	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+	the above optimizations are harmless.
+	However,
+	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+	<tt>sparse</tt> will complain if you
+	define <tt>gp</tt> with <tt>__rcu</tt> and then
+	access it without using
+	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 In short, RCU's publish-subscribe guarantee is provided by the combination
@@ -571,17 +640,156 @@ systems with more than one CPU:
 	<tt>synchronize_rcu()</tt> migrates in the meantime.
 </ol>
 
-<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<br><a href="#qq5answer">Answer</a>
-
-<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<br><a href="#qq6answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Given that multiple CPUs can start RCU read-side critical sections
+	at any time without any ordering whatsoever, how can RCU possibly
+	tell whether or not a given RCU read-side critical section starts
+	before a given instance of <tt>synchronize_rcu()</tt>?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	If RCU cannot tell whether or not a given
+	RCU read-side critical section starts before a
+	given instance of <tt>synchronize_rcu()</tt>,
+	then it must assume that the RCU read-side critical section
+	started first.
+	In other words, a given instance of <tt>synchronize_rcu()</tt>
+	can avoid waiting on a given RCU read-side critical section only
+	if it can prove that <tt>synchronize_rcu()</tt> started first.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	The first and second guarantees require unbelievably strict ordering!
+	Are all these memory barriers <i> really</i> required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Yes, they really are required.
+	To see why the first guarantee is required, consider the following
+	sequence of events:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>rcu_read_lock()</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>q = rcu_dereference(gp);
+		/* Very likely to return p. */</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>list_del_rcu(p);</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>synchronize_rcu()</tt> starts.
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>do_something_with(q-&gt;a);
+		/* No smp_mb(), so might happen after kfree(). */</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>rcu_read_unlock()</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>synchronize_rcu()</tt> returns.
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>kfree(p);</tt>
+		</font>
+	</ol>
+
+	<p><font color="ffffff">
+	Therefore, there absolutely must be a full memory barrier between the
+	end of the RCU read-side critical section and the end of the
+	grace period.
+	</font>
+
+	<p><font color="ffffff">
+	The sequence of events demonstrating the necessity of the second rule
+	is roughly similar:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
+		/* Might return p if no memory barrier. */</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>kfree(p);</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
+		</font>
+	</ol>
+
+	<p><font color="ffffff">
+	And similarly, without a memory barrier between the beginning of the
+	grace period and the beginning of the RCU read-side critical section,
+	CPU&nbsp;1 might end up accessing the freelist.
+	</font>
+
+	<p><font color="ffffff">
+	The &ldquo;as if&rdquo; rule of course applies, so that any
+	implementation that acts as if the appropriate memory barriers
+	were in place is a correct implementation.
+	That said, it is much easier to fool yourself into believing
+	that you have adhered to the as-if rule than it is to actually
+	adhere to it!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+	generate absolutely no code in some kernel builds.
+	This means that the compiler might arbitrarily rearrange consecutive
+	RCU read-side critical sections.
+	Given such rearrangement, if a given RCU read-side critical section
+	is done, how can you be sure that all prior RCU read-side critical
+	sections are done?
+	Won't the compiler rearrangements make that impossible to determine?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+	generate absolutely no code, RCU infers quiescent states only at
+	special locations, for example, within the scheduler.
+	Because calls to <tt>schedule()</tt> had better prevent calling-code
+	accesses to shared variables from being rearranged across the call to
+	<tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
+	critical section, it will necessarily detect the end of all prior
+	RCU read-side critical sections, no matter how aggressively the
+	compiler scrambles the code.
+	</font>
+
+	<p><font color="ffffff">
+	Again, this all assumes that the compiler cannot scramble code across
+	calls to the scheduler, out of interrupt handlers, into the idle loop,
+	into user-mode code, and so on.
+	But if your kernel build allows that sort of scrambling, you have broken
+	far more than just RCU!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 Note that these memory-barrier requirements do not replace the fundamental
@@ -626,9 +834,19 @@ inconvenience can be avoided through use of the
 <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
 described later in this document.
 
-<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
-But how does the upgrade-to-write operation exclude other readers?
-<br><a href="#qq7answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But how does the upgrade-to-write operation exclude other readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	It doesn't, just like normal RCU updates, which also do not exclude
+	RCU readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 This guarantee allows lookup code to be shared between read-side
@@ -714,9 +932,20 @@ to do significant reordering.
 This is by design:  Any significant ordering constraints would slow down
 these fast-path APIs.
 
-<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
-Can't the compiler also reorder this code?
-<br><a href="#qq8answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Can't the compiler also reorder this code?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	No, the volatile casts in <tt>READ_ONCE()</tt> and
+	<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+	this particular case.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
 
@@ -769,10 +998,28 @@ new readers can start immediately after <tt>synchronize_rcu()</tt>
 starts, and <tt>synchronize_rcu()</tt> is under no
 obligation to wait for these new readers.
 
-<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<br><a href="#qq9answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Suppose that synchronize_rcu() did wait until <i>all</i>
+	readers had completed instead of waiting only on
+	pre-existing readers.
+	For how long would the updater be able to rely on there
+	being no readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	For no time at all.
+	Even if <tt>synchronize_rcu()</tt> were to wait until
+	all readers had completed, a new reader might start immediately after
+	<tt>synchronize_rcu()</tt> completed.
+	Therefore, the code following
+	<tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
+	no readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
 Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
@@ -969,11 +1216,24 @@ grace period.
 As a result, an RCU read-side critical section cannot partition a pair
 of RCU grace periods.
 
-<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<br><a href="#qq10answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	How long a sequence of grace periods, each separated by an RCU
+	read-side critical section, would be required to partition the RCU
+	read-side critical sections at the beginning and end of the chain?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In theory, an infinite number.
+	In practice, an unknown number that is sensitive to both implementation
+	details and timing considerations.
+	Therefore, even in practice, RCU users must abide by the
+	theoretical rather than the practical answer.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Disabling Preemption Does Not Block Grace Periods">
 Disabling Preemption Does Not Block Grace Periods</a></h3>
@@ -1109,12 +1369,27 @@ These classes is covered in the following sections.
 <h3><a name="Specialization">Specialization</a></h3>
 
 <p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
+RCU is and always has been intended primarily for read-mostly situations,
+which means that RCU's read-side primitives are optimized, often at the
 expense of its update-side primitives.
+Experience thus far is captured by the following list of situations:
 
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+<ol>
+<li>	Read-mostly data, where stale and inconsistent data is not
+	a problem:   RCU works great!
+<li>	Read-mostly data, where data must be consistent:
+	RCU works well.
+<li>	Read-write data, where data must be consistent:
+	RCU <i>might</i> work OK.
+	Or not.
+<li>	Write-mostly data, where data must be consistent:
+	RCU is very unlikely to be the right tool for the job,
+	with the following exceptions, where RCU can provide:
+	<ol type=a>
+	<li>	Existence guarantees for update-friendly mechanisms.
+	<li>	Wait-free read-side primitives for real-time use.
+	</ol>
+</ol>
 
 <p>
 This focus on read-mostly situations means that RCU must interoperate
@@ -1127,9 +1402,43 @@ synchronization primitives be legal within RCU read-side critical sections,
 including spinlocks, sequence locks, atomic operations, reference
 counters, and memory barriers.
 
-<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
-What about sleeping locks?
-<br><a href="#qq11answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	What about sleeping locks?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	These are forbidden within Linux-kernel RCU read-side critical
+	sections because it is not legal to place a quiescent state
+	(in this case, voluntary context switch) within an RCU read-side
+	critical section.
+	However, sleeping locks may be used within userspace RCU read-side
+	critical sections, and also within Linux-kernel sleepable RCU
+	<a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
+	read-side critical sections.
+	In addition, the -rt patchset turns spinlocks into a
+	sleeping locks so that the corresponding critical sections
+	can be preempted, which also means that these sleeplockified
+	spinlocks (but not other sleeping locks!)  may be acquire within
+	-rt-Linux-kernel RCU read-side critical sections.
+	</font>
+
+	<p><font color="ffffff">
+	Note that it <i>is</i> legal for a normal RCU read-side
+	critical section to conditionally acquire a sleeping locks
+	(as in <tt>mutex_trylock()</tt>), but only as long as it does
+	not loop indefinitely attempting to conditionally acquire that
+	sleeping locks.
+	The key point is that things like <tt>mutex_trylock()</tt>
+	either return with the mutex held, or return an error indication if
+	the mutex was not immediately available.
+	Either way, <tt>mutex_trylock()</tt> returns immediately without
+	sleeping.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 It often comes as a surprise that many algorithms do not require a
@@ -1160,10 +1469,7 @@ some period of time, so the exact wait period is a judgment call.
 One of our pair of veternarians might wait 30 seconds before pronouncing
 the cat dead, while the other might insist on waiting a full minute.
 The two veternarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+the final 30 seconds of the minute following the last heartbeat.
 
 <p>
 Interestingly enough, this same situation applies to hardware.
@@ -1343,7 +1649,8 @@ situations where neither <tt>synchronize_rcu()</tt> nor
 <tt>synchronize_rcu_expedited()</tt> would be legal,
 including within preempt-disable code, <tt>local_bh_disable()</tt> code,
 interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers
+and from idle and offline CPUs.
 The callback function (<tt>remove_gp_cb()</tt> in this case) will be
 executed within softirq (software interrupt) environment within the
 Linux kernel,
@@ -1354,12 +1661,27 @@ write an RCU callback function that takes too long.
 Long-running operations should be relegated to separate threads or
 (in the Linux kernel) workqueues.
 
-<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<br><a href="#qq12answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+	After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+	structure, which would interact badly with concurrent insertions.
+	Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+	any changes, including any insertions that <tt>rcu_dereference()</tt>
+	would protect against.
+	Therefore, any insertions will be delayed until after
+	<tt>-&gt;gp_lock</tt>
+	is released on line&nbsp;25, which in turn means that
+	<tt>rcu_access_pointer()</tt> suffices.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 However, all that <tt>remove_gp_cb()</tt> is doing is
@@ -1406,14 +1728,31 @@ This was due to the fact that RCU was not heavily used within DYNIX/ptx,
 so the very few places that needed something like
 <tt>synchronize_rcu()</tt> simply open-coded it.
 
-<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<br><a href="#qq13answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Earlier it was claimed that <tt>call_rcu()</tt> and
+	<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+	by readers.
+	But how can that be correct, given that the invocation of the callback
+	and the freeing of the memory (respectively) must still wait for
+	a grace period to elapse?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	We could define things this way, but keep in mind that this sort of
+	definition would say that updates in garbage-collected languages
+	cannot complete until the next time the garbage collector runs,
+	which does not seem at all reasonable.
+	The key point is that in most cases, an updater using either
+	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+	next update as soon as it has invoked <tt>call_rcu()</tt> or
+	<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+	grace period.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 But what if the updater must wait for the completion of code to be
@@ -1838,11 +2177,26 @@ kthreads to be spawned.
 Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
 initialization can result in deadlock.
 
-<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<br><a href="#qq14answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	So what happens with <tt>synchronize_rcu()</tt> during
+	scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+	kernels?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
+	maps directly to <tt>synchronize_sched()</tt>.
+	Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+	boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+	However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+	so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+	during scheduler initialization.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 I learned of these boot-time requirements as a result of a series of
@@ -2171,6 +2525,14 @@ This real-time requirement motivated the grace-period kthread, which
 also simplified handling of a number of race conditions.
 
 <p>
+RCU must avoid degrading real-time response for CPU-bound threads, whether
+executing in usermode (which is one use case for
+<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
+That said, CPU-bound loops in the kernel must execute
+<tt>cond_resched_rcu_qs()</tt> at least once per few tens of milliseconds
+in order to avoid receiving an IPI from RCU.
+
+<p>
 Finally, RCU's status as a synchronization primitive means that
 any RCU failure can result in arbitrary memory corruption that can be
 extremely difficult to debug.
@@ -2223,6 +2585,8 @@ described in a separate section.
 <li>	<a href="#Sched Flavor">Sched Flavor</a>
 <li>	<a href="#Sleepable RCU">Sleepable RCU</a>
 <li>	<a href="#Tasks RCU">Tasks RCU</a>
+<li>	<a href="#Waiting for Multiple Grace Periods">
+	Waiting for Multiple Grace Periods</a>
 </ol>
 
 <h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
@@ -2472,6 +2836,94 @@ The tasks-RCU API is quite compact, consisting only of
 <tt>synchronize_rcu_tasks()</tt>, and
 <tt>rcu_barrier_tasks()</tt>.
 
+<h3><a name="Waiting for Multiple Grace Periods">
+Waiting for Multiple Grace Periods</a></h3>
+
+<p>
+Perhaps you have an RCU protected data structure that is accessed from
+RCU read-side critical sections, from softirq handlers, and from
+hardware interrupt handlers.
+That is three flavors of RCU, the normal flavor, the bottom-half flavor,
+and the sched flavor.
+How to wait for a compound grace period?
+
+<p>
+The best approach is usually to &ldquo;just say no!&rdquo; and
+insert <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+around each RCU read-side critical section, regardless of what
+environment it happens to be in.
+But suppose that some of the RCU read-side critical sections are
+on extremely hot code paths, and that use of <tt>CONFIG_PREEMPT=n</tt>
+is not a viable option, so that <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are not free.
+What then?
+
+<p>
+You <i>could</i> wait on all three grace periods in succession, as follows:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu();
+ 2 synchronize_rcu_bh();
+ 3 synchronize_sched();
+</pre>
+</blockquote>
+
+<p>
+This works, but triples the update-side latency penalty.
+In cases where this is not acceptable, <tt>synchronize_rcu_mult()</tt>
+may be used to wait on all three flavors of grace period concurrently:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched);
+</pre>
+</blockquote>
+
+<p>
+But what if it is necessary to also wait on SRCU?
+This can be done as follows:
+
+<blockquote>
+<pre>
+ 1 static void call_my_srcu(struct rcu_head *head,
+ 2        void (*func)(struct rcu_head *head))
+ 3 {
+ 4   call_srcu(&amp;my_srcu, head, func);
+ 5 }
+ 6
+ 7 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched, call_my_srcu);
+</pre>
+</blockquote>
+
+<p>
+If you needed to wait on multiple different flavors of SRCU
+(but why???), you would need to create a wrapper function resembling
+<tt>call_my_srcu()</tt> for each SRCU flavor.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But what if I need to wait for multiple RCU flavors, but I also need
+	the grace periods to be expedited?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	If you are using expedited grace periods, there should be less penalty
+	for waiting on them in succession.
+	But if that is nevertheless a problem, you can use workqueues
+	or multiple kthreads to wait on the various expedited grace
+	periods concurrently.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>
+Again, it is usually better to adjust the RCU read-side critical sections
+to use a single flavor of RCU, but when this is not feasible, you can use
+<tt>synchronize_rcu_mult()</tt>.
+
 <h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
 
 <p>
@@ -2569,329 +3021,4 @@ and is provided
 under the terms of the Creative Commons Attribution-Share Alike 3.0
 United States license.
 
-<h3><a name="Answers to Quick Quizzes">
-Answers to Quick Quizzes</a></h3>
-
-<a name="qq1answer"></a>
-<p><b>Quick Quiz 1</b>:
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-
-
-</p><p><b>Answer</b>:
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-
-
-</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
-
-<a name="qq2answer"></a>
-<p><b>Quick Quiz 2</b>:
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-
-
-</p><p><b>Answer</b>:
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
-
-<a name="qq3answer"></a>
-<p><b>Quick Quiz 3</b>:
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-
-
-</p><p><b>Answer</b>:
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-
-
-</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
-
-<a name="qq4answer"></a>
-<p><b>Quick Quiz 4</b>:
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-
-
-</p><p><b>Answer</b>:
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
-
-<a name="qq5answer"></a>
-<p><b>Quick Quiz 5</b>:
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-
-
-</p><p><b>Answer</b>:
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-
-
-</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
-
-<a name="qq6answer"></a>
-<p><b>Quick Quiz 6</b>:
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-
-
-</p><p><b>Answer</b>:
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Very likely to return p. */</tt>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>do_something_with(q-&gt;a);
-	/* No smp_mb(), so might happen after kfree(). */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Might return p if no memory barrier. */</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-<li>	CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-
-
-</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
-
-<a name="qq7answer"></a>
-<p><b>Quick Quiz 7</b>:
-But how does the upgrade-to-write operation exclude other readers?
-
-
-</p><p><b>Answer</b>:
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-
-
-</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
-
-<a name="qq8answer"></a>
-<p><b>Quick Quiz 8</b>:
-Can't the compiler also reorder this code?
-
-
-</p><p><b>Answer</b>:
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-
-
-</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
-
-<a name="qq9answer"></a>
-<p><b>Quick Quiz 9</b>:
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-
-
-</p><p><b>Answer</b>:
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-
-
-</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
-
-<a name="qq10answer"></a>
-<p><b>Quick Quiz 10</b>:
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-
-
-</p><p><b>Answer</b>:
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-
-
-</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
-
-<a name="qq11answer"></a>
-<p><b>Quick Quiz 11</b>:
-What about sleeping locks?
-
-
-</p><p><b>Answer</b>:
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into a sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquire within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping locks.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-
-
-</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
-
-<a name="qq12answer"></a>
-<p><b>Quick Quiz 12</b>:
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-
-
-</p><p><b>Answer</b>:
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-
-
-</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
-
-<a name="qq13answer"></a>
-<p><b>Quick Quiz 13</b>:
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-
-
-</p><p><b>Answer</b>:
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-
-
-</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
-
-<a name="qq14answer"></a>
-<p><b>Quick Quiz 14</b>:
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-
-
-</p><p><b>Answer</b>:
-In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-
-
-</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
-
-
 </body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
deleted file mode 100644
index 3a97ba490c42..000000000000
--- a/Documentation/RCU/Design/Requirements/Requirements.htmlx
+++ /dev/null
@@ -1,2741 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E.&nbsp;McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation, however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li>	<a href="#Fundamental Requirements">
-	Fundamental Requirements</a>
-<li>	<a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li>	<a href="#Parallelism Facts of Life">
-	Parallelism Facts of Life</a>
-<li>	<a href="#Quality-of-Implementation Requirements">
-	Quality-of-Implementation Requirements</a>
-<li>	<a href="#Linux Kernel Complications">
-	Linux Kernel Complications</a>
-<li>	<a href="#Software-Engineering Requirements">
-	Software-Engineering Requirements</a>
-<li>	<a href="#Other RCU Flavors">
-	Other RCU Flavors</a>
-<li>	<a href="#Possible Future Changes">
-	Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li>	<a href="#Grace-Period Guarantee">
-	Grace-Period Guarantee</a>
-<li>	<a href="#Publish-Subscribe Guarantee">
-	Publish-Subscribe Guarantee</a>
-<li>	<a href="#Memory-Barrier Guarantees">
-	Memory-Barrier Guarantees</a>
-<li>	<a href="#RCU Primitives Guaranteed to Execute Unconditionally">
-	RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li>	<a href="#Guaranteed Read-to-Write Upgrade">
-	Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5   rcu_read_lock();
- 6   r1 = READ_ONCE(x);
- 7   r2 = READ_ONCE(y);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   WRITE_ONCE(x, 1);
-14   synchronize_rcu();
-15   WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 &amp;&amp; r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<p>@@QQ@@
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<p>@@QQA@@
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-<p>@@QQE@@
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL        0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING    2
- 4 #define STATE_WANT_NORMAL   3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10   int state_snap;
-11
-12   rcu_read_lock();
-13   state_snap = READ_ONCE(state);
-14   if (state_snap == STATE_NORMAL)
-15     do_something();
-16   else
-17     do_something_carefully();
-18   rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24   synchronize_rcu();
-25   WRITE_ONCE(state, STATE_RECOVERING);
-26   recovery();
-27   WRITE_ONCE(state, STATE_WANT_NORMAL);
-28   synchronize_rcu();
-29   WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<p>@@QQ@@
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<p>@@QQA@@
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-<p>@@QQE@@
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   gp = p; /* ORDERING BUG */
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-<b>11   gp = p; /* ORDERING BUG */
-12   p-&gt;a = a;
-13   p-&gt;b = a;</b>
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
-it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   rcu_assign_pointer(gp, p);
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines&nbsp;11 and&nbsp;12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of &ldquo;interesting&rdquo; compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<p>@@QQ@@
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<p>@@QQA@@
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-<p>@@QQE@@
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3   rcu_read_lock();
- 4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5     do_something(gp-&gt;a, gp-&gt;b);</b>
- 6     rcu_read_unlock();
- 7     return true;
- 8   }
- 9   rcu_read_unlock();
-10   return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp-&gt;a</tt>
-and <tt>gp-&gt;b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = rcu_dereference(gp);
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li>	Remove the data element from the enclosing structure.
-<li>	Wait for all pre-existing RCU read-side critical sections
-	to complete (because only pre-existing readers can possibly have
-	a reference to the newly removed data element).
-<li>	At this point, only the updater has a reference to the
-	newly removed data element, so it can safely reclaim
-	the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3   struct foo *p;
- 4
- 5   spin_lock(&amp;gp_lock);
- 6   p = rcu_access_pointer(gp);
- 7   if (!p) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   rcu_assign_pointer(gp, NULL);
-12   spin_unlock(&amp;gp_lock);
-13   synchronize_rcu();
-14   kfree(p);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line&nbsp;13 waiting for a grace
-period before line&nbsp;14 frees the old data element.
-This waiting ensures that readers will reach line&nbsp;7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li>	The value returned by <tt>rcu_access_pointer()</tt>
-	cannot be dereferenced.
-	If you want to access the value pointed to as well as
-	the pointer itself, use <tt>rcu_dereference()</tt>
-	instead of <tt>rcu_access_pointer()</tt>.
-<li>	The call to <tt>rcu_access_pointer()</tt> need not be
-	protected.
-	In contrast, <tt>rcu_dereference()</tt> must either be
-	within an RCU read-side critical section or in a code
-	segment where the pointer cannot change, for example, in
-	code protected by the corresponding update-side lock.
-</ol>
-
-<p>@@QQ@@
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<p>@@QQA@@
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-<p>@@QQE@@
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees have provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li>	Each CPU that has an RCU read-side critical section that
-	begins before <tt>synchronize_rcu()</tt> starts is
-	guaranteed to execute a full memory barrier between the time
-	that the RCU read-side critical section ends and the time that
-	<tt>synchronize_rcu()</tt> returns.
-	Without this guarantee, a pre-existing RCU read-side critical section
-	might hold a reference to the newly removed <tt>struct foo</tt>
-	after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt>.
-<li>	Each CPU that has an RCU read-side critical section that ends
-	after <tt>synchronize_rcu()</tt> returns is guaranteed
-	to execute a full memory barrier between the time that
-	<tt>synchronize_rcu()</tt> begins and the time that the RCU
-	read-side critical section begins.
-	Without this guarantee, a later RCU read-side critical section
-	running after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt> might
-	later run <tt>do_something_gp()</tt> and find the
-	newly deleted <tt>struct foo</tt>.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> remains
-	on a given CPU, then that CPU is guaranteed to execute a full
-	memory barrier sometime during the execution of
-	<tt>synchronize_rcu()</tt>.
-	This guarantee ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on line&nbsp;11.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> migrates
-	among a group of CPUs during that invocation, then each of the
-	CPUs in that group is guaranteed to execute a full memory barrier
-	sometime during the execution of <tt>synchronize_rcu()</tt>.
-	This guarantee also ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on
-	line&nbsp;11, but also in the case where the thread executing the
-	<tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<p>@@QQ@@
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<p>@@QQA@@
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-<p>@@QQE@@
-
-<p>@@QQ@@
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<p>@@QQA@@
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Very likely to return p. */</tt>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>do_something_with(q-&gt;a);
-	/* No smp_mb(), so might happen after kfree(). */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Might return p if no memory barrier. */</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-<li>	CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-<p>@@QQE@@
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-primitives with conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>, however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
-<p>@@QQ@@
-But how does the upgrade-to-write operation exclude other readers?
-<p>@@QQA@@
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-<p>@@QQE@@
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long, however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li>	<a href="#Readers Impose Minimal Ordering">
-	Readers Impose Minimal Ordering</a>
-<li>	<a href="#Readers Do Not Exclude Updaters">
-	Readers Do Not Exclude Updaters</a>
-<li>	<a href="#Updaters Only Wait For Old Readers">
-	Updaters Only Wait For Old Readers</a>
-<li>	<a href="#Grace Periods Don't Partition Read-Side Critical Sections">
-	Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li>	<a href="#Read-Side Critical Sections Don't Partition Grace Periods">
-	Read-Side Critical Sections Don't Partition Grace Periods</a>
-<li>	<a href="#Disabling Preemption Does Not Block Grace Periods">
-	Disabling Preemption Does Not Block Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(x, 1);
- 5   rcu_read_unlock();
- 6   rcu_read_lock();
- 7   WRITE_ONCE(y, 1);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   rcu_read_lock();
-14   r1 = READ_ONCE(y);
-15   rcu_read_unlock();
-16   rcu_read_lock();
-17   r2 = READ_ONCE(x);
-18   rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design:  Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<p>@@QQ@@
-Can't the compiler also reorder this code?
-<p>@@QQA@@
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-<p>@@QQE@@
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is to prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   r1 = READ_ONCE(y);
- 5   if (r1) {
- 6     do_something_with_nonzero_x();
- 7     r2 = READ_ONCE(x);
- 8     WARN_ON(!r2); /* BUG!!! */
- 9   }
-10   rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15   spin_lock(&amp;my_lock);
-16   WRITE_ONCE(x, 1);
-17   WRITE_ONCE(y, 1);
-18   spin_unlock(&amp;my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
-<p>@@QQ@@
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<p>@@QQA@@
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-<p>@@QQE@@
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>x</tt>, <tt>y</tt>, and <tt>z</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   r2 = READ_ONCE(b);
-20   r3 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure show how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   r2 = READ_ONCE(c);
-19   synchronize_rcu();
-20   WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25   rcu_read_lock();
-26   r3 = READ_ONCE(b);
-27   r4 = READ_ONCE(d);
-28   rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This mean that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   WRITE_ONCE(d, 1);
-20   r2 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26   r3 = READ_ONCE(d);
-27   synchronize_rcu();
-28   WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33   rcu_read_lock();
-34   r4 = READ_ONCE(b);
-35   r5 = READ_ONCE(e);
-36   rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp&amp; r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<p>@@QQ@@
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<p>@@QQA@@
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-<p>@@QQE@@
-
-<h3><a name="Disabling Preemption Does Not Block Grace Periods">
-Disabling Preemption Does Not Block Grace Periods</a></h3>
-
-<p>
-There was a time when disabling preemption on any given CPU would block
-subsequent grace periods.
-However, this was an accident of implementation and is not a requirement.
-And in the current Linux-kernel implementation, disabling preemption
-on a given CPU in fact does not block grace periods, as Oleg Nesterov
-<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
-
-<p>
-If you need a preempt-disable region to block grace periods, you need to add
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
-as follows:
-
-<blockquote>
-<pre>
- 1 preempt_disable();
- 2 rcu_read_lock();
- 3 do_something();
- 4 rcu_read_unlock();
- 5 preempt_enable();
- 6
- 7 /* Spinlocks implicitly disable preemption. */
- 8 spin_lock(&amp;mylock);
- 9 rcu_read_lock();
-10 do_something();
-11 rcu_read_unlock();
-12 spin_unlock(&amp;mylock);
-</pre>
-</blockquote>
-
-<p>
-In theory, you could enter the RCU read-side critical section first,
-but it is more efficient to keep the entire RCU read-side critical
-section contained in the preempt-disable region as shown above.
-Of course, RCU read-side critical sections that extend outside of
-preempt-disable regions will work correctly, but such critical sections
-can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
-more work.
-And no, this is <i>not</i> an invitation to enclose all of your RCU
-read-side critical sections within preempt-disable regions, because
-doing so would degrade real-time response.
-
-<p>
-This non-requirement appeared with preemptible RCU.
-If you need a grace period that waits on non-preemptible code regions, use
-<a href="#Sched Flavor">RCU-sched</a>.
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li>	Any CPU or task may be delayed at any time,
-	and any attempts to avoid these delays by disabling
-	preemption, interrupts, or whatever are completely futile.
-	This is most obvious in preemptible user-level
-	environments and in virtualized environments (where
-	a given guest OS's VCPUs can be preempted at any time by
-	the underlying hypervisor), but can also happen in bare-metal
-	environments due to ECC errors, NMIs, and other hardware
-	events.
-	Although a delay of more than about 20 seconds can result
-	in splats, the RCU implementation is obligated to use
-	algorithms that can tolerate extremely long delays, but where
-	&ldquo;extremely long&rdquo; is not long enough to allow
-	wrap-around when incrementing a 64-bit counter.
-<li>	Both the compiler and the CPU can reorder memory accesses.
-	Where it matters, RCU must use compiler directives and
-	memory-barrier instructions to preserve ordering.
-<li>	Conflicting writes to memory locations in any given cache line
-	will result in expensive cache misses.
-	Greater numbers of concurrent writes and more-frequent
-	concurrent writes will result in more dramatic slowdowns.
-	RCU is therefore obligated to use algorithms that have
-	sufficient locality to avoid significant performance and
-	scalability problems.
-<li>	As a rough rule of thumb, only one CPU's worth of processing
-	may be carried out under the protection of any given exclusive
-	lock.
-	RCU must therefore use scalable locking designs.
-<li>	Counters are finite, especially on 32-bit systems.
-	RCU's use of counters must therefore tolerate counter wrap,
-	or be designed such that counter wrap would take way more
-	time than a single system is likely to run.
-	An uptime of ten years is quite possible, a runtime
-	of a century much less so.
-	As an example of the latter, RCU's dyntick-idle nesting counter
-	allows 54 bits for interrupt nesting level (this counter
-	is 64 bits even on a 32-bit system).
-	Overflowing this counter requires 2<sup>54</sup>
-	half-interrupts on a given CPU without that CPU ever going idle.
-	If a half-interrupt happened every microsecond, it would take
-	570 years of runtime to overflow this counter, which is currently
-	believed to be an acceptably long time.
-<li>	Linux systems can have thousands of CPUs running a single
-	Linux kernel in a single shared-memory environment.
-	RCU must therefore pay close attention to high-end scalability.
-</ol>
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would have otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li>	<a href="#Specialization">Specialization</a>
-<li>	<a href="#Performance and Scalability">Performance and Scalability</a>
-<li>	<a href="#Composability">Composability</a>
-<li>	<a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes is covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<p>@@QQ@@
-What about sleeping locks?
-<p>@@QQA@@
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into a sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquire within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping locks.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-<p>@@QQE@@
-
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, but many can function in that mode,
-with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem:  In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veternarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veternarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veternarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation between a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given &ldquo;frank and open&rdquo; feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important for in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9   struct foo *p = container_of(rhp, struct foo, rh);
-10
-11   kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16   struct foo *p;
-17
-18   spin_lock(&amp;gp_lock);
-19   p = rcu_dereference(gp);
-20   if (!p) {
-21     spin_unlock(&amp;gp_lock);
-22     return false;
-23   }
-24   rcu_assign_pointer(gp, NULL);
-25   call_rcu(&amp;p-&gt;rh, remove_gp_cb);
-26   spin_unlock(&amp;gp_lock);
-27   return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines&nbsp;1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line&nbsp;25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<p>@@QQ@@
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<p>@@QQA@@
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-<p>@@QQE@@
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows &ldquo;fire and forget&rdquo; operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9   struct foo *p;
-10
-11   spin_lock(&amp;gp_lock);
-12   p = rcu_dereference(gp);
-13   if (!p) {
-14     spin_unlock(&amp;gp_lock);
-15     return false;
-16   }
-17   rcu_assign_pointer(gp, NULL);
-18   kfree_rcu(p, rh);
-19   spin_unlock(&amp;gp_lock);
-20   return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<p>@@QQ@@
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<p>@@QQA@@
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-<p>@@QQE@@
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3   struct foo *p;
- 4   unsigned long s;
- 5
- 6   spin_lock(&amp;gp_lock);
- 7   p = rcu_access_pointer(gp);
- 8   if (!p) {
- 9     spin_unlock(&amp;gp_lock);
-10     return false;
-11   }
-12   rcu_assign_pointer(gp, NULL);
-13   spin_unlock(&amp;gp_lock);
-14   s = get_state_synchronize_rcu();
-15   do_something_while_waiting();
-16   cond_synchronize_rcu(s);
-17   kfree(p);
-18   return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
-&ldquo;cookie&rdquo; from RCU,
-then line&nbsp;15 carries out other tasks,
-and finally, line&nbsp;16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility and CPU overhead.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section:  To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods:  As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates, for example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li>	It is all too easy to forget to use <tt>rcu_read_lock()</tt>
-	everywhere that it is needed, so kernels built with
-	<tt>CONFIG_PROVE_RCU=y</tt> will spat if
-	<tt>rcu_dereference()</tt> is used outside of an
-	RCU read-side critical section.
-	Update-side code can use <tt>rcu_dereference_protected()</tt>,
-	which takes a
-	<a href="https://lwn.net/Articles/371986/">lockdep expression</a>
-	to indicate what is providing the protection.
-	If the indicated protection is not provided, a lockdep splat
-	is emitted.
-
-	<p>
-	Code shared between readers and updaters can use
-	<tt>rcu_dereference_check()</tt>, which also takes a
-	lockdep expression, and emits a lockdep splat if neither
-	<tt>rcu_read_lock()</tt> nor the indicated protection
-	is in place.
-	In addition, <tt>rcu_dereference_raw()</tt> is used in those
-	(hopefully rare) cases where the required protection cannot
-	be easily described.
-	Finally, <tt>rcu_read_lock_held()</tt> is provided to
-	allow a function to verify that it has been invoked within
-	an RCU read-side critical section.
-	I was made aware of this set of requirements shortly after Thomas
-	Gleixner audited a number of RCU uses.
-<li>	A given function might wish to check for RCU-related preconditions
-	upon entry, before using any other RCU API.
-	The <tt>rcu_lockdep_assert()</tt> does this job,
-	asserting the expression in kernels having lockdep enabled
-	and doing nothing otherwise.
-<li>	It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
-	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
-	substituting a simple assignment.
-	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
-	Arnd Bergmann made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li>	Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
-	will splat if a data element is passed to <tt>call_rcu()</tt>
-	twice in a row, without a grace period in between.
-	(This error is similar to a double free.)
-	The corresponding <tt>rcu_head</tt> structures that are
-	dynamically allocated are automatically tracked, but
-	<tt>rcu_head</tt> structures allocated on the stack
-	must be initialized with <tt>init_rcu_head_on_stack()</tt>
-	and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
-	Similarly, statically allocated non-stack <tt>rcu_head</tt>
-	structures must be initialized with <tt>init_rcu_head()</tt>
-	and cleaned up with <tt>destroy_rcu_head()</tt>.
-	Mathieu Desnoyers made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li>	An infinite loop in an RCU read-side critical section will
-	eventually trigger an RCU CPU stall warning splat, with
-	the duration of &ldquo;eventually&rdquo; being controlled by the
-	<tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
-	alternatively, by the
-	<tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
-	parameter.
-	However, RCU is not obligated to produce this splat
-	unless there is a grace period waiting on that particular
-	RCU read-side critical section.
-	<p>
-	Some extreme workloads might intentionally delay
-	RCU grace periods, and systems running those workloads can
-	be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
-	to suppress the splats.
-	This kernel parameter may also be set via <tt>sysfs</tt>.
-	Furthermore, RCU CPU stall warnings are counter-productive
-	during sysrq dumps and during panics.
-	RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
-	<tt>rcu_sysrq_end()</tt> API members to be called before
-	and after long sysrq dumps.
-	RCU also supplies the <tt>rcu_panic()</tt> notifier that is
-	automatically invoked at the beginning of a panic to suppress
-	further RCU CPU stall warnings.
-
-	<p>
-	This requirement made itself known in the early 1990s, pretty
-	much the first time that it was necessary to debug a CPU stall.
-	That said, the initial implementation in DYNIX/ptx was quite
-	generic in comparison with that of Linux.
-<li>	Although it would be very good to detect pointers leaking out
-	of RCU read-side critical sections, there is currently no
-	good way of doing this.
-	One complication is the need to distinguish between pointers
-	leaking and pointers that have been handed off from RCU to
-	some other synchronization mechanism, for example, reference
-	counting.
-<li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via both debugfs and event tracing.
-<li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
-	<tt>rcu_dereference()</tt> to create typical linked
-	data structures can be surprisingly error-prone.
-	Therefore, RCU-protected
-	<a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
-	and, more recently, RCU-protected
-	<a href="https://lwn.net/Articles/612100/">hash tables</a>
-	are available.
-	Many other special-purpose RCU-protected data structures are
-	available in the Linux kernel and the userspace RCU library.
-<li>	Some linked structures are created at compile time, but still
-	require <tt>__rcu</tt> checking.
-	The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
-	purpose.
-<li>	It is not necessary to use <tt>rcu_assign_pointer()</tt>
-	when creating linked structures that are to be published via
-	a single external pointer.
-	The <tt>RCU_INIT_POINTER()</tt> macro is provided for
-	this task and also for assigning <tt>NULL</tt> pointers
-	at runtime.
-</ol>
-
-<p>
-This not a hard-and-fast list:  RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li>	<a href="#Configuration">Configuration</a>.
-<li>	<a href="#Firmware Interface">Firmware Interface</a>.
-<li>	<a href="#Early Boot">Early Boot</a>.
-<li>	<a href="#Interrupts and NMIs">
-	Interrupts and non-maskable interrupts (NMIs)</a>.
-<li>	<a href="#Loadable Modules">Loadable Modules</a>.
-<li>	<a href="#Hotplug CPU">Hotplug CPU</a>.
-<li>	<a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li>	<a href="#Tracing and RCU">Tracing and RCU</a>.
-<li>	<a href="#Energy Efficiency">Energy Efficiency</a>.
-<li>	<a href="#Memory Efficiency">Memory Efficiency</a>.
-<li>	<a href="#Performance, Scalability, Response Time, and Reliability">
-	Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-&ldquo;out of the box.&rdquo;
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier,
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
-<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
-(<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
-will all operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that the call <tt>synchronize_rcu()</tt> (or friends)
-itself is a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
-
-<p>@@QQ@@
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<p>@@QQA@@
-In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-<p>@@QQE@@
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-going to eventually be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
-
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is &ldquo;interesting.&rdquo;
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
-
-<p>
-In addition, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via
-interrupt handler's use of RCU.
-
-<p>
-This pair of mutual scheduler-RCU requirements came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads when built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing is provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
-
-<p>
-It is similarly socially unacceptable to interrupt an
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet, for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>-&gt;func</tt> field will often provide
-the needed debug information.
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future &ldquo;lazy&rdquo;
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined, however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead, witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting&mdash;and perhaps unprecedented&mdash;validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-This bug will be occurring about three times per <i>day</i> across
-the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li>	<a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
-<li>	<a href="#Sched Flavor">Sched Flavor</a>
-<li>	<a href="#Sleepable RCU">Sleepable RCU</a>
-<li>	<a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
-
-<p>
-The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
-hence the &ldquo;_bh&rdquo; abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handlers during the
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-
-<h3><a name="Sched Flavor">Sched Flavor</a></h3>
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
-RCU in that an RCU-sched grace period waits for for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
-
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying &ldquo;I need to block within
-an RCU read-side critical section&rdquo; was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections,
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can block waiting, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 synchronize_srcu(&amp;ss);
- 6 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across as <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
-
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &ldquo;tramopolines&rdquo; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched_rcu_qs()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
-
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-Expedited grace periods scan the CPUs, so their latency and overhead
-increases with increasing numbers of CPUs.
-If this becomes a serious problem on large systems, it will be necessary
-to do some redesign to avoid this scalability problem.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an &ldquo;interesting&rdquo; architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-There is an embarrassingly large number of flavors of RCU, and this
-number has been increasing over time.
-Perhaps it will be possible to combine some at some future date.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decade's worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
-
-<p>@@QQAL@@
-
-</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
deleted file mode 100755
index d354f069559b..000000000000
--- a/Documentation/RCU/Design/htmlqqz.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/sh
-#
-# Usage: sh htmlqqz.sh file
-#
-# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
-# Commands, all of which must be on a line by themselves:
-#
-#	"<p>@@QQ@@": Start of a quick quiz.
-#	"<p>@@QQA@@": Start of a quick-quiz answer.
-#	"<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
-#	"<p>@@QQAL@@": Place to put quick-quiz answer list.
-#
-# Places the result in file.html.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
-
-fn=$1
-if test ! -r $fn.htmlx
-then
-	echo "Error: $fn.htmlx unreadable."
-	exit 1
-fi
-
-echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
-echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
-awk < $fn.htmlx >> $fn.html '
-
-state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
-	print $0;
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
-	next;
-}
-
-state == "" && $1 == "<p>@@QQ@@" {
-	qqn++;
-	qqlineno = NR;
-	haveqq = 1;
-	state = "qq";
-	print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
-	next;
-}
-
-state == "qq" && $1 != "<p>@@QQA@@" {
-	qq[qqn] = qq[qqn] $0 "\n";
-	print $0
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
-	next;
-}
-
-state == "qq" && $1 == "<p>@@QQA@@" {
-	state = "qqa";
-	print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
-	next;
-}
-
-state == "qqa" && $1 != "<p>@@QQE@@" {
-	qqa[qqn] = qqa[qqn] $0 "\n";
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
-	next;
-}
-
-state == "qqa" && $1 == "<p>@@QQE@@" {
-	state = "";
-	next;
-}
-
-state == "" && $1 == "<p>@@QQAL@@" {
-	haveqq = "";
-	print "<h3><a name=\"Answers to Quick Quizzes\">"
-	print "Answers to Quick Quizzes</a></h3>"
-	print "";
-	for (i = 1; i <= qqn; i++) {
-		print "<a name=\"qq" i "answer\"></a>"
-		print "<p><b>Quick Quiz " i "</b>:"
-		print qq[i];
-		print "";
-		print "</p><p><b>Answer</b>:"
-		print qqa[i];
-		print "";
-		print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
-		print "";
-	}
-	next;
-}
-
-END {
-	if (state != "")
-		print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
-	else if (haveqq)
-		print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
-}'
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b1b6d0..00a3a38b375a 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@ o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
 
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o	"s" is the sequence number, with an odd number indicating that
 	an expedited grace period is in progress.
 
-o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
-	attempt to start an expedited grace period found that someone
-	else had completed an expedited grace period that satisfies the
-	attempted request.  "Our work is done."
+o	"wd1", "wd2", and "wd3" are the number of times that an attempt
+	to start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the attempted
+	request.  "Our work is done."
 
 o	"n" is number of times that a concurrent CPU-hotplug operation
 	forced a fallback to a normal grace period.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index dc49c6712b17..111770ffa10e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -681,22 +681,30 @@ Although RCU can be used in many different ways, a very common use of
 RCU is analogous to reader-writer locking.  The following unified
 diff shows how closely related RCU and reader-writer locking can be.
 
+	@@ -5,5 +5,5 @@ struct el {
+	 	int data;
+	 	/* Other data fields */
+	 };
+	-rwlock_t listmutex;
+	+spinlock_t listmutex;
+	 struct el head;
+
 	@@ -13,15 +14,15 @@
 		struct list_head *lp;
 		struct el *p;
 
-	-	read_lock();
+	-	read_lock(&listmutex);
 	-	list_for_each_entry(p, head, lp) {
 	+	rcu_read_lock();
 	+	list_for_each_entry_rcu(p, head, lp) {
 			if (p->key == key) {
 				*result = p->data;
-	-			read_unlock();
+	-			read_unlock(&listmutex);
 	+			rcu_read_unlock();
 				return 1;
 			}
 		}
-	-	read_unlock();
+	-	read_unlock(&listmutex);
 	+	rcu_read_unlock();
 		return 0;
 	 }
@@ -732,7 +740,7 @@ Or, for those who prefer a side-by-side listing:
  5   int data;                          5   int data;
  6   /* Other data fields */            6   /* Other data fields */
  7 };                                   7 };
- 8 spinlock_t listmutex;                8 spinlock_t listmutex;
+ 8 rwlock_t listmutex;                  8 spinlock_t listmutex;
  9 struct el head;                      9 struct el head;
 
  1 int search(long key, int *result)    1 int search(long key, int *result)
@@ -740,15 +748,15 @@ Or, for those who prefer a side-by-side listing:
  3   struct list_head *lp;              3   struct list_head *lp;
  4   struct el *p;                      4   struct el *p;
  5                                      5
- 6   read_lock();                       6   rcu_read_lock();
+ 6   read_lock(&listmutex);             6   rcu_read_lock();
  7   list_for_each_entry(p, head, lp) { 7   list_for_each_entry_rcu(p, head, lp) {
  8     if (p->key == key) {             8     if (p->key == key) {
  9       *result = p->data;             9       *result = p->data;
-10       read_unlock();                10       rcu_read_unlock();
+10       read_unlock(&listmutex);      10       rcu_read_unlock();
 11       return 1;                     11       return 1;
 12     }                               12     }
 13   }                                 13   }
-14   read_unlock();                    14   rcu_read_unlock();
+14   read_unlock(&listmutex);          14   rcu_read_unlock();
 15   return 0;                         15   return 0;
 16 }                                   16 }
 
diff --git a/Documentation/devicetree/bindings/regmap/regmap.txt b/Documentation/devicetree/bindings/regmap/regmap.txt
index e98a9652ccc8..0127be360fe8 100644
--- a/Documentation/devicetree/bindings/regmap/regmap.txt
+++ b/Documentation/devicetree/bindings/regmap/regmap.txt
@@ -1,50 +1,29 @@
-Device-Tree binding for regmap
-
-The endianness mode of CPU & Device scenarios:
-Index     Device     Endianness properties
----------------------------------------------------
-1         BE         'big-endian'
-2         LE         'little-endian'
-3	  Native     'native-endian'
-
-For one device driver, which will run in different scenarios above
-on different SoCs using the devicetree, we need one way to simplify
-this.
+Devicetree binding for regmap
 
 Optional properties:
-- {big,little,native}-endian: these are boolean properties, if absent
-  then the implementation will choose a default based on the device
-  being controlled.  These properties are for register values and all
-  the buffers only.  Native endian means that the CPU and device have
-  the same endianness.
 
-Examples:
-Scenario 1 : CPU in LE mode & device in LE mode.
-dev: dev@40031000 {
-	      compatible = "name";
-	      reg = <0x40031000 0x1000>;
-	      ...
-};
+   little-endian,
+   big-endian,
+   native-endian:	See common-properties.txt for a definition
 
-Scenario 2 : CPU in LE mode & device in BE mode.
-dev: dev@40031000 {
-	      compatible = "name";
-	      reg = <0x40031000 0x1000>;
-	      ...
-	      big-endian;
-};
+Note:
+Regmap defaults to little-endian register access on MMIO based
+devices, this is by far the most common setting. On CPU
+architectures that typically run big-endian operating systems
+(e.g. PowerPC), registers can be defined as big-endian and must
+be marked that way in the devicetree.
 
-Scenario 3 : CPU in BE mode & device in BE mode.
-dev: dev@40031000 {
-	      compatible = "name";
-	      reg = <0x40031000 0x1000>;
-	      ...
-};
+On SoCs that can be operated in both big-endian and little-endian
+modes, with a single hardware switch controlling both the endianess
+of the CPU and a byteswap for MMIO registers (e.g. many Broadcom MIPS
+chips), "native-endian" is used to allow using the same device tree
+blob in both cases.
 
-Scenario 4 : CPU in BE mode & device in LE mode.
+Examples:
+Scenario 1 : a register set in big-endian mode.
 dev: dev@40031000 {
-	      compatible = "name";
+	      compatible = "syscon";
 	      reg = <0x40031000 0x1000>;
+	      big-endian;
 	      ...
-	      little-endian;
 };
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0b3de80ec8f6..49673bd30b87 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3284,6 +3284,44 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Lazy RCU callbacks are those which RCU can
 			prove do nothing more than free memory.
 
+	rcuperf.gp_exp= [KNL]
+			Measure performance of expedited synchronous
+			grace-period primitives.
+
+	rcuperf.holdoff= [KNL]
+			Set test-start holdoff period.  The purpose of
+			this parameter is to delay the start of the
+			test until boot completes in order to avoid
+			interference.
+
+	rcuperf.nreaders= [KNL]
+			Set number of RCU readers.  The value -1 selects
+			N, where N is the number of CPUs.  A value
+			"n" less than -1 selects N-n+1, where N is again
+			the number of CPUs.  For example, -2 selects N
+			(the number of CPUs), -3 selects N+1, and so on.
+			A value of "n" less than or equal to -N selects
+			a single reader.
+
+	rcuperf.nwriters= [KNL]
+			Set number of RCU writers.  The values operate
+			the same as for rcuperf.nreaders.
+			N, where N is the number of CPUs
+
+	rcuperf.perf_runnable= [BOOT]
+			Start rcuperf running at boot time.
+
+	rcuperf.shutdown= [KNL]
+			Shut the system down after performance tests
+			complete.  This is useful for hands-off automated
+			testing.
+
+	rcuperf.perf_type= [KNL]
+			Specify the RCU implementation to test.
+
+	rcuperf.verbose= [KNL]
+			Enable additional printk() statements.
+
 	rcutorture.cbflood_inter_holdoff= [KNL]
 			Set holdoff time (jiffies) between successive
 			callback-flood tests.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 5001280e9d82..9de1c158d44c 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -97,7 +97,7 @@ between any two lock-classes:
    <hardirq-safe>   ->  <hardirq-unsafe>
    <softirq-safe>   ->  <softirq-unsafe>
 
-The first rule comes from the fact the a hardirq-safe lock could be
+The first rule comes from the fact that a hardirq-safe lock could be
 taken by a hardirq context, interrupting a hardirq-unsafe lock - and
 thus could result in a lock inversion deadlock. Likewise, a softirq-safe
 lock could be taken by an softirq context, interrupting a softirq-unsafe
@@ -220,7 +220,7 @@ calculated, which hash is unique for every lock chain. The hash value,
 when the chain is validated for the first time, is then put into a hash
 table, which hash-table can be checked in a lockfree manner. If the
 locking chain occurs again later on, the hash table tells us that we
-dont have to validate the chain again.
+don't have to validate the chain again.
 
 Troubleshooting:
 ----------------
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3729cbe60e41..147ae8ec836f 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -4,8 +4,40 @@
 
 By: David Howells <dhowells@redhat.com>
     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+    Will Deacon <will.deacon@arm.com>
+    Peter Zijlstra <peterz@infradead.org>
 
-Contents:
+==========
+DISCLAIMER
+==========
+
+This document is not a specification; it is intentionally (for the sake of
+brevity) and unintentionally (due to being human) incomplete. This document is
+meant as a guide to using the various memory barriers provided by Linux, but
+in case of any doubt (and there are many) please ask.
+
+To repeat, this document is not a specification of what Linux expects from
+hardware.
+
+The purpose of this document is twofold:
+
+ (1) to specify the minimum functionality that one can rely on for any
+     particular barrier, and
+
+ (2) to provide a guide as to how to use the barriers that are available.
+
+Note that an architecture can provide more than the minimum requirement
+for any particular barrier, but if the architecure provides less than
+that, that architecture is incorrect.
+
+Note also that it is possible that a barrier may be a no-op for an
+architecture because the way that arch works renders an explicit barrier
+unnecessary in that case.
+
+
+========
+CONTENTS
+========
 
  (*) Abstract memory access model.
 
@@ -31,15 +63,15 @@ Contents:
 
  (*) Implicit kernel memory barriers.
 
-     - Locking functions.
+     - Lock acquisition functions.
      - Interrupt disabling functions.
      - Sleep and wake-up functions.
      - Miscellaneous functions.
 
- (*) Inter-CPU locking barrier effects.
+ (*) Inter-CPU acquiring barrier effects.
 
-     - Locks vs memory accesses.
-     - Locks vs I/O accesses.
+     - Acquires vs memory accesses.
+     - Acquires vs I/O accesses.
 
  (*) Where are memory barriers needed?
 
@@ -61,6 +93,7 @@ Contents:
  (*) The things CPUs get up to.
 
      - And then there's the Alpha.
+     - Virtual Machine Guests.
 
  (*) Example uses.
 
@@ -148,7 +181,7 @@ As a further example, consider this sequence of events:
 
 	CPU 1		CPU 2
 	===============	===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;		Q = P;
 	P = &B		D = *Q;
 
@@ -430,8 +463,9 @@ And a couple of implicit varieties:
      This acts as a one-way permeable barrier.  It guarantees that all memory
      operations after the ACQUIRE operation will appear to happen after the
      ACQUIRE operation with respect to the other components of the system.
-     ACQUIRE operations include LOCK operations and smp_load_acquire()
-     operations.
+     ACQUIRE operations include LOCK operations and both smp_load_acquire()
+     and smp_cond_acquire() operations. The later builds the necessary ACQUIRE
+     semantics from relying on a control dependency and smp_rmb().
 
      Memory operations that occur before an ACQUIRE operation may appear to
      happen after it completes.
@@ -464,6 +498,11 @@ And a couple of implicit varieties:
      This means that ACQUIRE acts as a minimal "acquire" operation and
      RELEASE acts as a minimal "release" operation.
 
+A subset of the atomic operations described in atomic_ops.txt have ACQUIRE
+and RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions.  For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
 
 Memory barriers are only required where there's a possibility of interaction
 between two CPUs or between a CPU and a device.  If it can be guaranteed that
@@ -517,7 +556,7 @@ following sequence of events:
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
 	WRITE_ONCE(P, &B)
@@ -544,7 +583,7 @@ between the address load and the data load:
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
 	WRITE_ONCE(P, &B);
@@ -813,9 +852,10 @@ In summary:
       the same variable, then those stores must be ordered, either by
       preceding both of them with smp_mb() or by using smp_store_release()
       to carry out the stores.  Please note that it is -not- sufficient
-      to use barrier() at beginning of each leg of the "if" statement,
-      as optimizing compilers do not necessarily respect barrier()
-      in this case.
+      to use barrier() at beginning of each leg of the "if" statement
+      because, as shown by the example above, optimizing compilers can
+      destroy the control dependency while respecting the letter of the
+      barrier() law.
 
   (*) Control dependencies require at least one run-time conditional
       between the prior load and the subsequent store, and this
@@ -1731,15 +1771,15 @@ The Linux kernel has eight basic CPU memory barriers:
 
 
 All memory barriers except the data dependency barriers imply a compiler
-barrier. Data dependencies do not impose any additional compiler ordering.
+barrier.  Data dependencies do not impose any additional compiler ordering.
 
 Aside: In the case of data dependencies, the compiler would be expected
 to issue the loads in the correct order (eg. `a[b]` would have to load
 the value of b before loading a[b]), however there is no guarantee in
 the C specification that the compiler may not speculate the value of b
 (eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
-tmp = a[b]; ). There is also the problem of a compiler reloading b after
-having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+tmp = a[b]; ).  There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b].  A consensus
 has not yet been reached about these problems, however the READ_ONCE()
 macro is a good place to start looking.
 
@@ -1794,6 +1834,7 @@ There are some more advanced barrier functions:
 
 
  (*) lockless_dereference();
+
      This can be thought of as a pointer-fetch wrapper around the
      smp_read_barrier_depends() data-dependency barrier.
 
@@ -1858,7 +1899,7 @@ This is a variation on the mandatory write barrier that causes writes to weakly
 ordered I/O regions to be partially ordered.  Its effects may go beyond the
 CPU->Hardware interface and actually affect the hardware at some level.
 
-See the subsection "Locks vs I/O accesses" for more information.
+See the subsection "Acquires vs I/O accesses" for more information.
 
 
 ===============================
@@ -1873,8 +1914,8 @@ provide more substantial guarantees, but these may not be relied upon outside
 of arch specific code.
 
 
-ACQUIRING FUNCTIONS
--------------------
+LOCK ACQUISITION FUNCTIONS
+--------------------------
 
 The Linux kernel has a number of locking constructs:
 
@@ -1895,7 +1936,7 @@ for each construct.  These operations all imply certain barriers:
      Memory operations issued before the ACQUIRE may be completed after
      the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
      combined with a following ACQUIRE, orders prior stores against
-     subsequent loads and stores. Note that this is weaker than smp_mb()!
+     subsequent loads and stores.  Note that this is weaker than smp_mb()!
      The smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
@@ -2090,9 +2131,9 @@ or:
 	event_indicated = 1;
 	wake_up_process(event_daemon);
 
-A write memory barrier is implied by wake_up() and co. if and only if they wake
-something up.  The barrier occurs before the task state is cleared, and so sits
-between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A write memory barrier is implied by wake_up() and co.  if and only if they
+wake something up.  The barrier occurs before the task state is cleared, and so
+sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
 
 	CPU 1				CPU 2
 	===============================	===============================
@@ -2206,7 +2247,7 @@ three CPUs; then should the following sequence of events occur:
 
 Then there is no guarantee as to what order CPU 3 will see the accesses to *A
 through *H occur in, other than the constraints imposed by the separate locks
-on the separate CPUs. It might, for example, see:
+on the separate CPUs.  It might, for example, see:
 
 	*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
 
@@ -2486,9 +2527,9 @@ The following operations are special locking primitives:
 	clear_bit_unlock();
 	__clear_bit_unlock();
 
-These implement ACQUIRE-class and RELEASE-class operations. These should be used in
-preference to other operations when implementing locking primitives, because
-their implementations can be optimised on many architectures.
+These implement ACQUIRE-class and RELEASE-class operations.  These should be
+used in preference to other operations when implementing locking primitives,
+because their implementations can be optimised on many architectures.
 
 [!] Note that special memory barrier primitives are available for these
 situations because on some CPUs the atomic instructions used imply full memory
@@ -2568,12 +2609,12 @@ explicit barriers are used.
 
 Normally this won't be a problem because the I/O accesses done inside such
 sections will include synchronous load operations on strictly ordered I/O
-registers that form implicit I/O barriers. If this isn't sufficient then an
+registers that form implicit I/O barriers.  If this isn't sufficient then an
 mmiowb() may need to be used explicitly.
 
 
 A similar situation may occur between an interrupt routine and two routines
-running on separate CPUs that communicate with each other. If such a case is
+running on separate CPUs that communicate with each other.  If such a case is
 likely, then interrupt-disabling locks should be used to guarantee ordering.
 
 
@@ -2587,8 +2628,8 @@ functions:
  (*) inX(), outX():
 
      These are intended to talk to I/O space rather than memory space, but
-     that's primarily a CPU-specific concept. The i386 and x86_64 processors do
-     indeed have special I/O space access cycles and instructions, but many
+     that's primarily a CPU-specific concept.  The i386 and x86_64 processors
+     do indeed have special I/O space access cycles and instructions, but many
      CPUs don't have such a concept.
 
      The PCI bus, amongst others, defines an I/O space concept which - on such
@@ -2610,7 +2651,7 @@ functions:
 
      Whether these are guaranteed to be fully ordered and uncombined with
      respect to each other on the issuing CPU depends on the characteristics
-     defined for the memory window through which they're accessing. On later
+     defined for the memory window through which they're accessing.  On later
      i386 architecture machines, for example, this is controlled by way of the
      MTRR registers.
 
@@ -2635,10 +2676,10 @@ functions:
  (*) readX_relaxed(), writeX_relaxed()
 
      These are similar to readX() and writeX(), but provide weaker memory
-     ordering guarantees. Specifically, they do not guarantee ordering with
+     ordering guarantees.  Specifically, they do not guarantee ordering with
      respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
-     ordering with respect to LOCK or UNLOCK operations. If the latter is
-     required, an mmiowb() barrier can be used. Note that relaxed accesses to
+     ordering with respect to LOCK or UNLOCK operations.  If the latter is
+     required, an mmiowb() barrier can be used.  Note that relaxed accesses to
      the same peripheral are guaranteed to be ordered with respect to each
      other.
 
@@ -3040,8 +3081,9 @@ The Alpha defines the Linux kernel's memory barrier model.
 
 See the subsection on "Cache Coherency" above.
 
+
 VIRTUAL MACHINE GUESTS
--------------------
+----------------------
 
 Guests running within virtual machines might be affected by SMP effects even if
 the guest itself is compiled without SMP support.  This is an artifact of
@@ -3050,7 +3092,7 @@ barriers for this use-case would be possible but is often suboptimal.
 
 To handle this case optimally, low-level virt_mb() etc macros are available.
 These have the same effect as smp_mb() etc when SMP is enabled, but generate
-identical code for SMP and non-SMP systems. For example, virtual machine guests
+identical code for SMP and non-SMP systems.  For example, virtual machine guests
 should use virt_mb() rather than smp_mb() when synchronizing against a
 (possibly SMP) host.
 
@@ -3058,6 +3100,7 @@ These are equivalent to smp_mb() etc counterparts in all other respects,
 in particular, they do not control MMIO effects: to control
 MMIO effects, use mandatory barriers.
 
+
 ============
 EXAMPLE USES
 ============
diff --git a/Makefile b/Makefile
index acf6155421cc..0f9cb36d45c2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 6
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Charred Weasel
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index a83bbea62c67..0131a7058778 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -63,7 +63,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 	return res >= 0 ? 1 : 0;
 }
 
-static inline void __down_write(struct rw_semaphore *sem)
+static inline long ___down_write(struct rw_semaphore *sem)
 {
 	long oldcount;
 #ifndef	CONFIG_SMP
@@ -83,10 +83,24 @@ static inline void __down_write(struct rw_semaphore *sem)
 	:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
 	:"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
 #endif
-	if (unlikely(oldcount))
+	return oldcount;
+}
+
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	if (unlikely(___down_write(sem)))
 		rwsem_down_write_failed(sem);
 }
 
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	if (unlikely(___down_write(sem)))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
+}
+
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 0827d594b1f0..cd0cd5fd09a3 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -106,7 +106,7 @@
 
 			pmc: pmc@fffffc00 {
 				compatible = "atmel,at91sam9x5-pmc", "syscon";
-				reg = <0xfffffc00 0x100>;
+				reg = <0xfffffc00 0x200>;
 				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
 				interrupt-controller;
 				#address-cells = <1>;
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 78996bdbd3df..9817090c1b73 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -280,7 +280,7 @@
 			status = "disabled";
 
 			nfc@c0000000 {
-				compatible = "atmel,sama5d4-nfc";
+				compatible = "atmel,sama5d3-nfc";
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = < /* NFC Command Registers */
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index e0eea72deb87..a708fa1f0905 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -17,34 +17,28 @@
 #include <asm/mach/map.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/ptrace.h>
 
 #ifdef CONFIG_EFI
 void efi_init(void);
 
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define efi_call_virt(f, ...)						\
-({									\
-	efi_##f##_t *__f;						\
-	efi_status_t __s;						\
-									\
-	efi_virtmap_load();						\
-	__f = efi.systab->runtime->f;					\
-	__s = __f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
-	__s;								\
-})
+#define arch_efi_call_virt_setup()	efi_virtmap_load()
+#define arch_efi_call_virt_teardown()	efi_virtmap_unload()
 
-#define __efi_call_virt(f, ...)						\
+#define arch_efi_call_virt(f, args...)					\
 ({									\
 	efi_##f##_t *__f;						\
-									\
-	efi_virtmap_load();						\
 	__f = efi.systab->runtime->f;					\
-	__f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
+	__f(args);							\
 })
 
+#define ARCH_EFI_IRQ_FLAGS_MASK \
+	(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
+	 PSR_T_BIT | MODE_MASK)
+
 static inline void efi_set_pgd(struct mm_struct *mm)
 {
 	check_and_switch_context(mm, NULL);
@@ -59,7 +53,16 @@ void efi_virtmap_unload(void);
 
 /* arch specific definitions used by the stub code */
 
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...)	f(__VA_ARGS__)
+#define efi_is_64bit()			(false)
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg);
+void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si);
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
 
 /*
  * A reasonable upper bound for the uncompressed kernel size is 32 MBytes,
diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c
index ff8a9d8acfac..9f43ba012d10 100644
--- a/arch/arm/kernel/efi.c
+++ b/arch/arm/kernel/efi.c
@@ -11,6 +11,41 @@
 #include <asm/mach/map.h>
 #include <asm/mmu_context.h>
 
+static int __init set_permissions(pte_t *ptep, pgtable_t token,
+				  unsigned long addr, void *data)
+{
+	efi_memory_desc_t *md = data;
+	pte_t pte = *ptep;
+
+	if (md->attribute & EFI_MEMORY_RO)
+		pte = set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+	if (md->attribute & EFI_MEMORY_XP)
+		pte = set_pte_bit(pte, __pgprot(L_PTE_XN));
+	set_pte_ext(ptep, pte, PTE_EXT_NG);
+	return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+				       efi_memory_desc_t *md)
+{
+	unsigned long base, size;
+
+	base = md->virt_addr;
+	size = md->num_pages << EFI_PAGE_SHIFT;
+
+	/*
+	 * We can only use apply_to_page_range() if we can guarantee that the
+	 * entire region was mapped using pages. This should be the case if the
+	 * region does not cover any naturally aligned SECTION_SIZE sized
+	 * blocks.
+	 */
+	if (round_down(base + size, SECTION_SIZE) <
+	    round_up(base, SECTION_SIZE) + SECTION_SIZE)
+		return apply_to_page_range(mm, base, size, set_permissions, md);
+
+	return 0;
+}
+
 int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
 {
 	struct map_desc desc = {
@@ -34,5 +69,11 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
 		desc.type = MT_DEVICE;
 
 	create_mapping_late(mm, &desc, true);
+
+	/*
+	 * If stricter permissions were specified, apply them now.
+	 */
+	if (md->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))
+		return efi_set_mapping_permissions(mm, md);
 	return 0;
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 2c4bea39cf22..7d4e2850910c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -883,7 +883,8 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 		request_resource(&ioport_resource, &lp2);
 }
 
-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \
+    defined(CONFIG_EFI)
 struct screen_info screen_info = {
  .orig_video_lines	= 30,
  .orig_video_cols	= 80,
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index efa77c146415..521b1ec59157 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -2,6 +2,7 @@ menu "Platform selection"
 
 config ARCH_SUNXI
 	bool "Allwinner sunxi 64-bit SoC Family"
+	select GENERIC_IRQ_CHIP
 	help
 	  This enables support for Allwinner sunxi based SoCs like the A64.
 
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 8e88a696c9cb..622db3c6474e 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -4,6 +4,7 @@
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/neon.h>
+#include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
 #ifdef CONFIG_EFI
@@ -14,32 +15,29 @@ extern void efi_init(void);
 
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define efi_call_virt(f, ...)						\
+#define efi_set_mapping_permissions	efi_create_mapping
+
+#define arch_efi_call_virt_setup()					\
 ({									\
-	efi_##f##_t *__f;						\
-	efi_status_t __s;						\
-									\
 	kernel_neon_begin();						\
 	efi_virtmap_load();						\
-	__f = efi.systab->runtime->f;					\
-	__s = __f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
-	kernel_neon_end();						\
-	__s;								\
 })
 
-#define __efi_call_virt(f, ...)						\
+#define arch_efi_call_virt(f, args...)					\
 ({									\
 	efi_##f##_t *__f;						\
-									\
-	kernel_neon_begin();						\
-	efi_virtmap_load();						\
 	__f = efi.systab->runtime->f;					\
-	__f(__VA_ARGS__);						\
+	__f(args);							\
+})
+
+#define arch_efi_call_virt_teardown()					\
+({									\
 	efi_virtmap_unload();						\
 	kernel_neon_end();						\
 })
 
+#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
 /* arch specific definitions used by the stub code */
 
 /*
@@ -50,7 +48,16 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 #define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */
 #define MAX_FDT_OFFSET	SZ_512M
 
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...)	f(__VA_ARGS__)
+#define efi_is_64bit()			(true)
+
+#define alloc_screen_info(x...)		&screen_info
+#define free_screen_info(x...)
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
 
 #define EFI_ALLOC_ALIGN		SZ_64K
 
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index b6abc852f2a1..78f52488f9ff 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -17,22 +17,51 @@
 
 #include <asm/efi.h>
 
-int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
 {
-	pteval_t prot_val;
+	u64 attr = md->attribute;
+	u32 type = md->type;
 
-	/*
-	 * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
-	 * executable, everything else can be mapped with the XN bits
-	 * set.
-	 */
-	if ((md->attribute & EFI_MEMORY_WB) == 0)
-		prot_val = PROT_DEVICE_nGnRE;
-	else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
-		 !PAGE_ALIGNED(md->phys_addr))
-		prot_val = pgprot_val(PAGE_KERNEL_EXEC);
-	else
-		prot_val = pgprot_val(PAGE_KERNEL);
+	if (type == EFI_MEMORY_MAPPED_IO)
+		return PROT_DEVICE_nGnRE;
+
+	if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
+		      "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
+		/*
+		 * If the region is not aligned to the page size of the OS, we
+		 * can not use strict permissions, since that would also affect
+		 * the mapping attributes of the adjacent regions.
+		 */
+		return pgprot_val(PAGE_KERNEL_EXEC);
+
+	/* R-- */
+	if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+	    (EFI_MEMORY_XP | EFI_MEMORY_RO))
+		return pgprot_val(PAGE_KERNEL_RO);
+
+	/* R-X */
+	if (attr & EFI_MEMORY_RO)
+		return pgprot_val(PAGE_KERNEL_ROX);
+
+	/* RW- */
+	if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE)
+		return pgprot_val(PAGE_KERNEL);
+
+	/* RWX */
+	return pgprot_val(PAGE_KERNEL_EXEC);
+}
+
+/* we will fill this structure from the stub, so don't put it in .bss */
+struct screen_info screen_info __section(.data);
+
+int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
+{
+	pteval_t prot_val = create_mapping_protection(md);
 
 	create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
 			   md->num_pages << EFI_PAGE_SHIFT,
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 5e360ce88f10..1428849aece8 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -112,6 +112,7 @@ __efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 __efistub__text			= KALLSYMS_HIDE(_text);
 __efistub__end			= KALLSYMS_HIDE(_end);
 __efistub__edata		= KALLSYMS_HIDE(_edata);
+__efistub_screen_info		= KALLSYMS_HIDE(screen_info);
 
 #endif
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a34420a5df9a..b405bbb54431 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -476,6 +476,7 @@ emit_cond_jmp:
 		case BPF_JGE:
 			jmp_cond = A64_COND_CS;
 			break;
+		case BPF_JSET:
 		case BPF_JNE:
 			jmp_cond = A64_COND_NE;
 			break;
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index ce112472bdd6..8b23e070b844 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -49,8 +49,8 @@ __down_read (struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void
-__down_write (struct rw_semaphore *sem)
+static inline long
+___down_write (struct rw_semaphore *sem)
 {
 	long old, new;
 
@@ -59,10 +59,26 @@ __down_write (struct rw_semaphore *sem)
 		new = old + RWSEM_ACTIVE_WRITE_BIAS;
 	} while (cmpxchg_acq(&sem->count, old, new) != old);
 
-	if (old != 0)
+	return old;
+}
+
+static inline void
+__down_write (struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
 		rwsem_down_write_failed(sem);
 }
 
+static inline int
+__down_write_killable (struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
+}
+
 /*
  * unlock after reading
  */
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 300dac3702f1..bf0865cd438a 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -531,8 +531,6 @@ efi_init (void)
 	       efi.systab->hdr.revision >> 16,
 	       efi.systab->hdr.revision & 0xffff, vendor);
 
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
 	palo_phys      = EFI_INVALID_TABLE_ADDR;
 
 	if (efi_config_init(arch_tables) != 0)
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index fead491dfc28..c75e4471e618 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline long ___down_write(struct rw_semaphore *sem)
 {
 	signed long old, new, tmp;
 
@@ -104,13 +104,23 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "m" (tmp)
 		: "cc", "memory");
-	if (old != 0)
-		rwsem_down_write_failed(sem);
+
+	return old;
 }
 
 static inline void __down_write(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	if (___down_write(sem))
+		rwsem_down_write_failed(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
 }
 
 /*
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a319745a7b63..751c3373a92c 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += percpu.h
 generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
deleted file mode 100644
index edab57265293..000000000000
--- a/arch/sh/include/asm/rwsem.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * include/asm-sh/rwsem.h: R/W semaphores for SH using the stuff
- * in lib/rwsem.c.
- */
-
-#ifndef _ASM_SH_RWSEM_H
-#define _ASM_SH_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (atomic_inc_return((atomic_t *)(&sem->count)) > 0)
-		smp_wmb();
-	else
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			smp_wmb();
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				(atomic_t *)(&sem->count));
-	if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
-		smp_wmb();
-	else
-		rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	smp_wmb();
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_dec_return((atomic_t *)(&sem->count));
-	if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	smp_wmb();
-	if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-			      (atomic_t *)(&sem->count)) < 0)
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
-	if (tmp < 0)
-		rwsem_downgrade_wake(sem);
-}
-
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	__down_write(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	smp_mb();
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index e928618838bc..6024c26c0585 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
+generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
deleted file mode 100644
index 069bf4d663a1..000000000000
--- a/arch/sparc/include/asm/rwsem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * rwsem.h: R/W semaphores implemented using CAS
- *
- * Written by David S. Miller (davem@redhat.com), 2001.
- * Derived from asm-i386/rwsem.h
- */
-#ifndef _SPARC64_RWSEM_H
-#define _SPARC64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000L
-#define RWSEM_ACTIVE_BIAS		0x00000001L
-#define RWSEM_ACTIVE_MASK		0xffffffffL
-#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (unlikely(atomic64_inc_return((atomic64_t *)(&sem->count)) <= 0L))
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	while ((tmp = sem->count) >= 0L) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	long tmp;
-
-	tmp = atomic64_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				  (atomic64_t *)(&sem->count));
-	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
-		rwsem_down_write_failed(sem);
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	__down_write_nested(sem, 0);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = atomic64_dec_return((atomic64_t *)(&sem->count));
-	if (unlikely(tmp < -1L && (tmp & RWSEM_ACTIVE_MASK) == 0L))
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	if (unlikely(atomic64_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-					 (atomic64_t *)(&sem->count)) < 0L))
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
-	atomic64_add(delta, (atomic64_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = atomic64_add_return(-RWSEM_WAITING_BIAS, (atomic64_t *)(&sem->count));
-	if (tmp < 0L)
-		rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
-	return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 583d539a4197..52fef606bc54 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -571,312 +571,6 @@ free_handle:
 	efi_call_early(free_pool, pci_handle);
 }
 
-static void
-setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
-		 struct efi_pixel_bitmask pixel_info, int pixel_format)
-{
-	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
-		si->lfb_depth = 32;
-		si->lfb_linelength = pixels_per_scan_line * 4;
-		si->red_size = 8;
-		si->red_pos = 0;
-		si->green_size = 8;
-		si->green_pos = 8;
-		si->blue_size = 8;
-		si->blue_pos = 16;
-		si->rsvd_size = 8;
-		si->rsvd_pos = 24;
-	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
-		si->lfb_depth = 32;
-		si->lfb_linelength = pixels_per_scan_line * 4;
-		si->red_size = 8;
-		si->red_pos = 16;
-		si->green_size = 8;
-		si->green_pos = 8;
-		si->blue_size = 8;
-		si->blue_pos = 0;
-		si->rsvd_size = 8;
-		si->rsvd_pos = 24;
-	} else if (pixel_format == PIXEL_BIT_MASK) {
-		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
-		find_bits(pixel_info.green_mask, &si->green_pos,
-			  &si->green_size);
-		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
-		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
-			  &si->rsvd_size);
-		si->lfb_depth = si->red_size + si->green_size +
-			si->blue_size + si->rsvd_size;
-		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
-	} else {
-		si->lfb_depth = 4;
-		si->lfb_linelength = si->lfb_width / 2;
-		si->red_size = 0;
-		si->red_pos = 0;
-		si->green_size = 0;
-		si->green_pos = 0;
-		si->blue_size = 0;
-		si->blue_pos = 0;
-		si->rsvd_size = 0;
-		si->rsvd_pos = 0;
-	}
-}
-
-static efi_status_t
-__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_32 *mode;
-	efi_status_t status;
-	unsigned long m;
-
-	m = gop32->mode;
-	mode = (struct efi_graphics_output_protocol_mode_32 *)m;
-
-	status = efi_early->call(gop32->query_mode, gop32,
-				 mode->mode, size, info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop32(struct screen_info *si, efi_guid_t *proto,
-	    unsigned long size, void **gop_handle)
-{
-	struct efi_graphics_output_protocol_32 *gop32, *first_gop;
-	unsigned long nr_gops;
-	u16 width, height;
-	u32 pixels_per_scan_line;
-	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
-	int pixel_format;
-	efi_status_t status;
-	u32 *handles = (u32 *)(unsigned long)gop_handle;
-	int i;
-
-	first_gop = NULL;
-	gop32 = NULL;
-
-	nr_gops = size / sizeof(u32);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
-		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
-		bool conout_found = false;
-		void *dummy = NULL;
-		u32 h = handles[i];
-		u64 current_fb_base;
-
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop32);
-		if (status != EFI_SUCCESS)
-			continue;
-
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
-		if (status == EFI_SUCCESS)
-			conout_found = true;
-
-		status = __gop_query32(gop32, &info, &size, &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
-			/*
-			 * Systems that use the UEFI Console Splitter may
-			 * provide multiple GOP devices, not all of which are
-			 * backed by real hardware. The workaround is to search
-			 * for a GOP implementing the ConOut protocol, and if
-			 * one isn't found, to just fall back to the first GOP.
-			 */
-			width = info->horizontal_resolution;
-			height = info->vertical_resolution;
-			pixel_format = info->pixel_format;
-			pixel_info = info->pixel_information;
-			pixels_per_scan_line = info->pixels_per_scan_line;
-			fb_base = current_fb_base;
-
-			/*
-			 * Once we've found a GOP supporting ConOut,
-			 * don't bother looking any further.
-			 */
-			first_gop = gop32;
-			if (conout_found)
-				break;
-		}
-	}
-
-	/* Did we find any GOPs? */
-	if (!first_gop)
-		goto out;
-
-	/* EFI framebuffer */
-	si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
-	si->lfb_width = width;
-	si->lfb_height = height;
-	si->lfb_base = fb_base;
-
-	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
-	if (ext_lfb_base) {
-		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
-		si->ext_lfb_base = ext_lfb_base;
-	}
-
-	si->pages = 1;
-
-	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
-	si->lfb_size = si->lfb_linelength * si->lfb_height;
-
-	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
-}
-
-static efi_status_t
-__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_64 *mode;
-	efi_status_t status;
-	unsigned long m;
-
-	m = gop64->mode;
-	mode = (struct efi_graphics_output_protocol_mode_64 *)m;
-
-	status = efi_early->call(gop64->query_mode, gop64,
-				 mode->mode, size, info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop64(struct screen_info *si, efi_guid_t *proto,
-	    unsigned long size, void **gop_handle)
-{
-	struct efi_graphics_output_protocol_64 *gop64, *first_gop;
-	unsigned long nr_gops;
-	u16 width, height;
-	u32 pixels_per_scan_line;
-	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
-	int pixel_format;
-	efi_status_t status;
-	u64 *handles = (u64 *)(unsigned long)gop_handle;
-	int i;
-
-	first_gop = NULL;
-	gop64 = NULL;
-
-	nr_gops = size / sizeof(u64);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
-		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
-		bool conout_found = false;
-		void *dummy = NULL;
-		u64 h = handles[i];
-		u64 current_fb_base;
-
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop64);
-		if (status != EFI_SUCCESS)
-			continue;
-
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
-		if (status == EFI_SUCCESS)
-			conout_found = true;
-
-		status = __gop_query64(gop64, &info, &size, &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
-			/*
-			 * Systems that use the UEFI Console Splitter may
-			 * provide multiple GOP devices, not all of which are
-			 * backed by real hardware. The workaround is to search
-			 * for a GOP implementing the ConOut protocol, and if
-			 * one isn't found, to just fall back to the first GOP.
-			 */
-			width = info->horizontal_resolution;
-			height = info->vertical_resolution;
-			pixel_format = info->pixel_format;
-			pixel_info = info->pixel_information;
-			pixels_per_scan_line = info->pixels_per_scan_line;
-			fb_base = current_fb_base;
-
-			/*
-			 * Once we've found a GOP supporting ConOut,
-			 * don't bother looking any further.
-			 */
-			first_gop = gop64;
-			if (conout_found)
-				break;
-		}
-	}
-
-	/* Did we find any GOPs? */
-	if (!first_gop)
-		goto out;
-
-	/* EFI framebuffer */
-	si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
-	si->lfb_width = width;
-	si->lfb_height = height;
-	si->lfb_base = fb_base;
-
-	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
-	if (ext_lfb_base) {
-		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
-		si->ext_lfb_base = ext_lfb_base;
-	}
-
-	si->pages = 1;
-
-	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
-	si->lfb_size = si->lfb_linelength * si->lfb_height;
-
-	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
-}
-
-/*
- * See if we have Graphics Output Protocol
- */
-static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
-			      unsigned long size)
-{
-	efi_status_t status;
-	void **gop_handle = NULL;
-
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				size, (void **)&gop_handle);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				proto, NULL, &size, gop_handle);
-	if (status != EFI_SUCCESS)
-		goto free_handle;
-
-	if (efi_early->is64)
-		status = setup_gop64(si, proto, size, gop_handle);
-	else
-		status = setup_gop32(si, proto, size, gop_handle);
-
-free_handle:
-	efi_call_early(free_pool, gop_handle);
-	return status;
-}
-
 static efi_status_t
 setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
 {
@@ -1038,7 +732,7 @@ void setup_graphics(struct boot_params *boot_params)
 				EFI_LOCATE_BY_PROTOCOL,
 				&graphics_proto, NULL, &size, gop_handle);
 	if (status == EFI_BUFFER_TOO_SMALL)
-		status = setup_gop(si, &graphics_proto, size);
+		status = efi_setup_gop(NULL, si, &graphics_proto, size);
 
 	if (status != EFI_SUCCESS) {
 		size = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index d487e727f1ec..c0223f1a89d7 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -11,80 +11,6 @@
 
 #define DESC_TYPE_CODE_DATA	(1 << 0)
 
-#define EFI_CONSOLE_OUT_DEVICE_GUID    \
-	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
-		  0x3f, 0xc1, 0x4d)
-
-#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
-#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
-#define PIXEL_BIT_MASK					2
-#define PIXEL_BLT_ONLY					3
-#define PIXEL_FORMAT_MAX				4
-
-struct efi_pixel_bitmask {
-	u32 red_mask;
-	u32 green_mask;
-	u32 blue_mask;
-	u32 reserved_mask;
-};
-
-struct efi_graphics_output_mode_info {
-	u32 version;
-	u32 horizontal_resolution;
-	u32 vertical_resolution;
-	int pixel_format;
-	struct efi_pixel_bitmask pixel_information;
-	u32 pixels_per_scan_line;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_32 {
-	u32 max_mode;
-	u32 mode;
-	u32 info;
-	u32 size_of_info;
-	u64 frame_buffer_base;
-	u32 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_64 {
-	u32 max_mode;
-	u32 mode;
-	u64 info;
-	u64 size_of_info;
-	u64 frame_buffer_base;
-	u64 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode {
-	u32 max_mode;
-	u32 mode;
-	unsigned long info;
-	unsigned long size_of_info;
-	u64 frame_buffer_base;
-	unsigned long frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_32 {
-	u32 query_mode;
-	u32 set_mode;
-	u32 blt;
-	u32 mode;
-};
-
-struct efi_graphics_output_protocol_64 {
-	u64 query_mode;
-	u64 set_mode;
-	u64 blt;
-	u64 mode;
-};
-
-struct efi_graphics_output_protocol {
-	void *query_mode;
-	unsigned long set_mode;
-	unsigned long blt;
-	struct efi_graphics_output_protocol_mode *mode;
-};
-
 struct efi_uga_draw_protocol_32 {
 	u32 get_mode;
 	u32 set_mode;
diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config
index f9affcc3b9f1..9906505c998a 100644
--- a/arch/x86/configs/kvm_guest.config
+++ b/arch/x86/configs/kvm_guest.config
@@ -26,3 +26,6 @@ CONFIG_VIRTIO_NET=y
 CONFIG_9P_FS=y
 CONFIG_NET_9P=y
 CONFIG_NET_9P_VIRTIO=y
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_VIRTIO_INPUT=y
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index b30dd8154cc2..4cddd17153fb 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -384,5 +384,5 @@
 375	i386	membarrier		sys_membarrier
 376	i386	mlock2			sys_mlock2
 377	i386	copy_file_range		sys_copy_file_range
-378	i386	preadv2			sys_preadv2
-379	i386	pwritev2		sys_pwritev2
+378	i386	preadv2			sys_preadv2			compat_sys_preadv2
+379	i386	pwritev2		sys_pwritev2			compat_sys_pwritev2
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 53748c45e488..78d1e7467eae 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,7 @@
 
 #include <asm/fpu/api.h>
 #include <asm/pgtable.h>
+#include <asm/processor-flags.h>
 #include <asm/tlb.h>
 
 /*
@@ -28,33 +29,22 @@
 
 #define MAX_CMDLINE_ADDRESS	UINT_MAX
 
-#ifdef CONFIG_X86_32
+#define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF
 
+#ifdef CONFIG_X86_32
 
 extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
+#define arch_efi_call_virt_setup()	kernel_fpu_begin()
+#define arch_efi_call_virt_teardown()	kernel_fpu_end()
+
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
  */
-
-/* Use this macro if your virtual returns a non-void value */
-#define efi_call_virt(f, args...) \
+#define arch_efi_call_virt(f, args...)					\
 ({									\
-	efi_status_t __s;						\
-	kernel_fpu_begin();						\
-	__s = ((efi_##f##_t __attribute__((regparm(0)))*)		\
-		efi.systab->runtime->f)(args);				\
-	kernel_fpu_end();						\
-	__s;								\
-})
-
-/* Use this macro if your virtual call does not return any value */
-#define __efi_call_virt(f, args...) \
-({									\
-	kernel_fpu_begin();						\
 	((efi_##f##_t __attribute__((regparm(0)))*)			\
 		efi.systab->runtime->f)(args);				\
-	kernel_fpu_end();						\
 })
 
 #define efi_ioremap(addr, size, type, attr)	ioremap_cache(addr, size)
@@ -78,10 +68,8 @@ struct efi_scratch {
 	u64	phys_stack;
 } __packed;
 
-#define efi_call_virt(f, ...)						\
+#define arch_efi_call_virt_setup()					\
 ({									\
-	efi_status_t __s;						\
-									\
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
@@ -91,9 +79,13 @@ struct efi_scratch {
 		write_cr3((unsigned long)efi_scratch.efi_pgt);		\
 		__flush_tlb_all();					\
 	}								\
-									\
-	__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\
-									\
+})
+
+#define arch_efi_call_virt(f, args...)					\
+	efi_call((void *)efi.systab->runtime->f, args)			\
+
+#define arch_efi_call_virt_teardown()					\
+({									\
 	if (efi_scratch.use_pgd) {					\
 		write_cr3(efi_scratch.prev_cr3);			\
 		__flush_tlb_all();					\
@@ -101,15 +93,8 @@ struct efi_scratch {
 									\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
-	__s;								\
 })
 
-/*
- * All X86_64 virt calls return non-void values. Thus, use non-void call for
- * virt calls that would be void on X86_32.
- */
-#define __efi_call_virt(f, args...) efi_call_virt(f, args)
-
 extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 					u32 type, u64 attribute);
 
@@ -180,6 +165,8 @@ static inline bool efi_runtime_supported(void)
 extern struct console early_efi_console;
 extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+
 #ifdef CONFIG_EFI_MIXED
 extern void efi_thunk_runtime_setup(void);
 extern efi_status_t efi_thunk_set_virtual_address_map(
@@ -225,6 +212,11 @@ __pure const struct efi_config *__efi_early(void);
 #define efi_call_early(f, ...)						\
 	__efi_early()->call(__efi_early()->f, __VA_ARGS__);
 
+#define __efi_call_early(f, ...)					\
+	__efi_early()->call((unsigned long)f, __VA_ARGS__);
+
+#define efi_is_64bit()		__efi_early()->is64
+
 extern bool efi_reboot_required(void);
 
 #else
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ceec86eb68e9..453744c1d347 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -99,26 +99,36 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+#define ____down_write(sem, slow_path)			\
+({							\
+	long tmp;					\
+	struct rw_semaphore* ret;			\
+	asm volatile("# beginning down_write\n\t"	\
+		     LOCK_PREFIX "  xadd      %1,(%3)\n\t"	\
+		     /* adds 0xffff0001, returns the old value */ \
+		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
+		     /* was the active mask 0 before? */\
+		     "  jz        1f\n"			\
+		     "  call " slow_path "\n"		\
+		     "1:\n"				\
+		     "# ending down_write"		\
+		     : "+m" (sem->count), "=d" (tmp), "=a" (ret)	\
+		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
+		     : "memory", "cc");			\
+	ret;						\
+})
+
+static inline void __down_write(struct rw_semaphore *sem)
 {
-	long tmp;
-	asm volatile("# beginning down_write\n\t"
-		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
-		     /* adds 0xffff0001, returns the old value */
-		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
-		     /* was the active mask 0 before? */
-		     "  jz        1f\n"
-		     "  call call_rwsem_down_write_failed\n"
-		     "1:\n"
-		     "# ending down_write"
-		     : "+m" (sem->count), "=d" (tmp)
-		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS)
-		     : "memory", "cc");
+	____down_write(sem, "call_rwsem_down_write_failed");
 }
 
-static inline void __down_write(struct rw_semaphore *sem)
+static inline int __down_write_killable(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
+		return -EINTR;
+
+	return 0;
 }
 
 /*
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a969ae607be8..2e7513d1f1f4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -108,6 +108,14 @@ struct exception_table_entry {
 
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+#define swap_ex_entry_fixup(a, b, tmp, delta)			\
+	do {							\
+		(a)->fixup = (b)->fixup + (delta);		\
+		(b)->fixup = (tmp).fixup - (delta);		\
+		(a)->handler = (b)->handler + (delta);		\
+		(b)->handler = (tmp).handler - (delta);		\
+	} while (0)
+
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
 extern int early_fixup_exception(unsigned long *ip);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab0adc0fa5db..a9b31eb815f2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void)
 	mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
 	*((unsigned short *)__va(0x472)) = mode;
 
+	/*
+	 * If an EFI capsule has been registered with the firmware then
+	 * override the reboot= parameter.
+	 */
+	if (efi_capsule_pending(NULL)) {
+		pr_info("EFI capsule is pending, forcing EFI reboot.\n");
+		reboot_type = BOOT_EFI;
+	}
+
 	for (;;) {
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) {
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..3e84ef16f657 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -248,18 +248,17 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	if (config_enabled(CONFIG_X86_64))
 		sp -= 128;
 
-	if (!onsigstack) {
-		/* This is the X/Open sanctioned signal stack switching.  */
-		if (ka->sa.sa_flags & SA_ONSTACK) {
-			if (current->sas_ss_size)
-				sp = current->sas_ss_sp + current->sas_ss_size;
-		} else if (config_enabled(CONFIG_X86_32) &&
-			   (regs->ss & 0xffff) != __USER_DS &&
-			   !(ka->sa.sa_flags & SA_RESTORER) &&
-			   ka->sa.sa_restorer) {
-				/* This is the legacy signal stack switching. */
-				sp = (unsigned long) ka->sa.sa_restorer;
-		}
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	} else if (config_enabled(CONFIG_X86_32) &&
+		   !onsigstack &&
+		   (regs->ss & 0xffff) != __USER_DS &&
+		   !(ka->sa.sa_flags & SA_RESTORER) &&
+		   ka->sa.sa_restorer) {
+		/* This is the legacy signal stack switching. */
+		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
 	if (fpu->fpstate_active) {
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 5da924bbf0a0..623965e86b65 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -68,6 +68,21 @@ struct efifb_dmi_info efifb_dmi_list[] = {
 	[M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
 };
 
+void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+	int i;
+
+	for (i = 0; i < M_UNKNOWN; i++) {
+		if (efifb_dmi_list[i].base != 0 &&
+		    !strcmp(opt, efifb_dmi_list[i].optname)) {
+			si->lfb_base = efifb_dmi_list[i].base;
+			si->lfb_linelength = efifb_dmi_list[i].stride;
+			si->lfb_width = efifb_dmi_list[i].width;
+			si->lfb_height = efifb_dmi_list[i].height;
+		}
+	}
+}
+
 #define choose_value(dmivalue, fwvalue, field, flags) ({	\
 		typeof(fwvalue) _ret_ = fwvalue;		\
 		if ((flags) & (field))				\
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0f6294376fbd..a2f24af3c999 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5110,13 +5110,17 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
+	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+
 	if (!(ctxt->d & ByteOp))
 		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop)
+	      [fastop]"+S"(fop), "+r"(__sp)
 	    : "c"(ctxt->src2.val));
+
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 	if (!fop) /* exception is returned in fop variable */
 		return emulate_de(ctxt);
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index be110efa0096..bf2c6074efd2 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -29,8 +29,10 @@
  * there is contention on the semaphore.
  *
  * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
+ * registers (%eax, %edx and %ecx) except %eax which is either a return
+ * value or just gets clobbered. Same is true for %edx so make sure GCC
+ * reloads it after the slow path, by making it hold a temporary, for
+ * example see ____down_write().
  */
 
 #define save_common_regs \
@@ -106,6 +108,16 @@ ENTRY(call_rwsem_down_write_failed)
 	ret
 ENDPROC(call_rwsem_down_write_failed)
 
+ENTRY(call_rwsem_down_write_failed_killable)
+	FRAME_BEGIN
+	save_common_regs
+	movq %rax,%rdi
+	call rwsem_down_write_failed_killable
+	restore_common_regs
+	FRAME_END
+	ret
+ENDPROC(call_rwsem_down_write_failed_killable)
+
 ENTRY(call_rwsem_wake)
 	FRAME_BEGIN
 	/* do nothing if still outstanding active readers */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 01be9ec3bf79..a1f0e1d0ddc2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1125,8 +1125,14 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
-	if (cpa->pgd)
+	if (cpa->pgd) {
+		/*
+		 * Right now, we only execute this code path when mapping
+		 * the EFI virtual memory map regions, no other users
+		 * provide a ->pgd value. This may change in the future.
+		 */
 		return populate_pgd(cpa, vaddr);
+	}
 
 	/*
 	 * Ignore all non primary paths.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 994a7df84a7b..f93545e7dc54 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,10 +54,6 @@
 #include <asm/rtc.h>
 #include <asm/uv/uv.h>
 
-#define EFI_DEBUG
-
-struct efi_memory_map memmap;
-
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
@@ -119,11 +115,10 @@ void efi_get_time(struct timespec *now)
 
 void __init efi_find_mirror(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 	u64 mirror_size = 0, total_size = 0;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 
@@ -146,10 +141,9 @@ void __init efi_find_mirror(void)
 
 static void __init do_add_efi_memmap(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 		int e820_type;
@@ -209,47 +203,47 @@ int __init efi_memblock_x86_reserve_range(void)
 #else
 	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
 #endif
-	memmap.phys_map		= pmap;
-	memmap.nr_map		= e->efi_memmap_size /
+	efi.memmap.phys_map	= pmap;
+	efi.memmap.nr_map	= e->efi_memmap_size /
 				  e->efi_memdesc_size;
-	memmap.desc_size	= e->efi_memdesc_size;
-	memmap.desc_version	= e->efi_memdesc_version;
+	efi.memmap.desc_size	= e->efi_memdesc_size;
+	efi.memmap.desc_version	= e->efi_memdesc_version;
 
-	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+	WARN(efi.memmap.desc_version != 1,
+	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+	     efi.memmap.desc_version);
 
-	efi.memmap = &memmap;
+	memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
 
 	return 0;
 }
 
 void __init efi_print_memmap(void)
 {
-#ifdef EFI_DEBUG
 	efi_memory_desc_t *md;
-	void *p;
-	int i;
+	int i = 0;
 
-	for (p = memmap.map, i = 0;
-	     p < memmap.map_end;
-	     p += memmap.desc_size, i++) {
+	for_each_efi_memory_desc(md) {
 		char buf[64];
 
-		md = p;
 		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
-			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			i++, efi_md_typeattr_format(buf, sizeof(buf), md),
 			md->phys_addr,
 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
-#endif  /*  EFI_DEBUG  */
 }
 
 void __init efi_unmap_memmap(void)
 {
+	unsigned long size;
+
 	clear_bit(EFI_MEMMAP, &efi.flags);
-	if (memmap.map) {
-		early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
-		memmap.map = NULL;
+
+	size = efi.memmap.nr_map * efi.memmap.desc_size;
+	if (efi.memmap.map) {
+		early_memunmap(efi.memmap.map, size);
+		efi.memmap.map = NULL;
 	}
 }
 
@@ -352,8 +346,6 @@ static int __init efi_systab_init(void *phys)
 		       efi.systab->hdr.revision >> 16,
 		       efi.systab->hdr.revision & 0xffff);
 
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
 	return 0;
 }
 
@@ -440,17 +432,22 @@ static int __init efi_runtime_init(void)
 
 static int __init efi_memmap_init(void)
 {
+	unsigned long addr, size;
+
 	if (efi_enabled(EFI_PARAVIRT))
 		return 0;
 
 	/* Map the EFI memory map */
-	memmap.map = early_memremap((unsigned long)memmap.phys_map,
-				   memmap.nr_map * memmap.desc_size);
-	if (memmap.map == NULL) {
+	size = efi.memmap.nr_map * efi.memmap.desc_size;
+	addr = (unsigned long)efi.memmap.phys_map;
+
+	efi.memmap.map = early_memremap(addr, size);
+	if (efi.memmap.map == NULL) {
 		pr_err("Could not map the memory map!\n");
 		return -ENOMEM;
 	}
-	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
+	efi.memmap.map_end = efi.memmap.map + size;
 
 	if (add_efi_memmap)
 		do_add_efi_memmap();
@@ -552,12 +549,9 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
 void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	/* Make EFI runtime service code area executable */
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-
+	for_each_efi_memory_desc(md) {
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
@@ -604,12 +598,10 @@ void __init old_map_region(efi_memory_desc_t *md)
 /* Merge contiguous regions of the same type and attribute */
 static void __init efi_merge_regions(void)
 {
-	void *p;
 	efi_memory_desc_t *md, *prev_md = NULL;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+	for_each_efi_memory_desc(md) {
 		u64 prev_size;
-		md = p;
 
 		if (!prev_md) {
 			prev_md = md;
@@ -651,30 +643,31 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
 static void __init save_runtime_map(void)
 {
 #ifdef CONFIG_KEXEC_CORE
+	unsigned long desc_size;
 	efi_memory_desc_t *md;
-	void *tmp, *p, *q = NULL;
+	void *tmp, *q = NULL;
 	int count = 0;
 
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	desc_size = efi.memmap.desc_size;
 
+	for_each_efi_memory_desc(md) {
 		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
 		    (md->type == EFI_BOOT_SERVICES_CODE) ||
 		    (md->type == EFI_BOOT_SERVICES_DATA))
 			continue;
-		tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+		tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
 		if (!tmp)
 			goto out;
 		q = tmp;
 
-		memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+		memcpy(q + count * desc_size, md, desc_size);
 		count++;
 	}
 
-	efi_runtime_map_setup(q, count, memmap.desc_size);
+	efi_runtime_map_setup(q, count, desc_size);
 	return;
 
 out:
@@ -714,10 +707,10 @@ static inline void *efi_map_next_entry_reverse(void *entry)
 {
 	/* Initial call */
 	if (!entry)
-		return memmap.map_end - memmap.desc_size;
+		return efi.memmap.map_end - efi.memmap.desc_size;
 
-	entry -= memmap.desc_size;
-	if (entry < memmap.map)
+	entry -= efi.memmap.desc_size;
+	if (entry < efi.memmap.map)
 		return NULL;
 
 	return entry;
@@ -759,10 +752,10 @@ static void *efi_map_next_entry(void *entry)
 
 	/* Initial call */
 	if (!entry)
-		return memmap.map;
+		return efi.memmap.map;
 
-	entry += memmap.desc_size;
-	if (entry >= memmap.map_end)
+	entry += efi.memmap.desc_size;
+	if (entry >= efi.memmap.map_end)
 		return NULL;
 
 	return entry;
@@ -776,8 +769,11 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
 {
 	void *p, *new_memmap = NULL;
 	unsigned long left = 0;
+	unsigned long desc_size;
 	efi_memory_desc_t *md;
 
+	desc_size = efi.memmap.desc_size;
+
 	p = NULL;
 	while ((p = efi_map_next_entry(p))) {
 		md = p;
@@ -792,7 +788,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
 		efi_map_region(md);
 		get_systab_virt_addr(md);
 
-		if (left < memmap.desc_size) {
+		if (left < desc_size) {
 			new_memmap = realloc_pages(new_memmap, *pg_shift);
 			if (!new_memmap)
 				return NULL;
@@ -801,10 +797,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
 			(*pg_shift)++;
 		}
 
-		memcpy(new_memmap + (*count * memmap.desc_size), md,
-		       memmap.desc_size);
+		memcpy(new_memmap + (*count * desc_size), md, desc_size);
 
-		left -= memmap.desc_size;
+		left -= desc_size;
 		(*count)++;
 	}
 
@@ -816,7 +811,6 @@ static void __init kexec_enter_virtual_mode(void)
 #ifdef CONFIG_KEXEC_CORE
 	efi_memory_desc_t *md;
 	unsigned int num_pages;
-	void *p;
 
 	efi.systab = NULL;
 
@@ -840,8 +834,7 @@ static void __init kexec_enter_virtual_mode(void)
 	* Map efi regions which were passed via setup_data. The virt_addr is a
 	* fixed addr which was used in first kernel of a kexec boot.
 	*/
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		efi_map_region_fixed(md); /* FIXME: add error handling */
 		get_systab_virt_addr(md);
 	}
@@ -850,10 +843,10 @@ static void __init kexec_enter_virtual_mode(void)
 
 	BUG_ON(!efi.systab);
 
-	num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE);
+	num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
 	num_pages >>= PAGE_SHIFT;
 
-	if (efi_setup_page_tables(memmap.phys_map, num_pages)) {
+	if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
@@ -937,16 +930,16 @@ static void __init __efi_enter_virtual_mode(void)
 
 	if (efi_is_native()) {
 		status = phys_efi_set_virtual_address_map(
-				memmap.desc_size * count,
-				memmap.desc_size,
-				memmap.desc_version,
+				efi.memmap.desc_size * count,
+				efi.memmap.desc_size,
+				efi.memmap.desc_version,
 				(efi_memory_desc_t *)__pa(new_memmap));
 	} else {
 		status = efi_thunk_set_virtual_address_map(
 				efi_phys.set_virtual_address_map,
-				memmap.desc_size * count,
-				memmap.desc_size,
-				memmap.desc_version,
+				efi.memmap.desc_size * count,
+				efi.memmap.desc_size,
+				efi.memmap.desc_version,
 				(efi_memory_desc_t *)__pa(new_memmap));
 	}
 
@@ -1011,13 +1004,11 @@ void __init efi_enter_virtual_mode(void)
 u32 efi_mem_type(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!efi_enabled(EFI_MEMMAP))
 		return 0;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if ((md->phys_addr <= phys_addr) &&
 		    (phys_addr < (md->phys_addr +
 				  (md->num_pages << EFI_PAGE_SHIFT))))
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 49e4dd4a1f58..6e7242be1c87 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -55,14 +55,12 @@ struct efi_scratch efi_scratch;
 static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!(__supported_pte_mask & _PAGE_NX))
 		return;
 
 	/* Make EFI service code area executable */
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if (md->type == EFI_RUNTIME_SERVICES_CODE ||
 		    md->type == EFI_BOOT_SERVICES_CODE)
 			efi_set_executable(md, executable);
@@ -253,7 +251,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	 * Map all of RAM so that we can access arguments in the 1:1
 	 * mapping when making EFI runtime calls.
 	 */
-	for_each_efi_memory_desc(&memmap, md) {
+	for_each_efi_memory_desc(md) {
 		if (md->type != EFI_CONVENTIONAL_MEMORY &&
 		    md->type != EFI_LOADER_DATA &&
 		    md->type != EFI_LOADER_CODE)
@@ -398,7 +396,6 @@ void __init efi_runtime_update_mappings(void)
 	unsigned long pfn;
 	pgd_t *pgd = efi_pgd;
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (efi_enabled(EFI_OLD_MEMMAP)) {
 		if (__supported_pte_mask & _PAGE_NX)
@@ -409,9 +406,8 @@ void __init efi_runtime_update_mappings(void)
 	if (!efi_enabled(EFI_NX_PE_DATA))
 		return;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+	for_each_efi_memory_desc(md) {
 		unsigned long pf = 0;
-		md = p;
 
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ab50ada1d56e..097cb09d917b 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -195,10 +195,9 @@ static bool can_free_region(u64 start, u64 size)
 */
 void __init efi_reserve_boot_services(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		u64 start = md->phys_addr;
 		u64 size = md->num_pages << EFI_PAGE_SHIFT;
 		bool already_reserved;
@@ -250,10 +249,9 @@ void __init efi_reserve_boot_services(void)
 
 void __init efi_free_boot_services(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index b56855a1382a..28cf4c5d65ef 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += sections.h
 generic-y += siginfo.h
 generic-y += statfs.h
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
deleted file mode 100644
index 249619e7e7f2..000000000000
--- a/arch/xtensa/include/asm/rwsem.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * include/asm-xtensa/rwsem.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Largely copied from include/asm-ppc/rwsem.h
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_RWSEM_H
-#define _XTENSA_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0)
-		smp_wmb();
-	else
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			smp_wmb();
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				(atomic_t *)(&sem->count));
-	if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
-		smp_wmb();
-	else
-		rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	smp_wmb();
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_sub_return(1,(atomic_t *)(&sem->count));
-	if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	smp_wmb();
-	if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-			      (atomic_t *)(&sem->count)) < 0)
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
-	if (tmp < 0)
-		rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	smp_mb();
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif	/* _XTENSA_RWSEM_H */
diff --git a/block/blk-map.c b/block/blk-map.c
index a54f0543b956..b9f88b7751fb 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -9,24 +9,6 @@
 
 #include "blk.h"
 
-static bool iovec_gap_to_prv(struct request_queue *q,
-			     struct iovec *prv, struct iovec *cur)
-{
-	unsigned long prev_end;
-
-	if (!queue_virt_boundary(q))
-		return false;
-
-	if (prv->iov_base == NULL && prv->iov_len == 0)
-		/* prv is not set - don't check */
-		return false;
-
-	prev_end = (unsigned long)(prv->iov_base + prv->iov_len);
-
-	return (((unsigned long)cur->iov_base & queue_virt_boundary(q)) ||
-		prev_end & queue_virt_boundary(q));
-}
-
 int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		      struct bio *bio)
 {
@@ -125,31 +107,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 			struct rq_map_data *map_data,
 			const struct iov_iter *iter, gfp_t gfp_mask)
 {
-	struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
-	bool copy = (q->dma_pad_mask & iter->count) || map_data;
+	bool copy = false;
+	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
 	struct bio *bio = NULL;
 	struct iov_iter i;
 	int ret;
 
-	if (!iter || !iter->count)
-		return -EINVAL;
-
-	iov_for_each(iov, i, *iter) {
-		unsigned long uaddr = (unsigned long) iov.iov_base;
-
-		if (!iov.iov_len)
-			return -EINVAL;
-
-		/*
-		 * Keep going so we check length of all segments
-		 */
-		if ((uaddr & queue_dma_alignment(q)) ||
-		    iovec_gap_to_prv(q, &prv, &iov))
-			copy = true;
-
-		prv.iov_base = iov.iov_base;
-		prv.iov_len = iov.iov_len;
-	}
+	if (map_data)
+		copy = true;
+	else if (iov_iter_alignment(iter) & align)
+		copy = true;
+	else if (queue_virt_boundary(q))
+		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
 
 	i = *iter;
 	do {
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b86883aedca1..7d4acc449233 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1776,6 +1776,7 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver,
 static int do_test_rsa(struct crypto_akcipher *tfm,
 		       struct akcipher_testvec *vecs)
 {
+	char *xbuf[XBUFSIZE];
 	struct akcipher_request *req;
 	void *outbuf_enc = NULL;
 	void *outbuf_dec = NULL;
@@ -1784,9 +1785,12 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 	int err = -ENOMEM;
 	struct scatterlist src, dst, src_tab[2];
 
+	if (testmgr_alloc_buf(xbuf))
+		return err;
+
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
-		return err;
+		goto free_xbuf;
 
 	init_completion(&result.completion);
 
@@ -1804,9 +1808,14 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 	if (!outbuf_enc)
 		goto free_req;
 
+	if (WARN_ON(vecs->m_size > PAGE_SIZE))
+		goto free_all;
+
+	memcpy(xbuf[0], vecs->m, vecs->m_size);
+
 	sg_init_table(src_tab, 2);
-	sg_set_buf(&src_tab[0], vecs->m, 8);
-	sg_set_buf(&src_tab[1], vecs->m + 8, vecs->m_size - 8);
+	sg_set_buf(&src_tab[0], xbuf[0], 8);
+	sg_set_buf(&src_tab[1], xbuf[0] + 8, vecs->m_size - 8);
 	sg_init_one(&dst, outbuf_enc, out_len_max);
 	akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size,
 				   out_len_max);
@@ -1825,7 +1834,7 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		goto free_all;
 	}
 	/* verify that encrypted message is equal to expected */
-	if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) {
+	if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) {
 		pr_err("alg: rsa: encrypt test failed. Invalid output\n");
 		err = -EINVAL;
 		goto free_all;
@@ -1840,7 +1849,13 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		err = -ENOMEM;
 		goto free_all;
 	}
-	sg_init_one(&src, vecs->c, vecs->c_size);
+
+	if (WARN_ON(vecs->c_size > PAGE_SIZE))
+		goto free_all;
+
+	memcpy(xbuf[0], vecs->c, vecs->c_size);
+
+	sg_init_one(&src, xbuf[0], vecs->c_size);
 	sg_init_one(&dst, outbuf_dec, out_len_max);
 	init_completion(&result.completion);
 	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
@@ -1867,6 +1882,8 @@ free_all:
 	kfree(outbuf_enc);
 free_req:
 	akcipher_request_free(req);
+free_xbuf:
+	testmgr_free_buf(xbuf);
 	return err;
 }
 
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 5c79526245c2..a0380338946a 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -13,6 +13,7 @@
 #ifndef _REGMAP_INTERNAL_H
 #define _REGMAP_INTERNAL_H
 
+#include <linux/device.h>
 #include <linux/regmap.h>
 #include <linux/fs.h>
 #include <linux/list.h>
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 7526906ca080..5189fd6182f6 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -23,6 +23,8 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
+#include "internal.h"
+
 struct regmap_mmio_context {
 	void __iomem *regs;
 	unsigned val_bytes;
@@ -212,6 +214,7 @@ static const struct regmap_bus regmap_mmio = {
 	.reg_write = regmap_mmio_write,
 	.reg_read = regmap_mmio_read,
 	.free_context = regmap_mmio_free_context,
+	.val_format_endian_default = REGMAP_ENDIAN_LITTLE,
 };
 
 static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
@@ -245,7 +248,7 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
 	ctx->val_bytes = config->val_bits / 8;
 	ctx->clk = ERR_PTR(-ENODEV);
 
-	switch (config->reg_format_endian) {
+	switch (regmap_get_val_endian(dev, &regmap_mmio, config)) {
 	case REGMAP_ENDIAN_DEFAULT:
 	case REGMAP_ENDIAN_LITTLE:
 #ifdef __LITTLE_ENDIAN
diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c
index 7e58f6560399..4a36e415e938 100644
--- a/drivers/base/regmap/regmap-spmi.c
+++ b/drivers/base/regmap/regmap-spmi.c
@@ -142,7 +142,7 @@ static int regmap_spmi_ext_read(void *context,
 	while (val_size) {
 		len = min_t(size_t, val_size, 8);
 
-		err = spmi_ext_register_readl(context, addr, val, val_size);
+		err = spmi_ext_register_readl(context, addr, val, len);
 		if (err)
 			goto err_out;
 
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index e1670d533f97..6394152f648f 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -87,6 +87,31 @@ config EFI_RUNTIME_WRAPPERS
 config EFI_ARMSTUB
 	bool
 
+config EFI_BOOTLOADER_CONTROL
+	tristate "EFI Bootloader Control"
+	depends on EFI_VARS
+	default n
+	---help---
+	  This module installs a reboot hook, such that if reboot() is
+	  invoked with a string argument NNN, "NNN" is copied to the
+	  "LoaderEntryOneShot" EFI variable, to be read by the
+	  bootloader. If the string matches one of the boot labels
+	  defined in its configuration, the bootloader will boot once
+	  to that label. The "LoaderEntryRebootReason" EFI variable is
+	  set with the reboot reason: "reboot" or "shutdown". The
+	  bootloader reads this reboot reason and takes particular
+	  action according to its policy.
+
+config EFI_CAPSULE_LOADER
+	tristate "EFI capsule loader"
+	depends on EFI
+	help
+	  This option exposes a loader interface "/dev/efi_capsule_loader" for
+	  users to load EFI capsules. This driver requires working runtime
+	  capsule support in the firmware, which many OEMs do not provide.
+
+	  Most users should say N.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 62e654f255f4..a219640f881f 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -9,7 +9,8 @@
 #
 KASAN_SANITIZE_runtime-wrappers.o	:= n
 
-obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o
+obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o memattr.o
+obj-$(CONFIG_EFI)			+= capsule.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_ESRT)			+= esrt.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
@@ -18,7 +19,9 @@ obj-$(CONFIG_EFI_RUNTIME_MAP)		+= runtime-map.o
 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
 obj-$(CONFIG_EFI_STUB)			+= libstub/
 obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
+obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
 obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
+obj-$(CONFIG_EFI_CAPSULE_LOADER)	+= capsule-loader.o
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 8714f8c271ba..ef90f0c4b70a 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -11,17 +11,19 @@
  *
  */
 
+#define pr_fmt(fmt)	"efi: " fmt
+
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/mm_types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
 
 #include <asm/efi.h>
 
-struct efi_memory_map memmap;
-
 u64 efi_system_table;
 
 static int __init is_normal_ram(efi_memory_desc_t *md)
@@ -40,7 +42,7 @@ static phys_addr_t efi_to_phys(unsigned long addr)
 {
 	efi_memory_desc_t *md;
 
-	for_each_efi_memory_desc(&memmap, md) {
+	for_each_efi_memory_desc(md) {
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
 		if (md->virt_addr == 0)
@@ -53,6 +55,36 @@ static phys_addr_t efi_to_phys(unsigned long addr)
 	return addr;
 }
 
+static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+	{LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, NULL, &screen_info_table},
+	{NULL_GUID, NULL, NULL}
+};
+
+static void __init init_screen_info(void)
+{
+	struct screen_info *si;
+
+	if (screen_info_table != EFI_INVALID_TABLE_ADDR) {
+		si = early_memremap_ro(screen_info_table, sizeof(*si));
+		if (!si) {
+			pr_err("Could not map screen_info config table\n");
+			return;
+		}
+		screen_info = *si;
+		early_memunmap(si, sizeof(*si));
+
+		/* dummycon on ARM needs non-zero values for columns/lines */
+		screen_info.orig_video_cols = 80;
+		screen_info.orig_video_lines = 25;
+	}
+
+	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+	    memblock_is_map_memory(screen_info.lfb_base))
+		memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size);
+}
+
 static int __init uefi_init(void)
 {
 	efi_char16_t *c16;
@@ -85,6 +117,8 @@ static int __init uefi_init(void)
 			efi.systab->hdr.revision >> 16,
 			efi.systab->hdr.revision & 0xffff);
 
+	efi.runtime_version = efi.systab->hdr.revision;
+
 	/* Show what we know for posterity */
 	c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor),
 				sizeof(vendor) * sizeof(efi_char16_t));
@@ -108,7 +142,8 @@ static int __init uefi_init(void)
 		goto out;
 	}
 	retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
-					 sizeof(efi_config_table_t), NULL);
+					 sizeof(efi_config_table_t),
+					 arch_tables);
 
 	early_memunmap(config_tables, table_size);
 out:
@@ -143,7 +178,7 @@ static __init void reserve_regions(void)
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
 
-	for_each_efi_memory_desc(&memmap, md) {
+	for_each_efi_memory_desc(md) {
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
@@ -184,9 +219,9 @@ void __init efi_init(void)
 
 	efi_system_table = params.system_table;
 
-	memmap.phys_map = params.mmap;
-	memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
-	if (memmap.map == NULL) {
+	efi.memmap.phys_map = params.mmap;
+	efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
+	if (efi.memmap.map == NULL) {
 		/*
 		* If we are booting via UEFI, the UEFI memory map is the only
 		* description of memory we have, so there is little point in
@@ -194,28 +229,37 @@ void __init efi_init(void)
 		*/
 		panic("Unable to map EFI memory map.\n");
 	}
-	memmap.map_end = memmap.map + params.mmap_size;
-	memmap.desc_size = params.desc_size;
-	memmap.desc_version = params.desc_ver;
+	efi.memmap.map_end = efi.memmap.map + params.mmap_size;
+	efi.memmap.desc_size = params.desc_size;
+	efi.memmap.desc_version = params.desc_ver;
+
+	WARN(efi.memmap.desc_version != 1,
+	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+	      efi.memmap.desc_version);
 
 	if (uefi_init() < 0)
 		return;
 
 	reserve_regions();
-	early_memunmap(memmap.map, params.mmap_size);
+	efi_memattr_init();
+	early_memunmap(efi.memmap.map, params.mmap_size);
 
-	if (IS_ENABLED(CONFIG_ARM)) {
-		/*
-		 * ARM currently does not allow ioremap_cache() to be called on
-		 * memory regions that are covered by struct page. So remove the
-		 * UEFI memory map from the linear mapping.
-		 */
-		memblock_mark_nomap(params.mmap & PAGE_MASK,
-				    PAGE_ALIGN(params.mmap_size +
-					       (params.mmap & ~PAGE_MASK)));
-	} else {
-		memblock_reserve(params.mmap & PAGE_MASK,
-				 PAGE_ALIGN(params.mmap_size +
-					    (params.mmap & ~PAGE_MASK)));
-	}
+	memblock_reserve(params.mmap & PAGE_MASK,
+			 PAGE_ALIGN(params.mmap_size +
+				    (params.mmap & ~PAGE_MASK)));
+
+	init_screen_info();
+}
+
+static int __init register_gop_device(void)
+{
+	void *pd;
+
+	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+		return 0;
+
+	pd = platform_device_register_data(NULL, "efi-framebuffer", 0,
+					   &screen_info, sizeof(screen_info));
+	return PTR_ERR_OR_ZERO(pd);
 }
+subsys_initcall(register_gop_device);
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 6ae21e41a429..17ccf0a8787a 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -42,11 +42,13 @@ static struct mm_struct efi_mm = {
 static bool __init efi_virtmap_init(void)
 {
 	efi_memory_desc_t *md;
+	bool systab_found;
 
 	efi_mm.pgd = pgd_alloc(&efi_mm);
 	init_new_context(NULL, &efi_mm);
 
-	for_each_efi_memory_desc(&memmap, md) {
+	systab_found = false;
+	for_each_efi_memory_desc(md) {
 		phys_addr_t phys = md->phys_addr;
 		int ret;
 
@@ -64,7 +66,25 @@ static bool __init efi_virtmap_init(void)
 				&phys, ret);
 			return false;
 		}
+		/*
+		 * If this entry covers the address of the UEFI system table,
+		 * calculate and record its virtual address.
+		 */
+		if (efi_system_table >= phys &&
+		    efi_system_table < phys + (md->num_pages * EFI_PAGE_SIZE)) {
+			efi.systab = (void *)(unsigned long)(efi_system_table -
+							     phys + md->virt_addr);
+			systab_found = true;
+		}
+	}
+	if (!systab_found) {
+		pr_err("No virtual mapping found for the UEFI System Table\n");
+		return false;
 	}
+
+	if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
+		return false;
+
 	return true;
 }
 
@@ -89,26 +109,17 @@ static int __init arm_enable_runtime_services(void)
 
 	pr_info("Remapping and enabling EFI services.\n");
 
-	mapsize = memmap.map_end - memmap.map;
-	memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
-						   mapsize);
-	if (!memmap.map) {
-		pr_err("Failed to remap EFI memory map\n");
-		return -ENOMEM;
-	}
-	memmap.map_end = memmap.map + mapsize;
-	efi.memmap = &memmap;
+	mapsize = efi.memmap.map_end - efi.memmap.map;
 
-	efi.systab = (__force void *)ioremap_cache(efi_system_table,
-						   sizeof(efi_system_table_t));
-	if (!efi.systab) {
-		pr_err("Failed to remap EFI System Table\n");
+	efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB);
+	if (!efi.memmap.map) {
+		pr_err("Failed to remap EFI memory map\n");
 		return -ENOMEM;
 	}
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+	efi.memmap.map_end = efi.memmap.map + mapsize;
 
 	if (!efi_virtmap_init()) {
-		pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+		pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
 		return -ENOMEM;
 	}
 
@@ -116,8 +127,6 @@ static int __init arm_enable_runtime_services(void)
 	efi_native_runtime_setup();
 	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 
-	efi.runtime_version = efi.systab->hdr.revision;
-
 	return 0;
 }
 early_initcall(arm_enable_runtime_services);
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
new file mode 100644
index 000000000000..c99c24bc79b0
--- /dev/null
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -0,0 +1,343 @@
+/*
+ * EFI capsule loader driver.
+ *
+ * Copyright 2015 Intel Corporation
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/efi.h>
+
+#define NO_FURTHER_WRITE_ACTION -1
+
+struct capsule_info {
+	bool		header_obtained;
+	int		reset_type;
+	long		index;
+	size_t		count;
+	size_t		total_size;
+	struct page	**pages;
+	size_t		page_bytes_remain;
+};
+
+/**
+ * efi_free_all_buff_pages - free all previous allocated buffer pages
+ * @cap_info: pointer to current instance of capsule_info structure
+ *
+ *	In addition to freeing buffer pages, it flags NO_FURTHER_WRITE_ACTION
+ *	to cease processing data in subsequent write(2) calls until close(2)
+ *	is called.
+ **/
+static void efi_free_all_buff_pages(struct capsule_info *cap_info)
+{
+	while (cap_info->index > 0)
+		__free_page(cap_info->pages[--cap_info->index]);
+
+	cap_info->index = NO_FURTHER_WRITE_ACTION;
+}
+
+/**
+ * efi_capsule_setup_info - obtain the efi capsule header in the binary and
+ *			    setup capsule_info structure
+ * @cap_info: pointer to current instance of capsule_info structure
+ * @kbuff: a mapped first page buffer pointer
+ * @hdr_bytes: the total received number of bytes for efi header
+ **/
+static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
+				      void *kbuff, size_t hdr_bytes)
+{
+	efi_capsule_header_t *cap_hdr;
+	size_t pages_needed;
+	int ret;
+	void *temp_page;
+
+	/* Only process data block that is larger than efi header size */
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	/* Reset back to the correct offset of header */
+	cap_hdr = kbuff - cap_info->count;
+	pages_needed = ALIGN(cap_hdr->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+
+	if (pages_needed == 0) {
+		pr_err("%s: pages count invalid\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Check if the capsule binary supported */
+	ret = efi_capsule_supported(cap_hdr->guid, cap_hdr->flags,
+				    cap_hdr->imagesize,
+				    &cap_info->reset_type);
+	if (ret) {
+		pr_err("%s: efi_capsule_supported() failed\n",
+		       __func__);
+		return ret;
+	}
+
+	cap_info->total_size = cap_hdr->imagesize;
+	temp_page = krealloc(cap_info->pages,
+			     pages_needed * sizeof(void *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!temp_page) {
+		pr_debug("%s: krealloc() failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	cap_info->pages = temp_page;
+	cap_info->header_obtained = true;
+
+	return 0;
+}
+
+/**
+ * efi_capsule_submit_update - invoke the efi_capsule_update API once binary
+ *			       upload done
+ * @cap_info: pointer to current instance of capsule_info structure
+ **/
+static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
+{
+	int ret;
+	void *cap_hdr_temp;
+
+	cap_hdr_temp = kmap(cap_info->pages[0]);
+	if (!cap_hdr_temp) {
+		pr_debug("%s: kmap() failed\n", __func__);
+		return -EFAULT;
+	}
+
+	ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
+	kunmap(cap_info->pages[0]);
+	if (ret) {
+		pr_err("%s: efi_capsule_update() failed\n", __func__);
+		return ret;
+	}
+
+	/* Indicate capsule binary uploading is done */
+	cap_info->index = NO_FURTHER_WRITE_ACTION;
+	pr_info("%s: Successfully upload capsule file with reboot type '%s'\n",
+		__func__, !cap_info->reset_type ? "RESET_COLD" :
+		cap_info->reset_type == 1 ? "RESET_WARM" :
+		"RESET_SHUTDOWN");
+	return 0;
+}
+
+/**
+ * efi_capsule_write - store the capsule binary and pass it to
+ *		       efi_capsule_update() API
+ * @file: file pointer
+ * @buff: buffer pointer
+ * @count: number of bytes in @buff
+ * @offp: not used
+ *
+ *	Expectation:
+ *	- A user space tool should start at the beginning of capsule binary and
+ *	  pass data in sequentially.
+ *	- Users should close and re-open this file note in order to upload more
+ *	  capsules.
+ *	- After an error returned, user should close the file and restart the
+ *	  operation for the next try otherwise -EIO will be returned until the
+ *	  file is closed.
+ *	- An EFI capsule header must be located at the beginning of capsule
+ *	  binary file and passed in as first block data of write operation.
+ **/
+static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
+				 size_t count, loff_t *offp)
+{
+	int ret = 0;
+	struct capsule_info *cap_info = file->private_data;
+	struct page *page;
+	void *kbuff = NULL;
+	size_t write_byte;
+
+	if (count == 0)
+		return 0;
+
+	/* Return error while NO_FURTHER_WRITE_ACTION is flagged */
+	if (cap_info->index < 0)
+		return -EIO;
+
+	/* Only alloc a new page when previous page is full */
+	if (!cap_info->page_bytes_remain) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			pr_debug("%s: alloc_page() failed\n", __func__);
+			ret = -ENOMEM;
+			goto failed;
+		}
+
+		cap_info->pages[cap_info->index++] = page;
+		cap_info->page_bytes_remain = PAGE_SIZE;
+	}
+
+	page = cap_info->pages[cap_info->index - 1];
+
+	kbuff = kmap(page);
+	if (!kbuff) {
+		pr_debug("%s: kmap() failed\n", __func__);
+		ret = -EFAULT;
+		goto failed;
+	}
+	kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
+
+	/* Copy capsule binary data from user space to kernel space buffer */
+	write_byte = min_t(size_t, count, cap_info->page_bytes_remain);
+	if (copy_from_user(kbuff, buff, write_byte)) {
+		pr_debug("%s: copy_from_user() failed\n", __func__);
+		ret = -EFAULT;
+		goto fail_unmap;
+	}
+	cap_info->page_bytes_remain -= write_byte;
+
+	/* Setup capsule binary info structure */
+	if (!cap_info->header_obtained) {
+		ret = efi_capsule_setup_info(cap_info, kbuff,
+					     cap_info->count + write_byte);
+		if (ret)
+			goto fail_unmap;
+	}
+
+	cap_info->count += write_byte;
+	kunmap(page);
+
+	/* Submit the full binary to efi_capsule_update() API */
+	if (cap_info->header_obtained &&
+	    cap_info->count >= cap_info->total_size) {
+		if (cap_info->count > cap_info->total_size) {
+			pr_err("%s: upload size exceeded header defined size\n",
+			       __func__);
+			ret = -EINVAL;
+			goto failed;
+		}
+
+		ret = efi_capsule_submit_update(cap_info);
+		if (ret)
+			goto failed;
+	}
+
+	return write_byte;
+
+fail_unmap:
+	kunmap(page);
+failed:
+	efi_free_all_buff_pages(cap_info);
+	return ret;
+}
+
+/**
+ * efi_capsule_flush - called by file close or file flush
+ * @file: file pointer
+ * @id: not used
+ *
+ *	If a capsule is being partially uploaded then calling this function
+ *	will be treated as upload termination and will free those completed
+ *	buffer pages and -ECANCELED will be returned.
+ **/
+static int efi_capsule_flush(struct file *file, fl_owner_t id)
+{
+	int ret = 0;
+	struct capsule_info *cap_info = file->private_data;
+
+	if (cap_info->index > 0) {
+		pr_err("%s: capsule upload not complete\n", __func__);
+		efi_free_all_buff_pages(cap_info);
+		ret = -ECANCELED;
+	}
+
+	return ret;
+}
+
+/**
+ * efi_capsule_release - called by file close
+ * @inode: not used
+ * @file: file pointer
+ *
+ *	We will not free successfully submitted pages since efi update
+ *	requires data to be maintained across system reboot.
+ **/
+static int efi_capsule_release(struct inode *inode, struct file *file)
+{
+	struct capsule_info *cap_info = file->private_data;
+
+	kfree(cap_info->pages);
+	kfree(file->private_data);
+	file->private_data = NULL;
+	return 0;
+}
+
+/**
+ * efi_capsule_open - called by file open
+ * @inode: not used
+ * @file: file pointer
+ *
+ *	Will allocate each capsule_info memory for each file open call.
+ *	This provided the capability to support multiple file open feature
+ *	where user is not needed to wait for others to finish in order to
+ *	upload their capsule binary.
+ **/
+static int efi_capsule_open(struct inode *inode, struct file *file)
+{
+	struct capsule_info *cap_info;
+
+	cap_info = kzalloc(sizeof(*cap_info), GFP_KERNEL);
+	if (!cap_info)
+		return -ENOMEM;
+
+	cap_info->pages = kzalloc(sizeof(void *), GFP_KERNEL);
+	if (!cap_info->pages) {
+		kfree(cap_info);
+		return -ENOMEM;
+	}
+
+	file->private_data = cap_info;
+
+	return 0;
+}
+
+static const struct file_operations efi_capsule_fops = {
+	.owner = THIS_MODULE,
+	.open = efi_capsule_open,
+	.write = efi_capsule_write,
+	.flush = efi_capsule_flush,
+	.release = efi_capsule_release,
+	.llseek = no_llseek,
+};
+
+static struct miscdevice efi_capsule_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "efi_capsule_loader",
+	.fops = &efi_capsule_fops,
+};
+
+static int __init efi_capsule_loader_init(void)
+{
+	int ret;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return -ENODEV;
+
+	ret = misc_register(&efi_capsule_misc);
+	if (ret)
+		pr_err("%s: Failed to register misc char file note\n",
+		       __func__);
+
+	return ret;
+}
+module_init(efi_capsule_loader_init);
+
+static void __exit efi_capsule_loader_exit(void)
+{
+	misc_deregister(&efi_capsule_misc);
+}
+module_exit(efi_capsule_loader_exit);
+
+MODULE_DESCRIPTION("EFI capsule firmware binary loader");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
new file mode 100644
index 000000000000..53b9fd2293ee
--- /dev/null
+++ b/drivers/firmware/efi/capsule.c
@@ -0,0 +1,308 @@
+/*
+ * EFI capsule support.
+ *
+ * Copyright 2013 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/highmem.h>
+#include <linux/efi.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+
+typedef struct {
+	u64 length;
+	u64 data;
+} efi_capsule_block_desc_t;
+
+static bool capsule_pending;
+static bool stop_capsules;
+static int efi_reset_type = -1;
+
+/*
+ * capsule_mutex serialises access to both capsule_pending and
+ * efi_reset_type and stop_capsules.
+ */
+static DEFINE_MUTEX(capsule_mutex);
+
+/**
+ * efi_capsule_pending - has a capsule been passed to the firmware?
+ * @reset_type: store the type of EFI reset if capsule is pending
+ *
+ * To ensure that the registered capsule is processed correctly by the
+ * firmware we need to perform a specific type of reset. If a capsule is
+ * pending return the reset type in @reset_type.
+ *
+ * This function will race with callers of efi_capsule_update(), for
+ * example, calling this function while somebody else is in
+ * efi_capsule_update() but hasn't reached efi_capsue_update_locked()
+ * will miss the updates to capsule_pending and efi_reset_type after
+ * efi_capsule_update_locked() completes.
+ *
+ * A non-racy use is from platform reboot code because we use
+ * system_state to ensure no capsules can be sent to the firmware once
+ * we're at SYSTEM_RESTART. See efi_capsule_update_locked().
+ */
+bool efi_capsule_pending(int *reset_type)
+{
+	if (!capsule_pending)
+		return false;
+
+	if (reset_type)
+		*reset_type = efi_reset_type;
+
+	return true;
+}
+
+/*
+ * Whitelist of EFI capsule flags that we support.
+ *
+ * We do not handle EFI_CAPSULE_INITIATE_RESET because that would
+ * require us to prepare the kernel for reboot. Refuse to load any
+ * capsules with that flag and any other flags that we do not know how
+ * to handle.
+ */
+#define EFI_CAPSULE_SUPPORTED_FLAG_MASK			\
+	(EFI_CAPSULE_PERSIST_ACROSS_RESET | EFI_CAPSULE_POPULATE_SYSTEM_TABLE)
+
+/**
+ * efi_capsule_supported - does the firmware support the capsule?
+ * @guid: vendor guid of capsule
+ * @flags: capsule flags
+ * @size: size of capsule data
+ * @reset: the reset type required for this capsule
+ *
+ * Check whether a capsule with @flags is supported by the firmware
+ * and that @size doesn't exceed the maximum size for a capsule.
+ *
+ * No attempt is made to check @reset against the reset type required
+ * by any pending capsules because of the races involved.
+ */
+int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset)
+{
+	efi_capsule_header_t capsule;
+	efi_capsule_header_t *cap_list[] = { &capsule };
+	efi_status_t status;
+	u64 max_size;
+
+	if (flags & ~EFI_CAPSULE_SUPPORTED_FLAG_MASK)
+		return -EINVAL;
+
+	capsule.headersize = capsule.imagesize = sizeof(capsule);
+	memcpy(&capsule.guid, &guid, sizeof(efi_guid_t));
+	capsule.flags = flags;
+
+	status = efi.query_capsule_caps(cap_list, 1, &max_size, reset);
+	if (status != EFI_SUCCESS)
+		return efi_status_to_err(status);
+
+	if (size > max_size)
+		return -ENOSPC;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_supported);
+
+/*
+ * Every scatter gather list (block descriptor) page must end with a
+ * continuation pointer. The last continuation pointer of the last
+ * page must be zero to mark the end of the chain.
+ */
+#define SGLIST_PER_PAGE	((PAGE_SIZE / sizeof(efi_capsule_block_desc_t)) - 1)
+
+/*
+ * How many scatter gather list (block descriptor) pages do we need
+ * to map @count pages?
+ */
+static inline unsigned int sg_pages_num(unsigned int count)
+{
+	return DIV_ROUND_UP(count, SGLIST_PER_PAGE);
+}
+
+/**
+ * efi_capsule_update_locked - pass a single capsule to the firmware
+ * @capsule: capsule to send to the firmware
+ * @sg_pages: array of scatter gather (block descriptor) pages
+ * @reset: the reset type required for @capsule
+ *
+ * Since this function must be called under capsule_mutex check
+ * whether efi_reset_type will conflict with @reset, and atomically
+ * set it and capsule_pending if a capsule was successfully sent to
+ * the firmware.
+ *
+ * We also check to see if the system is about to restart, and if so,
+ * abort. This avoids races between efi_capsule_update() and
+ * efi_capsule_pending().
+ */
+static int
+efi_capsule_update_locked(efi_capsule_header_t *capsule,
+			  struct page **sg_pages, int reset)
+{
+	efi_physical_addr_t sglist_phys;
+	efi_status_t status;
+
+	lockdep_assert_held(&capsule_mutex);
+
+	/*
+	 * If someone has already registered a capsule that requires a
+	 * different reset type, we're out of luck and must abort.
+	 */
+	if (efi_reset_type >= 0 && efi_reset_type != reset) {
+		pr_err("Conflicting capsule reset type %d (%d).\n",
+		       reset, efi_reset_type);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the system is getting ready to restart it may have
+	 * called efi_capsule_pending() to make decisions (such as
+	 * whether to force an EFI reboot), and we're racing against
+	 * that call. Abort in that case.
+	 */
+	if (unlikely(stop_capsules)) {
+		pr_warn("Capsule update raced with reboot, aborting.\n");
+		return -EINVAL;
+	}
+
+	sglist_phys = page_to_phys(sg_pages[0]);
+
+	status = efi.update_capsule(&capsule, 1, sglist_phys);
+	if (status == EFI_SUCCESS) {
+		capsule_pending = true;
+		efi_reset_type = reset;
+	}
+
+	return efi_status_to_err(status);
+}
+
+/**
+ * efi_capsule_update - send a capsule to the firmware
+ * @capsule: capsule to send to firmware
+ * @pages: an array of capsule data pages
+ *
+ * Build a scatter gather list with EFI capsule block descriptors to
+ * map the capsule described by @capsule with its data in @pages and
+ * send it to the firmware via the UpdateCapsule() runtime service.
+ *
+ * @capsule must be a virtual mapping of the first page in @pages
+ * (@pages[0]) in the kernel address space. That is, a
+ * capsule_header_t that describes the entire contents of the capsule
+ * must be at the start of the first data page.
+ *
+ * Even though this function will validate that the firmware supports
+ * the capsule guid, users will likely want to check that
+ * efi_capsule_supported() returns true before calling this function
+ * because it makes it easier to print helpful error messages.
+ *
+ * If the capsule is successfully submitted to the firmware, any
+ * subsequent calls to efi_capsule_pending() will return true. @pages
+ * must not be released or modified if this function returns
+ * successfully.
+ *
+ * Callers must be prepared for this function to fail, which can
+ * happen if we raced with system reboot or if there is already a
+ * pending capsule that has a reset type that conflicts with the one
+ * required by @capsule. Do NOT use efi_capsule_pending() to detect
+ * this conflict since that would be racy. Instead, submit the capsule
+ * to efi_capsule_update() and check the return value.
+ *
+ * Return 0 on success, a converted EFI status code on failure.
+ */
+int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+{
+	u32 imagesize = capsule->imagesize;
+	efi_guid_t guid = capsule->guid;
+	unsigned int count, sg_count;
+	u32 flags = capsule->flags;
+	struct page **sg_pages;
+	int rv, reset_type;
+	int i, j;
+
+	rv = efi_capsule_supported(guid, flags, imagesize, &reset_type);
+	if (rv)
+		return rv;
+
+	count = DIV_ROUND_UP(imagesize, PAGE_SIZE);
+	sg_count = sg_pages_num(count);
+
+	sg_pages = kzalloc(sg_count * sizeof(*sg_pages), GFP_KERNEL);
+	if (!sg_pages)
+		return -ENOMEM;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_pages[i] = alloc_page(GFP_KERNEL);
+		if (!sg_pages[i]) {
+			rv = -ENOMEM;
+			goto out;
+		}
+	}
+
+	for (i = 0; i < sg_count; i++) {
+		efi_capsule_block_desc_t *sglist;
+
+		sglist = kmap(sg_pages[i]);
+		if (!sglist) {
+			rv = -ENOMEM;
+			goto out;
+		}
+
+		for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
+			u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+
+			sglist[j].length = sz;
+			sglist[j].data = page_to_phys(*pages++);
+
+			imagesize -= sz;
+			count--;
+		}
+
+		/* Continuation pointer */
+		sglist[j].length = 0;
+
+		if (i + 1 == sg_count)
+			sglist[j].data = 0;
+		else
+			sglist[j].data = page_to_phys(sg_pages[i + 1]);
+
+		kunmap(sg_pages[i]);
+	}
+
+	mutex_lock(&capsule_mutex);
+	rv = efi_capsule_update_locked(capsule, sg_pages, reset_type);
+	mutex_unlock(&capsule_mutex);
+
+out:
+	for (i = 0; rv && i < sg_count; i++) {
+		if (sg_pages[i])
+			__free_page(sg_pages[i]);
+	}
+
+	kfree(sg_pages);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_update);
+
+static int capsule_reboot_notify(struct notifier_block *nb, unsigned long event, void *cmd)
+{
+	mutex_lock(&capsule_mutex);
+	stop_capsules = true;
+	mutex_unlock(&capsule_mutex);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block capsule_reboot_nb = {
+	.notifier_call = capsule_reboot_notify,
+};
+
+static int __init capsule_reboot_register(void)
+{
+	return register_reboot_notifier(&capsule_reboot_nb);
+}
+core_initcall(capsule_reboot_register);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 3a69ed5ecfcb..05509f3aaee8 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -43,6 +43,7 @@ struct efi __read_mostly efi = {
 	.config_table		= EFI_INVALID_TABLE_ADDR,
 	.esrt			= EFI_INVALID_TABLE_ADDR,
 	.properties_table	= EFI_INVALID_TABLE_ADDR,
+	.mem_attr_table		= EFI_INVALID_TABLE_ADDR,
 };
 EXPORT_SYMBOL(efi);
 
@@ -256,7 +257,7 @@ subsys_initcall(efisubsys_init);
  */
 int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
 {
-	struct efi_memory_map *map = efi.memmap;
+	struct efi_memory_map *map = &efi.memmap;
 	phys_addr_t p, e;
 
 	if (!efi_enabled(EFI_MEMMAP)) {
@@ -338,6 +339,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
 	{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
 	{EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
 	{EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table},
+	{EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
 	{NULL_GUID, NULL, NULL},
 };
 
@@ -351,8 +353,9 @@ static __init int match_config_table(efi_guid_t *guid,
 		for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
 			if (!efi_guidcmp(*guid, table_types[i].guid)) {
 				*(table_types[i].ptr) = table;
-				pr_cont(" %s=0x%lx ",
-					table_types[i].name, table);
+				if (table_types[i].name)
+					pr_cont(" %s=0x%lx ",
+						table_types[i].name, table);
 				return 1;
 			}
 		}
@@ -620,16 +623,12 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
  */
 u64 __weak efi_mem_attributes(unsigned long phys_addr)
 {
-	struct efi_memory_map *map;
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!efi_enabled(EFI_MEMMAP))
 		return 0;
 
-	map = efi.memmap;
-	for (p = map->map; p < map->map_end; p += map->desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if ((md->phys_addr <= phys_addr) &&
 		    (phys_addr < (md->phys_addr +
 		    (md->num_pages << EFI_PAGE_SHIFT))))
@@ -637,3 +636,36 @@ u64 __weak efi_mem_attributes(unsigned long phys_addr)
 	}
 	return 0;
 }
+
+int efi_status_to_err(efi_status_t status)
+{
+	int err;
+
+	switch (status) {
+	case EFI_SUCCESS:
+		err = 0;
+		break;
+	case EFI_INVALID_PARAMETER:
+		err = -EINVAL;
+		break;
+	case EFI_OUT_OF_RESOURCES:
+		err = -ENOSPC;
+		break;
+	case EFI_DEVICE_ERROR:
+		err = -EIO;
+		break;
+	case EFI_WRITE_PROTECTED:
+		err = -EROFS;
+		break;
+	case EFI_SECURITY_VIOLATION:
+		err = -EACCES;
+		break;
+	case EFI_NOT_FOUND:
+		err = -ENOENT;
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	return err;
+}
diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c
new file mode 100644
index 000000000000..8dd0c7085e59
--- /dev/null
+++ b/drivers/firmware/efi/efibc.c
@@ -0,0 +1,113 @@
+/*
+ * efibc: control EFI bootloaders which obey LoaderEntryOneShot var
+ * Copyright (c) 2013-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) "efibc: " fmt
+
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+
+static void efibc_str_to_str16(const char *str, efi_char16_t *str16)
+{
+	size_t i;
+
+	for (i = 0; i < strlen(str); i++)
+		str16[i] = str[i];
+
+	str16[i] = '\0';
+}
+
+static int efibc_set_variable(const char *name, const char *value)
+{
+	int ret;
+	efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID;
+	struct efivar_entry *entry;
+	size_t size = (strlen(value) + 1) * sizeof(efi_char16_t);
+
+	if (size > sizeof(entry->var.Data)) {
+		pr_err("value is too large");
+		return -EINVAL;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		pr_err("failed to allocate efivar entry");
+		return -ENOMEM;
+	}
+
+	efibc_str_to_str16(name, entry->var.VariableName);
+	efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
+	memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
+
+	ret = efivar_entry_set(entry,
+			       EFI_VARIABLE_NON_VOLATILE
+			       | EFI_VARIABLE_BOOTSERVICE_ACCESS
+			       | EFI_VARIABLE_RUNTIME_ACCESS,
+			       size, entry->var.Data, NULL);
+	if (ret)
+		pr_err("failed to set %s EFI variable: 0x%x\n",
+		       name, ret);
+
+	kfree(entry);
+	return ret;
+}
+
+static int efibc_reboot_notifier_call(struct notifier_block *notifier,
+				      unsigned long event, void *data)
+{
+	const char *reason = "shutdown";
+	int ret;
+
+	if (event == SYS_RESTART)
+		reason = "reboot";
+
+	ret = efibc_set_variable("LoaderEntryRebootReason", reason);
+	if (ret || !data)
+		return NOTIFY_DONE;
+
+	efibc_set_variable("LoaderEntryOneShot", (char *)data);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block efibc_reboot_notifier = {
+	.notifier_call = efibc_reboot_notifier_call,
+};
+
+static int __init efibc_init(void)
+{
+	int ret;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&efibc_reboot_notifier);
+	if (ret)
+		pr_err("unable to register reboot notifier\n");
+
+	return ret;
+}
+module_init(efibc_init);
+
+static void __exit efibc_exit(void)
+{
+	unregister_reboot_notifier(&efibc_reboot_notifier);
+}
+module_exit(efibc_exit);
+
+MODULE_AUTHOR("Jeremy Compostella <jeremy.compostella@intel.com>");
+MODULE_AUTHOR("Matt Gumbel <matthew.k.gumbel@intel.com");
+MODULE_DESCRIPTION("EFI Bootloader Control");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 096adcbcb5a9..116b244dee68 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -661,7 +661,7 @@ static void efivar_update_sysfs_entries(struct work_struct *work)
 			return;
 
 		err = efivar_init(efivar_update_sysfs_entry, entry,
-				  true, false, &efivar_sysfs_list);
+				  false, &efivar_sysfs_list);
 		if (!err)
 			break;
 
@@ -730,8 +730,7 @@ int efivars_sysfs_init(void)
 		return -ENOMEM;
 	}
 
-	efivar_init(efivars_sysfs_callback, NULL, false,
-		    true, &efivar_sysfs_list);
+	efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
 
 	error = create_efivars_bin_attributes();
 	if (error) {
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index ed3a854950cc..48430aba13c1 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -57,7 +57,7 @@ static int __init cmp_fake_mem(const void *x1, const void *x2)
 void __init efi_fake_memmap(void)
 {
 	u64 start, end, m_start, m_end, m_attr;
-	int new_nr_map = memmap.nr_map;
+	int new_nr_map = efi.memmap.nr_map;
 	efi_memory_desc_t *md;
 	phys_addr_t new_memmap_phy;
 	void *new_memmap;
@@ -68,8 +68,7 @@ void __init efi_fake_memmap(void)
 		return;
 
 	/* count up the number of EFI memory descriptor */
-	for (old = memmap.map; old < memmap.map_end; old += memmap.desc_size) {
-		md = old;
+	for_each_efi_memory_desc(md) {
 		start = md->phys_addr;
 		end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
 
@@ -95,25 +94,25 @@ void __init efi_fake_memmap(void)
 	}
 
 	/* allocate memory for new EFI memmap */
-	new_memmap_phy = memblock_alloc(memmap.desc_size * new_nr_map,
+	new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map,
 					PAGE_SIZE);
 	if (!new_memmap_phy)
 		return;
 
 	/* create new EFI memmap */
 	new_memmap = early_memremap(new_memmap_phy,
-				    memmap.desc_size * new_nr_map);
+				    efi.memmap.desc_size * new_nr_map);
 	if (!new_memmap) {
-		memblock_free(new_memmap_phy, memmap.desc_size * new_nr_map);
+		memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map);
 		return;
 	}
 
-	for (old = memmap.map, new = new_memmap;
-	     old < memmap.map_end;
-	     old += memmap.desc_size, new += memmap.desc_size) {
+	for (old = efi.memmap.map, new = new_memmap;
+	     old < efi.memmap.map_end;
+	     old += efi.memmap.desc_size, new += efi.memmap.desc_size) {
 
 		/* copy original EFI memory descriptor */
-		memcpy(new, old, memmap.desc_size);
+		memcpy(new, old, efi.memmap.desc_size);
 		md = new;
 		start = md->phys_addr;
 		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
@@ -134,8 +133,8 @@ void __init efi_fake_memmap(void)
 				md->num_pages = (m_end - md->phys_addr + 1) >>
 					EFI_PAGE_SHIFT;
 				/* latter part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_end + 1;
 				md->num_pages = (end - md->phys_addr + 1) >>
@@ -147,16 +146,16 @@ void __init efi_fake_memmap(void)
 				md->num_pages = (m_start - md->phys_addr) >>
 					EFI_PAGE_SHIFT;
 				/* middle part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->attribute |= m_attr;
 				md->phys_addr = m_start;
 				md->num_pages = (m_end - m_start + 1) >>
 					EFI_PAGE_SHIFT;
 				/* last part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_end + 1;
 				md->num_pages = (end - m_end) >>
@@ -169,8 +168,8 @@ void __init efi_fake_memmap(void)
 				md->num_pages = (m_start - md->phys_addr) >>
 					EFI_PAGE_SHIFT;
 				/* latter part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_start;
 				md->num_pages = (end - md->phys_addr + 1) >>
@@ -182,10 +181,10 @@ void __init efi_fake_memmap(void)
 
 	/* swap into new EFI memmap */
 	efi_unmap_memmap();
-	memmap.map = new_memmap;
-	memmap.phys_map = new_memmap_phy;
-	memmap.nr_map = new_nr_map;
-	memmap.map_end = memmap.map + memmap.nr_map * memmap.desc_size;
+	efi.memmap.map = new_memmap;
+	efi.memmap.phys_map = new_memmap_phy;
+	efi.memmap.nr_map = new_nr_map;
+	efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size;
 	set_bit(EFI_MEMMAP, &efi.flags);
 
 	/* print new EFI memmap */
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index da99bbb74aeb..c06945160a41 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@ OBJECT_FILES_NON_STANDARD	:= y
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT			:= n
 
-lib-y				:= efi-stub-helper.o
+lib-y				:= efi-stub-helper.o gop.o
 
 # include the stub's generic dependencies from lib/ when building for ARM/arm64
 arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 414deb85c2e5..993aa56755f6 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -20,27 +20,49 @@
 
 bool __nokaslr;
 
-static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
+static int efi_get_secureboot(efi_system_table_t *sys_table_arg)
 {
-	static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
-	static efi_char16_t const var_name[] = {
+	static efi_char16_t const sb_var_name[] = {
 		'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 };
+	static efi_char16_t const sm_var_name[] = {
+		'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 };
 
+	efi_guid_t var_guid = EFI_GLOBAL_VARIABLE_GUID;
 	efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable;
-	unsigned long size = sizeof(u8);
-	efi_status_t status;
 	u8 val;
+	unsigned long size = sizeof(val);
+	efi_status_t status;
 
-	status = f_getvar((efi_char16_t *)var_name, (efi_guid_t *)&var_guid,
+	status = f_getvar((efi_char16_t *)sb_var_name, (efi_guid_t *)&var_guid,
 			  NULL, &size, &val);
 
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	if (val == 0)
+		return 0;
+
+	status = f_getvar((efi_char16_t *)sm_var_name, (efi_guid_t *)&var_guid,
+			  NULL, &size, &val);
+
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	if (val == 1)
+		return 0;
+
+	return 1;
+
+out_efi_err:
 	switch (status) {
-	case EFI_SUCCESS:
-		return val;
 	case EFI_NOT_FOUND:
 		return 0;
+	case EFI_DEVICE_ERROR:
+		return -EIO;
+	case EFI_SECURITY_VIOLATION:
+		return -EACCES;
 	default:
-		return 1;
+		return -EINVAL;
 	}
 }
 
@@ -147,6 +169,25 @@ void efi_char16_printk(efi_system_table_t *sys_table_arg,
 	out->output_string(out, str);
 }
 
+static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg)
+{
+	efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+	efi_status_t status;
+	unsigned long size;
+	void **gop_handle = NULL;
+	struct screen_info *si = NULL;
+
+	size = 0;
+	status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+				&gop_proto, NULL, &size, gop_handle);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		si = alloc_screen_info(sys_table_arg);
+		if (!si)
+			return NULL;
+		efi_setup_gop(sys_table_arg, si, &gop_proto, size);
+	}
+	return si;
+}
 
 /*
  * This function handles the architcture specific differences between arm and
@@ -185,6 +226,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID;
 	unsigned long reserve_addr = 0;
 	unsigned long reserve_size = 0;
+	int secure_boot = 0;
+	struct screen_info *si;
 
 	/* Check if we were booted by the EFI firmware */
 	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
@@ -237,6 +280,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 			__nokaslr = true;
 	}
 
+	si = setup_graphics(sys_table);
+
 	status = handle_kernel_image(sys_table, image_addr, &image_size,
 				     &reserve_addr,
 				     &reserve_size,
@@ -250,12 +295,21 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	if (status != EFI_SUCCESS)
 		pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
 
+	secure_boot = efi_get_secureboot(sys_table);
+	if (secure_boot > 0)
+		pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+
+	if (secure_boot < 0) {
+		pr_efi_err(sys_table,
+			"could not determine UEFI Secure Boot status.\n");
+	}
+
 	/*
 	 * Unauthenticated device tree data is a security hazard, so
 	 * ignore 'dtb=' unless UEFI Secure Boot is disabled.
 	 */
-	if (efi_secureboot_enabled(sys_table)) {
-		pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+	if (secure_boot != 0 && strstr(cmdline_ptr, "dtb=")) {
+		pr_efi(sys_table, "Ignoring DTB from command line.\n");
 	} else {
 		status = handle_cmdline_files(sys_table, image, cmdline_ptr,
 					      "dtb=",
@@ -309,6 +363,7 @@ fail_free_image:
 	efi_free(sys_table, image_size, *image_addr);
 	efi_free(sys_table, reserve_size, reserve_addr);
 fail_free_cmdline:
+	free_screen_info(sys_table, si);
 	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
 fail:
 	return EFI_ERROR;
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 6f42be4d0084..e1f0b28e1dcb 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -26,6 +26,43 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
 	return EFI_SUCCESS;
 }
 
+static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID;
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
+{
+	struct screen_info *si;
+	efi_status_t status;
+
+	/*
+	 * Unlike on arm64, where we can directly fill out the screen_info
+	 * structure from the stub, we need to allocate a buffer to hold
+	 * its contents while we hand over to the kernel proper from the
+	 * decompressor.
+	 */
+	status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+				sizeof(*si), (void **)&si);
+
+	if (status != EFI_SUCCESS)
+		return NULL;
+
+	status = efi_call_early(install_configuration_table,
+				&screen_info_guid, si);
+	if (status == EFI_SUCCESS)
+		return si;
+
+	efi_call_early(free_pool, si);
+	return NULL;
+}
+
+void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si)
+{
+	if (!si)
+		return;
+
+	efi_call_early(install_configuration_table, &screen_info_guid, NULL);
+	efi_call_early(free_pool, si);
+}
+
 efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 				 unsigned long *image_addr,
 				 unsigned long *image_size,
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 29ed2f9b218c..3bd127f95315 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -125,10 +125,12 @@ unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 
 	map.map_end = map.map + map_size;
 
-	for_each_efi_memory_desc(&map, md)
-		if (md->attribute & EFI_MEMORY_WB)
+	for_each_efi_memory_desc_in_map(&map, md) {
+		if (md->attribute & EFI_MEMORY_WB) {
 			if (membase > md->phys_addr)
 				membase = md->phys_addr;
+		}
+	}
 
 	efi_call_early(free_pool, map.map);
 
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
new file mode 100644
index 000000000000..932742e4cf23
--- /dev/null
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -0,0 +1,354 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <linux/screen_info.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+	u8 first, len;
+
+	first = 0;
+	len = 0;
+
+	if (mask) {
+		while (!(mask & 0x1)) {
+			mask = mask >> 1;
+			first++;
+		}
+
+		while (mask & 0x1) {
+			mask = mask >> 1;
+			len++;
+		}
+	}
+
+	*pos = first;
+	*size = len;
+}
+
+static void
+setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
+		 struct efi_pixel_bitmask pixel_info, int pixel_format)
+{
+	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 0;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 16;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 16;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 0;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BIT_MASK) {
+		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+		find_bits(pixel_info.green_mask, &si->green_pos,
+			  &si->green_size);
+		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+			  &si->rsvd_size);
+		si->lfb_depth = si->red_size + si->green_size +
+			si->blue_size + si->rsvd_size;
+		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+	} else {
+		si->lfb_depth = 4;
+		si->lfb_linelength = si->lfb_width / 2;
+		si->red_size = 0;
+		si->red_pos = 0;
+		si->green_size = 0;
+		si->green_pos = 0;
+		si->blue_size = 0;
+		si->blue_pos = 0;
+		si->rsvd_size = 0;
+		si->rsvd_pos = 0;
+	}
+}
+
+static efi_status_t
+__gop_query32(efi_system_table_t *sys_table_arg,
+	      struct efi_graphics_output_protocol_32 *gop32,
+	      struct efi_graphics_output_mode_info **info,
+	      unsigned long *size, u64 *fb_base)
+{
+	struct efi_graphics_output_protocol_mode_32 *mode;
+	efi_graphics_output_protocol_query_mode query_mode;
+	efi_status_t status;
+	unsigned long m;
+
+	m = gop32->mode;
+	mode = (struct efi_graphics_output_protocol_mode_32 *)m;
+	query_mode = (void *)(unsigned long)gop32->query_mode;
+
+	status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size,
+				  info);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	*fb_base = mode->frame_buffer_base;
+	return status;
+}
+
+static efi_status_t
+setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
+            efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+	struct efi_graphics_output_protocol_32 *gop32, *first_gop;
+	unsigned long nr_gops;
+	u16 width, height;
+	u32 pixels_per_scan_line;
+	u32 ext_lfb_base;
+	u64 fb_base;
+	struct efi_pixel_bitmask pixel_info;
+	int pixel_format;
+	efi_status_t status = EFI_NOT_FOUND;
+	u32 *handles = (u32 *)(unsigned long)gop_handle;
+	int i;
+
+	first_gop = NULL;
+	gop32 = NULL;
+
+	nr_gops = size / sizeof(u32);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info = NULL;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy = NULL;
+		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+		u64 current_fb_base;
+
+		status = efi_call_early(handle_protocol, h,
+					proto, (void **)&gop32);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		status = efi_call_early(handle_protocol, h,
+					&conout_proto, &dummy);
+		if (status == EFI_SUCCESS)
+			conout_found = true;
+
+		status = __gop_query32(sys_table_arg, gop32, &info, &size,
+				       &current_fb_base);
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+			/*
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
+
+			/*
+			 * Once we've found a GOP supporting ConOut,
+			 * don't bother looking any further.
+			 */
+			first_gop = gop32;
+			if (conout_found)
+				break;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto out;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
+	si->pages = 1;
+
+	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+	return status;
+}
+
+static efi_status_t
+__gop_query64(efi_system_table_t *sys_table_arg,
+	      struct efi_graphics_output_protocol_64 *gop64,
+	      struct efi_graphics_output_mode_info **info,
+	      unsigned long *size, u64 *fb_base)
+{
+	struct efi_graphics_output_protocol_mode_64 *mode;
+	efi_graphics_output_protocol_query_mode query_mode;
+	efi_status_t status;
+	unsigned long m;
+
+	m = gop64->mode;
+	mode = (struct efi_graphics_output_protocol_mode_64 *)m;
+	query_mode = (void *)(unsigned long)gop64->query_mode;
+
+	status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size,
+				  info);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	*fb_base = mode->frame_buffer_base;
+	return status;
+}
+
+static efi_status_t
+setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
+	    efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+	struct efi_graphics_output_protocol_64 *gop64, *first_gop;
+	unsigned long nr_gops;
+	u16 width, height;
+	u32 pixels_per_scan_line;
+	u32 ext_lfb_base;
+	u64 fb_base;
+	struct efi_pixel_bitmask pixel_info;
+	int pixel_format;
+	efi_status_t status = EFI_NOT_FOUND;
+	u64 *handles = (u64 *)(unsigned long)gop_handle;
+	int i;
+
+	first_gop = NULL;
+	gop64 = NULL;
+
+	nr_gops = size / sizeof(u64);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info = NULL;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy = NULL;
+		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+		u64 current_fb_base;
+
+		status = efi_call_early(handle_protocol, h,
+					proto, (void **)&gop64);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		status = efi_call_early(handle_protocol, h,
+					&conout_proto, &dummy);
+		if (status == EFI_SUCCESS)
+			conout_found = true;
+
+		status = __gop_query64(sys_table_arg, gop64, &info, &size,
+				       &current_fb_base);
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+			/*
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
+
+			/*
+			 * Once we've found a GOP supporting ConOut,
+			 * don't bother looking any further.
+			 */
+			first_gop = gop64;
+			if (conout_found)
+				break;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto out;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
+	si->pages = 1;
+
+	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+	return status;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+			   struct screen_info *si, efi_guid_t *proto,
+			   unsigned long size)
+{
+	efi_status_t status;
+	void **gop_handle = NULL;
+
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+				size, (void **)&gop_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_early(locate_handle,
+				EFI_LOCATE_BY_PROTOCOL,
+				proto, NULL, &size, gop_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	if (efi_is_64bit()) {
+		status = setup_gop64(sys_table_arg, si, proto, size,
+				     gop_handle);
+	} else {
+		status = setup_gop32(sys_table_arg, si, proto, size,
+				     gop_handle);
+	}
+
+free_handle:
+	efi_call_early(free_pool, gop_handle);
+	return status;
+}
diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
new file mode 100644
index 000000000000..236004b9a50d
--- /dev/null
+++ b/drivers/firmware/efi/memattr.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt)	"efi: memattr: " fmt
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+
+#include <asm/early_ioremap.h>
+
+static int __initdata tbl_size;
+
+/*
+ * Reserve the memory associated with the Memory Attributes configuration
+ * table, if it exists.
+ */
+int __init efi_memattr_init(void)
+{
+	efi_memory_attributes_table_t *tbl;
+
+	if (efi.mem_attr_table == EFI_INVALID_TABLE_ADDR)
+		return 0;
+
+	tbl = early_memremap(efi.mem_attr_table, sizeof(*tbl));
+	if (!tbl) {
+		pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+		       efi.mem_attr_table);
+		return -ENOMEM;
+	}
+
+	if (tbl->version > 1) {
+		pr_warn("Unexpected EFI Memory Attributes table version %d\n",
+			tbl->version);
+		goto unmap;
+	}
+
+	tbl_size = sizeof(*tbl) + tbl->num_entries * tbl->desc_size;
+	memblock_reserve(efi.mem_attr_table, tbl_size);
+
+unmap:
+	early_memunmap(tbl, sizeof(*tbl));
+	return 0;
+}
+
+/*
+ * Returns a copy @out of the UEFI memory descriptor @in if it is covered
+ * entirely by a UEFI memory map entry with matching attributes. The virtual
+ * address of @out is set according to the matching entry that was found.
+ */
+static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out)
+{
+	u64 in_paddr = in->phys_addr;
+	u64 in_size = in->num_pages << EFI_PAGE_SHIFT;
+	efi_memory_desc_t *md;
+
+	*out = *in;
+
+	if (in->type != EFI_RUNTIME_SERVICES_CODE &&
+	    in->type != EFI_RUNTIME_SERVICES_DATA) {
+		pr_warn("Entry type should be RuntimeServiceCode/Data\n");
+		return false;
+	}
+
+	if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) {
+		pr_warn("Entry attributes invalid: RO and XP bits both cleared\n");
+		return false;
+	}
+
+	if (PAGE_SIZE > EFI_PAGE_SIZE &&
+	    (!PAGE_ALIGNED(in->phys_addr) ||
+	     !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) {
+		/*
+		 * Since arm64 may execute with page sizes of up to 64 KB, the
+		 * UEFI spec mandates that RuntimeServices memory regions must
+		 * be 64 KB aligned. We need to validate this here since we will
+		 * not be able to tighten permissions on such regions without
+		 * affecting adjacent regions.
+		 */
+		pr_warn("Entry address region misaligned\n");
+		return false;
+	}
+
+	for_each_efi_memory_desc(md) {
+		u64 md_paddr = md->phys_addr;
+		u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0) {
+			/* no virtual mapping has been installed by the stub */
+			break;
+		}
+
+		if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size)
+			continue;
+
+		/*
+		 * This entry covers the start of @in, check whether
+		 * it covers the end as well.
+		 */
+		if (md_paddr + md_size < in_paddr + in_size) {
+			pr_warn("Entry covers multiple EFI memory map regions\n");
+			return false;
+		}
+
+		if (md->type != in->type) {
+			pr_warn("Entry type deviates from EFI memory map region type\n");
+			return false;
+		}
+
+		out->virt_addr = in_paddr + (md->virt_addr - md_paddr);
+
+		return true;
+	}
+
+	pr_warn("No matching entry found in the EFI memory map\n");
+	return false;
+}
+
+/*
+ * To be called after the EFI page tables have been populated. If a memory
+ * attributes table is available, its contents will be used to update the
+ * mappings with tightened permissions as described by the table.
+ * This requires the UEFI memory map to have already been populated with
+ * virtual addresses.
+ */
+int __init efi_memattr_apply_permissions(struct mm_struct *mm,
+					 efi_memattr_perm_setter fn)
+{
+	efi_memory_attributes_table_t *tbl;
+	int i, ret;
+
+	if (tbl_size <= sizeof(*tbl))
+		return 0;
+
+	/*
+	 * We need the EFI memory map to be setup so we can use it to
+	 * lookup the virtual addresses of all entries in the  of EFI
+	 * Memory Attributes table. If it isn't available, this
+	 * function should not be called.
+	 */
+	if (WARN_ON(!efi_enabled(EFI_MEMMAP)))
+		return 0;
+
+	tbl = memremap(efi.mem_attr_table, tbl_size, MEMREMAP_WB);
+	if (!tbl) {
+		pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+		       efi.mem_attr_table);
+		return -ENOMEM;
+	}
+
+	if (efi_enabled(EFI_DBG))
+		pr_info("Processing EFI Memory Attributes table:\n");
+
+	for (i = ret = 0; ret == 0 && i < tbl->num_entries; i++) {
+		efi_memory_desc_t md;
+		unsigned long size;
+		bool valid;
+		char buf[64];
+
+		valid = entry_is_valid((void *)tbl->entry + i * tbl->desc_size,
+				       &md);
+		size = md.num_pages << EFI_PAGE_SHIFT;
+		if (efi_enabled(EFI_DBG) || !valid)
+			pr_info("%s 0x%012llx-0x%012llx %s\n",
+				valid ? "" : "!", md.phys_addr,
+				md.phys_addr + size - 1,
+				efi_md_typeattr_format(buf, sizeof(buf), &md));
+
+		if (valid)
+			ret = fn(mm, &md);
+	}
+	memunmap(tbl);
+	return ret;
+}
diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c
index 9c59d1c795d1..62ead9b9d871 100644
--- a/drivers/firmware/efi/reboot.c
+++ b/drivers/firmware/efi/reboot.c
@@ -9,7 +9,8 @@ int efi_reboot_quirk_mode = -1;
 
 void efi_reboot(enum reboot_mode reboot_mode, const char *__unused)
 {
-	int efi_mode;
+	const char *str[] = { "cold", "warm", "shutdown", "platform" };
+	int efi_mode, cap_reset_mode;
 
 	if (!efi_enabled(EFI_RUNTIME_SERVICES))
 		return;
@@ -30,6 +31,15 @@ void efi_reboot(enum reboot_mode reboot_mode, const char *__unused)
 	if (efi_reboot_quirk_mode != -1)
 		efi_mode = efi_reboot_quirk_mode;
 
+	if (efi_capsule_pending(&cap_reset_mode)) {
+		if (efi_mode != cap_reset_mode)
+			printk(KERN_CRIT "efi: %s reset requested but pending "
+			       "capsule update requires %s reset... Performing "
+			       "%s reset.\n", str[efi_mode], str[cap_reset_mode],
+			       str[cap_reset_mode]);
+		efi_mode = cap_reset_mode;
+	}
+
 	efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL);
 }
 
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index de6953039af6..23bef6bb73ee 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -16,10 +16,70 @@
 
 #include <linux/bug.h>
 #include <linux/efi.h>
+#include <linux/irqflags.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/stringify.h>
 #include <asm/efi.h>
 
+static void efi_call_virt_check_flags(unsigned long flags, const char *call)
+{
+	unsigned long cur_flags, mismatch;
+
+	local_save_flags(cur_flags);
+
+	mismatch = flags ^ cur_flags;
+	if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK))
+		return;
+
+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
+	pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
+			   flags, cur_flags, call);
+	local_irq_restore(flags);
+}
+
+/*
+ * Arch code can implement the following three template macros, avoiding
+ * reptition for the void/non-void return cases of {__,}efi_call_virt:
+ *
+ *  * arch_efi_call_virt_setup
+ *
+ *    Sets up the environment for the call (e.g. switching page tables,
+ *    allowing kernel-mode use of floating point, if required).
+ *
+ *  * arch_efi_call_virt
+ *
+ *    Performs the call. The last expression in the macro must be the call
+ *    itself, allowing the logic to be shared by the void and non-void
+ *    cases.
+ *
+ *  * arch_efi_call_virt_teardown
+ *
+ *    Restores the usual kernel environment once the call has returned.
+ */
+
+#define efi_call_virt(f, args...)					\
+({									\
+	efi_status_t __s;						\
+	unsigned long flags;						\
+	arch_efi_call_virt_setup();					\
+	local_save_flags(flags);					\
+	__s = arch_efi_call_virt(f, args);				\
+	efi_call_virt_check_flags(flags, __stringify(f));		\
+	arch_efi_call_virt_teardown();					\
+	__s;								\
+})
+
+#define __efi_call_virt(f, args...)					\
+({									\
+	unsigned long flags;						\
+	arch_efi_call_virt_setup();					\
+	local_save_flags(flags);					\
+	arch_efi_call_virt(f, args);					\
+	efi_call_virt_check_flags(flags, __stringify(f));		\
+	arch_efi_call_virt_teardown();					\
+})
+
 /*
  * According to section 7.1 of the UEFI spec, Runtime Services are not fully
  * reentrant, and there are particular combinations of calls that need to be
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 34b741940494..d3b751383286 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -329,39 +329,6 @@ check_var_size_nonblocking(u32 attributes, unsigned long size)
 	return fops->query_variable_store(attributes, size, true);
 }
 
-static int efi_status_to_err(efi_status_t status)
-{
-	int err;
-
-	switch (status) {
-	case EFI_SUCCESS:
-		err = 0;
-		break;
-	case EFI_INVALID_PARAMETER:
-		err = -EINVAL;
-		break;
-	case EFI_OUT_OF_RESOURCES:
-		err = -ENOSPC;
-		break;
-	case EFI_DEVICE_ERROR:
-		err = -EIO;
-		break;
-	case EFI_WRITE_PROTECTED:
-		err = -EROFS;
-		break;
-	case EFI_SECURITY_VIOLATION:
-		err = -EACCES;
-		break;
-	case EFI_NOT_FOUND:
-		err = -ENOENT;
-		break;
-	default:
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
 static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor,
 				struct list_head *head)
 {
@@ -452,8 +419,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
  * Returns 0 on success, or a kernel error code on failure.
  */
 int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
-		void *data, bool atomic, bool duplicates,
-		struct list_head *head)
+		void *data, bool duplicates, struct list_head *head)
 {
 	const struct efivar_operations *ops = __efivars->ops;
 	unsigned long variable_name_size = 1024;
@@ -483,7 +449,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
 						&vendor_guid);
 		switch (status) {
 		case EFI_SUCCESS:
-			if (!atomic)
+			if (duplicates)
 				spin_unlock_irq(&__efivars->lock);
 
 			variable_name_size = var_name_strnsize(variable_name,
@@ -498,21 +464,19 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
 			 * and may end up looping here forever.
 			 */
 			if (duplicates &&
-			    variable_is_present(variable_name, &vendor_guid, head)) {
+			    variable_is_present(variable_name, &vendor_guid,
+						head)) {
 				dup_variable_bug(variable_name, &vendor_guid,
 						 variable_name_size);
-				if (!atomic)
-					spin_lock_irq(&__efivars->lock);
-
 				status = EFI_NOT_FOUND;
-				break;
+			} else {
+				err = func(variable_name, vendor_guid,
+					   variable_name_size, data);
+				if (err)
+					status = EFI_NOT_FOUND;
 			}
 
-			err = func(variable_name, vendor_guid, variable_name_size, data);
-			if (err)
-				status = EFI_NOT_FOUND;
-
-			if (!atomic)
+			if (duplicates)
 				spin_lock_irq(&__efivars->lock);
 
 			break;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index bf731e9f643e..7f85c2c1d681 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -276,8 +276,8 @@ static int amdgpu_atombios_dp_get_dp_link_config(struct drm_connector *connector
 			}
 		}
 	} else {
-		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+			for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
 				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
 				if (max_pix_clock >= pix_clock) {
 					*dp_lanes = lane_num;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a0f1bd711b53..e3f4c725a1c6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2872,20 +2872,6 @@ static void intel_dp_info(struct seq_file *m,
 		intel_panel_info(m, &intel_connector->panel);
 }
 
-static void intel_dp_mst_info(struct seq_file *m,
-			  struct intel_connector *intel_connector)
-{
-	struct intel_encoder *intel_encoder = intel_connector->encoder;
-	struct intel_dp_mst_encoder *intel_mst =
-		enc_to_mst(&intel_encoder->base);
-	struct intel_digital_port *intel_dig_port = intel_mst->primary;
-	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	bool has_audio = drm_dp_mst_port_has_audio(&intel_dp->mst_mgr,
-					intel_connector->port);
-
-	seq_printf(m, "\taudio support: %s\n", yesno(has_audio));
-}
-
 static void intel_hdmi_info(struct seq_file *m,
 			    struct intel_connector *intel_connector)
 {
@@ -2929,8 +2915,6 @@ static void intel_connector_info(struct seq_file *m,
 			intel_hdmi_info(m, intel_connector);
 		else if (intel_encoder->type == INTEL_OUTPUT_LVDS)
 			intel_lvds_info(m, intel_connector);
-		else if (intel_encoder->type == INTEL_OUTPUT_DP_MST)
-			intel_dp_mst_info(m, intel_connector);
 	}
 
 	seq_printf(m, "\tmodes:\n");
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fffdac801d3b..363bd79dea2e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7444,6 +7444,8 @@ enum skl_disp_power_wells {
 #define  TRANS_CLK_SEL_DISABLED		(0x0<<29)
 #define  TRANS_CLK_SEL_PORT(x)		(((x)+1)<<29)
 
+#define CDCLK_FREQ			_MMIO(0x46200)
+
 #define _TRANSA_MSA_MISC		0x60410
 #define _TRANSB_MSA_MISC		0x61410
 #define _TRANSC_MSA_MISC		0x62410
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 30f921421b0c..7d281b40064a 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -262,8 +262,7 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder)
 	tmp |= AUD_CONFIG_N_PROG_ENABLE;
 	tmp &= ~AUD_CONFIG_UPPER_N_MASK;
 	tmp &= ~AUD_CONFIG_LOWER_N_MASK;
-	if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DISPLAYPORT) ||
-	    intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DP_MST))
+	if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DISPLAYPORT))
 		tmp |= AUD_CONFIG_N_VALUE_INDEX;
 	I915_WRITE(HSW_AUD_CFG(pipe), tmp);
 
@@ -476,8 +475,7 @@ static void ilk_audio_codec_enable(struct drm_connector *connector,
 	tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
 	tmp &= ~AUD_CONFIG_N_PROG_ENABLE;
 	tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK;
-	if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DISPLAYPORT) ||
-	    intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DP_MST))
+	if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DISPLAYPORT))
 		tmp |= AUD_CONFIG_N_VALUE_INDEX;
 	else
 		tmp |= audio_config_hdmi_pixel_clock(adjusted_mode);
@@ -515,8 +513,7 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder)
 
 	/* ELD Conn_Type */
 	connector->eld[5] &= ~(3 << 2);
-	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
-	    intel_pipe_has_type(crtc, INTEL_OUTPUT_DP_MST))
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT))
 		connector->eld[5] |= (1 << 2);
 
 	connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 505fc5cf26f8..0364292367b1 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -257,8 +257,14 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder,
 		pipe_config->has_pch_encoder = true;
 
 	/* LPT FDI RX only supports 8bpc. */
-	if (HAS_PCH_LPT(dev))
+	if (HAS_PCH_LPT(dev)) {
+		if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) {
+			DRM_DEBUG_KMS("LPT only supports 24bpp\n");
+			return false;
+		}
+
 		pipe_config->pipe_bpp = 24;
+	}
 
 	/* FDI must always be 2.7 GHz */
 	if (HAS_DDI(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 3b57bf06abe8..96ffcc541e17 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3106,23 +3106,6 @@ void intel_ddi_fdi_disable(struct drm_crtc *crtc)
 	I915_WRITE(FDI_RX_CTL(PIPE_A), val);
 }
 
-bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
-				 struct intel_crtc *intel_crtc)
-{
-	u32 temp;
-
-	if (intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
-		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
-
-		intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
-
-		if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
-			return true;
-	}
-
-	return false;
-}
-
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config)
 {
@@ -3183,8 +3166,11 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 		break;
 	}
 
-	pipe_config->has_audio =
-		intel_ddi_is_audio_enabled(dev_priv, intel_crtc);
+	if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+		if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
+			pipe_config->has_audio = true;
+	}
 
 	if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp_bpp &&
 	    pipe_config->pipe_bpp > dev_priv->vbt.edp_bpp) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 182f84937345..0104a06d01fd 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7988,9 +7988,6 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
 
 	pipe_config->gmch_pfit.control = tmp;
 	pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
-	if (INTEL_INFO(dev)->gen < 5)
-		pipe_config->gmch_pfit.lvds_border_bits =
-			I915_READ(LVDS) & LVDS_BORDER_ENABLE;
 }
 
 static void vlv_crtc_clock_get(struct intel_crtc *crtc,
@@ -9752,6 +9749,8 @@ static void broadwell_set_cdclk(struct drm_device *dev, int cdclk)
 	sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data);
 	mutex_unlock(&dev_priv->rps.hw_lock);
 
+	I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
+
 	intel_update_cdclk(dev);
 
 	WARN(cdclk != dev_priv->cdclk_freq,
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 937e77228466..2c999725b3d4 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -78,8 +78,6 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 		return false;
 	}
 
-	if (drm_dp_mst_port_has_audio(&intel_dp->mst_mgr, found->port))
-		pipe_config->has_audio = true;
 	mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp);
 
 	pipe_config->pbn = mst_pbn;
@@ -104,11 +102,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder)
 	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base);
 	struct intel_digital_port *intel_dig_port = intel_mst->primary;
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_crtc *crtc = encoder->base.crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-
 	int ret;
 
 	DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links);
@@ -119,10 +112,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder)
 	if (ret) {
 		DRM_ERROR("failed to update payload %d\n", ret);
 	}
-	if (intel_crtc->config->has_audio) {
-		intel_audio_codec_disable(encoder);
-		intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
-	}
 }
 
 static void intel_mst_post_disable_dp(struct intel_encoder *encoder)
@@ -221,7 +210,6 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder)
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	enum port port = intel_dig_port->port;
 	int ret;
 
@@ -234,13 +222,6 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder)
 	ret = drm_dp_check_act_status(&intel_dp->mst_mgr);
 
 	ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr);
-
-	if (crtc->config->has_audio) {
-		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
-				 pipe_name(crtc->pipe));
-		intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
-		intel_audio_codec_enable(encoder);
-	}
 }
 
 static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder,
@@ -266,9 +247,6 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder,
 
 	pipe_config->has_dp_encoder = true;
 
-	pipe_config->has_audio =
-		intel_ddi_is_audio_enabled(dev_priv, crtc);
-
 	temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder));
 	if (temp & TRANS_DDI_PHSYNC)
 		flags |= DRM_MODE_FLAG_PHSYNC;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7d3af3a72abe..9d0770c23fde 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1019,8 +1019,6 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc);
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_fdi_disable(struct drm_crtc *crtc);
-bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
-				 struct intel_crtc *intel_crtc);
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
 struct intel_encoder *
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index cd9fe609aefb..10dc3517b63b 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -123,6 +123,10 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
 
 	pipe_config->base.adjusted_mode.flags |= flags;
 
+	if (INTEL_INFO(dev)->gen < 5)
+		pipe_config->gmch_pfit.lvds_border_bits =
+			tmp & LVDS_BORDER_ENABLE;
+
 	/* gen2/3 store dither state in pfit control, needs to match */
 	if (INTEL_INFO(dev)->gen < 4) {
 		tmp = I915_READ(PFIT_CONTROL);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8ed3cf34f82d..3425d8e737b3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6646,6 +6646,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
 	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating. See
+	 * the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 
 	/*
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index b80b08f71cb4..532127c55de6 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1742,6 +1742,7 @@ static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc)
 static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_crtc *test_crtc;
 	struct radeon_crtc *test_radeon_crtc;
 
@@ -1751,6 +1752,10 @@ static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc)
 		test_radeon_crtc = to_radeon_crtc(test_crtc);
 		if (test_radeon_crtc->encoder &&
 		    ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
+			/* PPLL2 is exclusive to UNIPHYA on DCE61 */
+			if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
+			    test_radeon_crtc->pll_id == ATOM_PPLL2)
+				continue;
 			/* for DP use the same PLL for all */
 			if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)
 				return test_radeon_crtc->pll_id;
@@ -1772,6 +1777,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_crtc *test_crtc;
 	struct radeon_crtc *test_radeon_crtc;
 	u32 adjusted_clock, test_adjusted_clock;
@@ -1787,6 +1793,10 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
 		test_radeon_crtc = to_radeon_crtc(test_crtc);
 		if (test_radeon_crtc->encoder &&
 		    !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
+			/* PPLL2 is exclusive to UNIPHYA on DCE61 */
+			if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
+			    test_radeon_crtc->pll_id == ATOM_PPLL2)
+				continue;
 			/* check if we are already driving this connector with another crtc */
 			if (test_radeon_crtc->connector == radeon_crtc->connector) {
 				/* if we are, return that pll */
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index afa9db1dc0e3..cead089a9e7d 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -326,8 +326,8 @@ int radeon_dp_get_dp_link_config(struct drm_connector *connector,
 			}
 		}
 	} else {
-		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+			for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
 				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
 				if (max_pix_clock >= pix_clock) {
 					*dp_lanes = lane_num;
diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
index 3b0c229d7dcd..db64e0062689 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
@@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg
 
 	tmp &= AUX_HPD_SEL(0x7);
 	tmp |= AUX_HPD_SEL(chan->rec.hpd);
-	tmp |= AUX_EN | AUX_LS_READ_EN;
+	tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1);
 
 	WREG32(AUX_CONTROL + aux_offset[instance], tmp);
 
diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index a806ba3818f7..8d6326d7e7be 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -255,12 +255,14 @@ static int max8997_haptic_probe(struct platform_device *pdev)
 	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
 	const struct max8997_platform_data *pdata =
 					dev_get_platdata(iodev->dev);
-	const struct max8997_haptic_platform_data *haptic_pdata =
-					pdata->haptic_pdata;
+	const struct max8997_haptic_platform_data *haptic_pdata = NULL;
 	struct max8997_haptic *chip;
 	struct input_dev *input_dev;
 	int error;
 
+	if (pdata)
+		haptic_pdata = pdata->haptic_pdata;
+
 	if (!haptic_pdata) {
 		dev_err(&pdev->dev, "no haptic platform data\n");
 		return -EINVAL;
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index df3581f60628..42de34b92996 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -257,6 +257,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev)
 	int vddvibr_uV = 0;
 	int error;
 
+	of_node_get(twl6040_core_dev->of_node);
 	twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
 						 "vibra");
 	if (!twl6040_core_node) {
diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c
index fdc243ca93ed..e583f8b50454 100644
--- a/drivers/input/mouse/byd.c
+++ b/drivers/input/mouse/byd.c
@@ -2,6 +2,10 @@
  * BYD TouchPad PS/2 mouse driver
  *
  * Copyright (C) 2015 Chris Diamand <chris@diamand.org>
+ * Copyright (C) 2015 Richard Pospesel
+ * Copyright (C) 2015 Tai Chi Minh Ralph Eastwood
+ * Copyright (C) 2015 Martin Wimpress
+ * Copyright (C) 2015 Jay Kuri
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index 7f366f1b0377..0b1b8c7b6ce5 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -74,11 +74,6 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer
 	return 0;
 }
 
-static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
-{
-	return __verify_planes_array(vb, pb);
-}
-
 /**
  * __verify_length() - Verify that the bytesused value for each plane fits in
  * the plane length and that the data offset doesn't exceed the bytesused value.
@@ -442,7 +437,6 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb,
 }
 
 static const struct vb2_buf_ops v4l2_buf_ops = {
-	.verify_planes_array	= __verify_planes_array_core,
 	.fill_user_buffer	= __fill_v4l2_buffer,
 	.fill_vb2_buffer	= __fill_vb2_buffer,
 	.copy_timestamp		= __copy_timestamp,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
index b212488606da..11be8044e0d7 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
@@ -43,6 +43,7 @@ static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel,
 static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata,
 				  struct xgene_cle_dbptr *dbptr, u32 *buf)
 {
+	buf[0] = SET_VAL(CLE_DROP, dbptr->drop);
 	buf[4] = SET_VAL(CLE_FPSEL, dbptr->fpsel) |
 		 SET_VAL(CLE_DSTQIDL, dbptr->dstqid);
 
@@ -412,7 +413,7 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 			.branch = {
 				{
 					/* IPV4 */
-					.valid = 0,
+					.valid = 1,
 					.next_packet_pointer = 22,
 					.jump_bw = JMP_FW,
 					.jump_rel = JMP_ABS,
@@ -420,7 +421,7 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 					.next_node = PKT_PROT_NODE,
 					.next_branch = 0,
 					.data = 0x8,
-					.mask = 0xffff
+					.mask = 0x0
 				},
 				{
 					.valid = 0,
@@ -456,7 +457,7 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 					.next_node = RSS_IPV4_TCP_NODE,
 					.next_branch = 0,
 					.data = 0x0600,
-					.mask = 0xffff
+					.mask = 0x00ff
 				},
 				{
 					/* UDP */
@@ -468,7 +469,7 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 					.next_node = RSS_IPV4_UDP_NODE,
 					.next_branch = 0,
 					.data = 0x1100,
-					.mask = 0xffff
+					.mask = 0x00ff
 				},
 				{
 					.valid = 0,
@@ -642,7 +643,7 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 				{
 					/* TCP DST Port */
 					.valid = 0,
-					.next_packet_pointer = 256,
+					.next_packet_pointer = 258,
 					.jump_bw = JMP_FW,
 					.jump_rel = JMP_ABS,
 					.operation = EQT,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
index 29a17abdd828..3bf90683240e 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
@@ -83,6 +83,8 @@
 #define CLE_TYPE_POS		0
 #define CLE_TYPE_LEN		2
 
+#define CLE_DROP_POS		28
+#define CLE_DROP_LEN		1
 #define CLE_DSTQIDL_POS		25
 #define CLE_DSTQIDL_LEN		7
 #define CLE_DSTQIDH_POS		0
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 39e081a70f5b..513d2a62ee6d 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -219,27 +219,30 @@ void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring,
 			    struct xgene_enet_pdata *pdata,
 			    enum xgene_enet_err_code status)
 {
-	struct rtnl_link_stats64 *stats = &pdata->stats;
-
 	switch (status) {
 	case INGRESS_CRC:
-		stats->rx_crc_errors++;
+		ring->rx_crc_errors++;
+		ring->rx_dropped++;
 		break;
 	case INGRESS_CHECKSUM:
 	case INGRESS_CHECKSUM_COMPUTE:
-		stats->rx_errors++;
+		ring->rx_errors++;
+		ring->rx_dropped++;
 		break;
 	case INGRESS_TRUNC_FRAME:
-		stats->rx_frame_errors++;
+		ring->rx_frame_errors++;
+		ring->rx_dropped++;
 		break;
 	case INGRESS_PKT_LEN:
-		stats->rx_length_errors++;
+		ring->rx_length_errors++;
+		ring->rx_dropped++;
 		break;
 	case INGRESS_PKT_UNDER:
-		stats->rx_frame_errors++;
+		ring->rx_frame_errors++;
+		ring->rx_dropped++;
 		break;
 	case INGRESS_FIFO_OVERRUN:
-		stats->rx_fifo_errors++;
+		ring->rx_fifo_errors++;
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index ba7da98af2ef..45220be3122f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -86,7 +86,7 @@ enum xgene_enet_rm {
 #define RINGADDRL_POS		5
 #define RINGADDRL_LEN		27
 #define RINGADDRH_POS		0
-#define RINGADDRH_LEN		6
+#define RINGADDRH_LEN		7
 #define RINGSIZE_POS		23
 #define RINGSIZE_LEN		3
 #define RINGTYPE_POS		19
@@ -94,9 +94,9 @@ enum xgene_enet_rm {
 #define RINGMODE_POS		20
 #define RINGMODE_LEN		3
 #define RECOMTIMEOUTL_POS	28
-#define RECOMTIMEOUTL_LEN	3
+#define RECOMTIMEOUTL_LEN	4
 #define RECOMTIMEOUTH_POS	0
-#define RECOMTIMEOUTH_LEN	2
+#define RECOMTIMEOUTH_LEN	3
 #define NUMMSGSINQ_POS		1
 #define NUMMSGSINQ_LEN		16
 #define ACCEPTLERR		BIT(19)
@@ -201,6 +201,8 @@ enum xgene_enet_rm {
 #define USERINFO_LEN			32
 #define FPQNUM_POS			32
 #define FPQNUM_LEN			12
+#define ELERR_POS                       46
+#define ELERR_LEN                       2
 #define NV_POS				50
 #define NV_LEN				1
 #define LL_POS				51
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 99d7e580e166..fd200883d228 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -443,8 +443,8 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
 
 	skb_tx_timestamp(skb);
 
-	pdata->stats.tx_packets++;
-	pdata->stats.tx_bytes += skb->len;
+	tx_ring->tx_packets++;
+	tx_ring->tx_bytes += skb->len;
 
 	pdata->ring_ops->wr_cmd(tx_ring, count);
 	return NETDEV_TX_OK;
@@ -483,12 +483,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 	skb = buf_pool->rx_skb[skb_index];
 
 	/* checking for error */
-	status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
+	status = (GET_VAL(ELERR, le64_to_cpu(raw_desc->m0)) << LERR_LEN) ||
+		  GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
 	if (unlikely(status > 2)) {
 		dev_kfree_skb_any(skb);
 		xgene_enet_parse_error(rx_ring, netdev_priv(rx_ring->ndev),
 				       status);
-		pdata->stats.rx_dropped++;
 		ret = -EIO;
 		goto out;
 	}
@@ -506,8 +506,8 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 		xgene_enet_skip_csum(skb);
 	}
 
-	pdata->stats.rx_packets++;
-	pdata->stats.rx_bytes += datalen;
+	rx_ring->rx_packets++;
+	rx_ring->rx_bytes += datalen;
 	napi_gro_receive(&rx_ring->napi, skb);
 out:
 	if (--rx_ring->nbufpool == 0) {
@@ -630,7 +630,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 		ring = pdata->rx_ring[i];
 		irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
 		ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
-				       IRQF_SHARED, ring->irq_name, ring);
+				       0, ring->irq_name, ring);
 		if (ret) {
 			netdev_err(ndev, "Failed to request irq %s\n",
 				   ring->irq_name);
@@ -641,7 +641,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
 		ring = pdata->tx_ring[i]->cp_ring;
 		irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
 		ret = devm_request_irq(dev, ring->irq, xgene_enet_rx_irq,
-				       IRQF_SHARED, ring->irq_name, ring);
+				       0, ring->irq_name, ring);
 		if (ret) {
 			netdev_err(ndev, "Failed to request irq %s\n",
 				   ring->irq_name);
@@ -1114,12 +1114,31 @@ static struct rtnl_link_stats64 *xgene_enet_get_stats64(
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
 	struct rtnl_link_stats64 *stats = &pdata->stats;
+	struct xgene_enet_desc_ring *ring;
+	int i;
 
-	stats->rx_errors += stats->rx_length_errors +
-			    stats->rx_crc_errors +
-			    stats->rx_frame_errors +
-			    stats->rx_fifo_errors;
-	memcpy(storage, &pdata->stats, sizeof(struct rtnl_link_stats64));
+	memset(stats, 0, sizeof(struct rtnl_link_stats64));
+	for (i = 0; i < pdata->txq_cnt; i++) {
+		ring = pdata->tx_ring[i];
+		if (ring) {
+			stats->tx_packets += ring->tx_packets;
+			stats->tx_bytes += ring->tx_bytes;
+		}
+	}
+
+	for (i = 0; i < pdata->rxq_cnt; i++) {
+		ring = pdata->rx_ring[i];
+		if (ring) {
+			stats->rx_packets += ring->rx_packets;
+			stats->rx_bytes += ring->rx_bytes;
+			stats->rx_errors += ring->rx_length_errors +
+				ring->rx_crc_errors +
+				ring->rx_frame_errors +
+				ring->rx_fifo_errors;
+			stats->rx_dropped += ring->rx_dropped;
+		}
+	}
+	memcpy(storage, stats, sizeof(struct rtnl_link_stats64));
 
 	return storage;
 }
@@ -1234,6 +1253,13 @@ static int xgene_enet_get_irqs(struct xgene_enet_pdata *pdata)
 	for (i = 0; i < max_irqs; i++) {
 		ret = platform_get_irq(pdev, i);
 		if (ret <= 0) {
+			if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
+				max_irqs = i;
+				pdata->rxq_cnt = max_irqs / 2;
+				pdata->txq_cnt = max_irqs / 2;
+				pdata->cq_cnt = max_irqs / 2;
+				break;
+			}
 			dev_err(dev, "Unable to get ENET IRQ\n");
 			ret = ret ? : -ENXIO;
 			return ret;
@@ -1437,19 +1463,28 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
 		pdata->port_ops = &xgene_xgport_ops;
 		pdata->cle_ops = &xgene_cle3in_ops;
 		pdata->rm = RM0;
-		pdata->rxq_cnt = XGENE_NUM_RX_RING;
-		pdata->txq_cnt = XGENE_NUM_TX_RING;
-		pdata->cq_cnt = XGENE_NUM_TXC_RING;
+		if (!pdata->rxq_cnt) {
+			pdata->rxq_cnt = XGENE_NUM_RX_RING;
+			pdata->txq_cnt = XGENE_NUM_TX_RING;
+			pdata->cq_cnt = XGENE_NUM_TXC_RING;
+		}
 		break;
 	}
 
 	if (pdata->enet_id == XGENE_ENET1) {
 		switch (pdata->port_id) {
 		case 0:
-			pdata->cpu_bufnum = START_CPU_BUFNUM_0;
-			pdata->eth_bufnum = START_ETH_BUFNUM_0;
-			pdata->bp_bufnum = START_BP_BUFNUM_0;
-			pdata->ring_num = START_RING_NUM_0;
+			if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
+				pdata->cpu_bufnum = X2_START_CPU_BUFNUM_0;
+				pdata->eth_bufnum = X2_START_ETH_BUFNUM_0;
+				pdata->bp_bufnum = X2_START_BP_BUFNUM_0;
+				pdata->ring_num = START_RING_NUM_0;
+			} else {
+				pdata->cpu_bufnum = START_CPU_BUFNUM_0;
+				pdata->eth_bufnum = START_ETH_BUFNUM_0;
+				pdata->bp_bufnum = START_BP_BUFNUM_0;
+				pdata->ring_num = START_RING_NUM_0;
+			}
 			break;
 		case 1:
 			if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 175d18890c7a..9d9cf445148c 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -49,10 +49,10 @@
 #define XGENE_ENET_MSS	1448
 #define XGENE_MIN_ENET_FRAME_SIZE	60
 
-#define XGENE_MAX_ENET_IRQ	8
-#define XGENE_NUM_RX_RING	4
-#define XGENE_NUM_TX_RING	4
-#define XGENE_NUM_TXC_RING	4
+#define XGENE_MAX_ENET_IRQ	16
+#define XGENE_NUM_RX_RING	8
+#define XGENE_NUM_TX_RING	8
+#define XGENE_NUM_TXC_RING	8
 
 #define START_CPU_BUFNUM_0	0
 #define START_ETH_BUFNUM_0	2
@@ -121,6 +121,16 @@ struct xgene_enet_desc_ring {
 		struct xgene_enet_raw_desc16 *raw_desc16;
 	};
 	__le64 *exp_bufs;
+	u64 tx_packets;
+	u64 tx_bytes;
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_dropped;
+	u64 rx_errors;
+	u64 rx_length_errors;
+	u64 rx_crc_errors;
+	u64 rx_frame_errors;
+	u64 rx_fifo_errors;
 };
 
 struct xgene_mac_ops {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
index 29a71b4dcc44..002df5a6756e 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.h
@@ -33,7 +33,7 @@
 #define LINK_STATUS			BIT(2)
 #define LINK_UP				BIT(15)
 #define MPA_IDLE_WITH_QMI_EMPTY		BIT(12)
-#define SG_RX_DV_GATE_REG_0_ADDR	0x0dfc
+#define SG_RX_DV_GATE_REG_0_ADDR	0x05fc
 
 extern const struct xgene_mac_ops xgene_sgmac_ops;
 extern const struct xgene_port_ops xgene_sgport_ops;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9d4e8e113fe1..c39a7f5c6a01 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -813,6 +813,46 @@ static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data,
 	return skb;
 }
 
+static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_napi *bnapi,
+			   u32 *raw_cons, void *cmp)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	struct rx_cmp *rxcmp = cmp;
+	u32 tmp_raw_cons = *raw_cons;
+	u8 cmp_type, agg_bufs = 0;
+
+	cmp_type = RX_CMP_TYPE(rxcmp);
+
+	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+		agg_bufs = (le32_to_cpu(rxcmp->rx_cmp_misc_v1) &
+			    RX_CMP_AGG_BUFS) >>
+			   RX_CMP_AGG_BUFS_SHIFT;
+	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+		struct rx_tpa_end_cmp *tpa_end = cmp;
+
+		agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) &
+			    RX_TPA_END_CMP_AGG_BUFS) >>
+			   RX_TPA_END_CMP_AGG_BUFS_SHIFT;
+	}
+
+	if (agg_bufs) {
+		if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, &tmp_raw_cons))
+			return -EBUSY;
+	}
+	*raw_cons = tmp_raw_cons;
+	return 0;
+}
+
+static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+{
+	if (!rxr->bnapi->in_reset) {
+		rxr->bnapi->in_reset = true;
+		set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
+		schedule_work(&bp->sp_task);
+	}
+	rxr->rx_next_cons = 0xffff;
+}
+
 static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 			   struct rx_tpa_start_cmp *tpa_start,
 			   struct rx_tpa_start_cmp_ext *tpa_start1)
@@ -830,6 +870,11 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 	prod_rx_buf = &rxr->rx_buf_ring[prod];
 	tpa_info = &rxr->rx_tpa[agg_id];
 
+	if (unlikely(cons != rxr->rx_next_cons)) {
+		bnxt_sched_reset(bp, rxr);
+		return;
+	}
+
 	prod_rx_buf->data = tpa_info->data;
 
 	mapping = tpa_info->mapping;
@@ -867,6 +912,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 
 	rxr->rx_prod = NEXT_RX(prod);
 	cons = NEXT_RX(cons);
+	rxr->rx_next_cons = NEXT_RX(cons);
 	cons_rx_buf = &rxr->rx_buf_ring[cons];
 
 	bnxt_reuse_rx_data(rxr, cons, cons_rx_buf->data);
@@ -980,6 +1026,14 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 	dma_addr_t mapping;
 	struct sk_buff *skb;
 
+	if (unlikely(bnapi->in_reset)) {
+		int rc = bnxt_discard_rx(bp, bnapi, raw_cons, tpa_end);
+
+		if (rc < 0)
+			return ERR_PTR(-EBUSY);
+		return NULL;
+	}
+
 	tpa_info = &rxr->rx_tpa[agg_id];
 	data = tpa_info->data;
 	prefetch(data);
@@ -1146,6 +1200,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 	cons = rxcmp->rx_cmp_opaque;
 	rx_buf = &rxr->rx_buf_ring[cons];
 	data = rx_buf->data;
+	if (unlikely(cons != rxr->rx_next_cons)) {
+		int rc1 = bnxt_discard_rx(bp, bnapi, raw_cons, rxcmp);
+
+		bnxt_sched_reset(bp, rxr);
+		return rc1;
+	}
 	prefetch(data);
 
 	agg_bufs = (le32_to_cpu(rxcmp->rx_cmp_misc_v1) & RX_CMP_AGG_BUFS) >>
@@ -1245,6 +1305,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 
 next_rx:
 	rxr->rx_prod = NEXT_RX(prod);
+	rxr->rx_next_cons = NEXT_RX(cons);
 
 next_rx_no_prod:
 	*raw_cons = tmp_raw_cons;
@@ -2486,6 +2547,7 @@ static void bnxt_clear_ring_indices(struct bnxt *bp)
 			rxr->rx_prod = 0;
 			rxr->rx_agg_prod = 0;
 			rxr->rx_sw_agg_prod = 0;
+			rxr->rx_next_cons = 0;
 		}
 	}
 }
@@ -4462,6 +4524,7 @@ static void bnxt_enable_napi(struct bnxt *bp)
 	int i;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
+		bp->bnapi[i]->in_reset = false;
 		bnxt_enable_poll(bp->bnapi[i]);
 		napi_enable(&bp->bnapi[i]->napi);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 8b823ff558ff..de9d53eee3dd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -584,6 +584,7 @@ struct bnxt_rx_ring_info {
 	u16			rx_prod;
 	u16			rx_agg_prod;
 	u16			rx_sw_agg_prod;
+	u16			rx_next_cons;
 	void __iomem		*rx_doorbell;
 	void __iomem		*rx_agg_doorbell;
 
@@ -636,6 +637,7 @@ struct bnxt_napi {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	atomic_t		poll_state;
 #endif
+	bool			in_reset;
 };
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index fa05e347262f..06b819db51b1 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -533,6 +533,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 		nicvf_config_vlan_stripping(nic, nic->netdev->features);
 
 	/* Enable Receive queue */
+	memset(&rq_cfg, 0, sizeof(struct rq_cfg));
 	rq_cfg.ena = 1;
 	rq_cfg.tcp_ena = 0;
 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
@@ -565,6 +566,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
 			      qidx, (u64)(cq->dmem.phys_base));
 
 	/* Enable Completion queue */
+	memset(&cq_cfg, 0, sizeof(struct cq_cfg));
 	cq_cfg.ena = 1;
 	cq_cfg.reset = 0;
 	cq_cfg.caching = 0;
@@ -613,6 +615,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
 			      qidx, (u64)(sq->dmem.phys_base));
 
 	/* Enable send queue  & set queue size */
+	memset(&sq_cfg, 0, sizeof(struct sq_cfg));
 	sq_cfg.ena = 1;
 	sq_cfg.reset = 0;
 	sq_cfg.ldwb = 0;
@@ -649,6 +652,7 @@ static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
 
 	/* Enable RBDR  & set queue size */
 	/* Buffer size should be in multiples of 128 bytes */
+	memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
 	rbdr_cfg.ena = 1;
 	rbdr_cfg.reset = 0;
 	rbdr_cfg.ldwb = 0;
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 1f23845a0694..085f9125cf42 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -145,7 +145,7 @@ static void nps_enet_tx_handler(struct net_device *ndev)
 	u32 tx_ctrl_nt = (tx_ctrl_value & TX_CTL_NT_MASK) >> TX_CTL_NT_SHIFT;
 
 	/* Check if we got TX */
-	if (!priv->tx_packet_sent || tx_ctrl_ct)
+	if (!priv->tx_skb || tx_ctrl_ct)
 		return;
 
 	/* Ack Tx ctrl register */
@@ -160,7 +160,7 @@ static void nps_enet_tx_handler(struct net_device *ndev)
 	}
 
 	dev_kfree_skb(priv->tx_skb);
-	priv->tx_packet_sent = false;
+	priv->tx_skb = NULL;
 
 	if (netif_queue_stopped(ndev))
 		netif_wake_queue(ndev);
@@ -183,6 +183,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
 	work_done = nps_enet_rx_handler(ndev);
 	if (work_done < budget) {
 		u32 buf_int_enable_value = 0;
+		u32 tx_ctrl_value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
+		u32 tx_ctrl_ct =
+			(tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
 
 		napi_complete(napi);
 
@@ -192,6 +195,18 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
 
 		nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE,
 				 buf_int_enable_value);
+
+		/* in case we will get a tx interrupt while interrupts
+		 * are masked, we will lose it since the tx is edge interrupt.
+		 * specifically, while executing the code section above,
+		 * between nps_enet_tx_handler and the interrupts enable, all
+		 * tx requests will be stuck until we will get an rx interrupt.
+		 * the two code lines below will solve this situation by
+		 * re-adding ourselves to the poll list.
+		 */
+
+		if (priv->tx_skb && !tx_ctrl_ct)
+			napi_reschedule(napi);
 	}
 
 	return work_done;
@@ -217,7 +232,7 @@ static irqreturn_t nps_enet_irq_handler(s32 irq, void *dev_instance)
 	u32 tx_ctrl_ct = (tx_ctrl_value & TX_CTL_CT_MASK) >> TX_CTL_CT_SHIFT;
 	u32 rx_ctrl_cr = (rx_ctrl_value & RX_CTL_CR_MASK) >> RX_CTL_CR_SHIFT;
 
-	if ((!tx_ctrl_ct && priv->tx_packet_sent) || rx_ctrl_cr)
+	if ((!tx_ctrl_ct && priv->tx_skb) || rx_ctrl_cr)
 		if (likely(napi_schedule_prep(&priv->napi))) {
 			nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0);
 			__napi_schedule(&priv->napi);
@@ -387,8 +402,6 @@ static void nps_enet_send_frame(struct net_device *ndev,
 	/* Write the length of the Frame */
 	tx_ctrl_value |= length << TX_CTL_NT_SHIFT;
 
-	/* Indicate SW is done */
-	priv->tx_packet_sent = true;
 	tx_ctrl_value |= NPS_ENET_ENABLE << TX_CTL_CT_SHIFT;
 	/* Send Frame */
 	nps_enet_reg_set(priv, NPS_ENET_REG_TX_CTL, tx_ctrl_value);
@@ -465,7 +478,7 @@ static s32 nps_enet_open(struct net_device *ndev)
 	s32 err;
 
 	/* Reset private variables */
-	priv->tx_packet_sent = false;
+	priv->tx_skb = NULL;
 	priv->ge_mac_cfg_2_value = 0;
 	priv->ge_mac_cfg_3_value = 0;
 
@@ -534,6 +547,11 @@ static netdev_tx_t nps_enet_start_xmit(struct sk_buff *skb,
 
 	priv->tx_skb = skb;
 
+	/* make sure tx_skb is actually written to the memory
+	 * before the HW is informed and the IRQ is fired.
+	 */
+	wmb();
+
 	nps_enet_send_frame(ndev, skb);
 
 	return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/ezchip/nps_enet.h b/drivers/net/ethernet/ezchip/nps_enet.h
index d0cab600bce8..3939ca20cc9f 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.h
+++ b/drivers/net/ethernet/ezchip/nps_enet.h
@@ -165,14 +165,12 @@
  * struct nps_enet_priv - Storage of ENET's private information.
  * @regs_base:      Base address of ENET memory-mapped control registers.
  * @irq:            For RX/TX IRQ number.
- * @tx_packet_sent: SW indication if frame is being sent.
  * @tx_skb:         socket buffer of sent frame.
  * @napi:           Structure for NAPI.
  */
 struct nps_enet_priv {
 	void __iomem *regs_base;
 	s32 irq;
-	bool tx_packet_sent;
 	struct sk_buff *tx_skb;
 	struct napi_struct napi;
 	u32 ge_mac_cfg_2_value;
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index b5c6d42daa12..2664827ddecd 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -68,7 +68,7 @@ config MVNETA
 
 config MVNETA_BM
 	tristate
-	default y if MVNETA=y && MVNETA_BM_ENABLE
+	default y if MVNETA=y && MVNETA_BM_ENABLE!=n
 	default MVNETA_BM_ENABLE
 	select HWBM
 	help
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index cda9e604a95f..0844b7c75767 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -1417,6 +1417,7 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter)
 	struct qlcnic_fw_dump *fw_dump = &ahw->fw_dump;
 	struct pci_dev *pdev = adapter->pdev;
 	bool extended = false;
+	int ret;
 
 	prev_version = adapter->fw_version;
 	current_version = qlcnic_83xx_get_fw_version(adapter);
@@ -1427,8 +1428,11 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter)
 		if (qlcnic_83xx_md_check_extended_dump_capability(adapter))
 			extended = !qlcnic_83xx_extend_md_capab(adapter);
 
-		if (!qlcnic_fw_cmd_get_minidump_temp(adapter))
-			dev_info(&pdev->dev, "Supports FW dump capability\n");
+		ret = qlcnic_fw_cmd_get_minidump_temp(adapter);
+		if (ret)
+			return;
+
+		dev_info(&pdev->dev, "Supports FW dump capability\n");
 
 		/* Once we have minidump template with extended iSCSI dump
 		 * capability, update the minidump capture mask to 0x1f as
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 9e2a0bd8f5a8..4277d0c12101 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1506,6 +1506,8 @@ static int ravb_close(struct net_device *ndev)
 		priv->phydev = NULL;
 	}
 
+	if (priv->chip_id == RCAR_GEN3)
+		free_irq(priv->emac_irq, ndev);
 	free_irq(ndev->irq, ndev);
 
 	napi_disable(&priv->napi[RAVB_NC]);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 5590b9c182c9..445fc5aef308 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -790,9 +790,11 @@ void phy_start(struct phy_device *phydev)
 		break;
 	case PHY_HALTED:
 		/* make sure interrupts are re-enabled for the PHY */
-		err = phy_enable_interrupts(phydev);
-		if (err < 0)
-			break;
+		if (phydev->irq != PHY_POLL) {
+			err = phy_enable_interrupts(phydev);
+			if (err < 0)
+				break;
+		}
 
 		phydev->state = PHY_RESUMING;
 		do_resume = true;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 75870e68a7c3..34731e29c589 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -105,6 +105,7 @@ void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb,
 			struct iwl_tx_cmd *tx_cmd,
 			struct ieee80211_tx_info *info, u8 sta_id)
 {
+	struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	__le16 fc = hdr->frame_control;
 	u32 tx_flags = le32_to_cpu(tx_cmd->tx_flags);
@@ -185,7 +186,7 @@ void iwl_mvm_set_tx_cmd(struct iwl_mvm *mvm, struct sk_buff *skb,
 	tx_cmd->tx_flags = cpu_to_le32(tx_flags);
 	/* Total # bytes to be transmitted */
 	tx_cmd->len = cpu_to_le16((u16)skb->len +
-		(uintptr_t)info->driver_data[0]);
+		(uintptr_t)skb_info->driver_data[0]);
 	tx_cmd->next_frame_len = 0;
 	tx_cmd->life_time = cpu_to_le32(TX_CMD_LIFE_TIME_INFINITE);
 	tx_cmd->sta_id = sta_id;
@@ -327,10 +328,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
  */
 static struct iwl_device_cmd *
 iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
-		      int hdrlen, struct ieee80211_sta *sta, u8 sta_id)
+		      struct ieee80211_tx_info *info, int hdrlen,
+		      struct ieee80211_sta *sta, u8 sta_id)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb);
 	struct iwl_device_cmd *dev_cmd;
 	struct iwl_tx_cmd *tx_cmd;
 
@@ -350,10 +352,10 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
 
 	iwl_mvm_set_tx_cmd_rate(mvm, tx_cmd, info, sta, hdr->frame_control);
 
-	memset(&info->status, 0, sizeof(info->status));
-	memset(info->driver_data, 0, sizeof(info->driver_data));
+	memset(&skb_info->status, 0, sizeof(skb_info->status));
+	memset(skb_info->driver_data, 0, sizeof(skb_info->driver_data));
 
-	info->driver_data[1] = dev_cmd;
+	skb_info->driver_data[1] = dev_cmd;
 
 	return dev_cmd;
 }
@@ -361,22 +363,25 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
 int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info info;
 	struct iwl_device_cmd *dev_cmd;
 	struct iwl_tx_cmd *tx_cmd;
 	u8 sta_id;
 	int hdrlen = ieee80211_hdrlen(hdr->frame_control);
 
-	if (WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_AMPDU))
+	memcpy(&info, skb->cb, sizeof(info));
+
+	if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
 		return -1;
 
-	if (WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM &&
-			 (!info->control.vif ||
-			  info->hw_queue != info->control.vif->cab_queue)))
+	if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM &&
+			 (!info.control.vif ||
+			  info.hw_queue != info.control.vif->cab_queue)))
 		return -1;
 
 	/* This holds the amsdu headers length */
-	info->driver_data[0] = (void *)(uintptr_t)0;
+	skb_info->driver_data[0] = (void *)(uintptr_t)0;
 
 	/*
 	 * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
@@ -385,7 +390,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 	 * and hence needs to be sent on the aux queue
 	 */
 	if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
-	    info->control.vif->type == NL80211_IFTYPE_STATION)
+	    info.control.vif->type == NL80211_IFTYPE_STATION)
 		IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
 
 	/*
@@ -398,14 +403,14 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 	 * AUX station.
 	 */
 	sta_id = mvm->aux_sta.sta_id;
-	if (info->control.vif) {
+	if (info.control.vif) {
 		struct iwl_mvm_vif *mvmvif =
-			iwl_mvm_vif_from_mac80211(info->control.vif);
+			iwl_mvm_vif_from_mac80211(info.control.vif);
 
-		if (info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
-		    info->control.vif->type == NL80211_IFTYPE_AP)
+		if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
+		    info.control.vif->type == NL80211_IFTYPE_AP)
 			sta_id = mvmvif->bcast_sta.sta_id;
-		else if (info->control.vif->type == NL80211_IFTYPE_STATION &&
+		else if (info.control.vif->type == NL80211_IFTYPE_STATION &&
 			 is_multicast_ether_addr(hdr->addr1)) {
 			u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
 
@@ -414,19 +419,18 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 		}
 	}
 
-	IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, info->hw_queue);
+	IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, info.hw_queue);
 
-	dev_cmd = iwl_mvm_set_tx_params(mvm, skb, hdrlen, NULL, sta_id);
+	dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id);
 	if (!dev_cmd)
 		return -1;
 
-	/* From now on, we cannot access info->control */
 	tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
 
 	/* Copy MAC header from skb into command buffer */
 	memcpy(tx_cmd->hdr, hdr, hdrlen);
 
-	if (iwl_trans_tx(mvm->trans, skb, dev_cmd, info->hw_queue)) {
+	if (iwl_trans_tx(mvm->trans, skb, dev_cmd, info.hw_queue)) {
 		iwl_trans_free_tx_cmd(mvm->trans, dev_cmd);
 		return -1;
 	}
@@ -445,11 +449,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 
 #ifdef CONFIG_INET
 static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
 			  struct ieee80211_sta *sta,
 			  struct sk_buff_head *mpdus_skb)
 {
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	struct sk_buff *tmp, *next;
@@ -544,6 +548,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 
 	/* This skb fits in one single A-MSDU */
 	if (num_subframes * mss >= tcp_payload_len) {
+		struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb);
+
 		/*
 		 * Compute the length of all the data added for the A-MSDU.
 		 * This will be used to compute the length to write in the TX
@@ -552,11 +558,10 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 		 * already had one set of SNAP / IP / TCP headers.
 		 */
 		num_subframes = DIV_ROUND_UP(tcp_payload_len, mss);
-		info = IEEE80211_SKB_CB(skb);
 		amsdu_add = num_subframes * sizeof(struct ethhdr) +
 			(num_subframes - 1) * (snap_ip_tcp + pad);
 		/* This holds the amsdu headers length */
-		info->driver_data[0] = (void *)(uintptr_t)amsdu_add;
+		skb_info->driver_data[0] = (void *)(uintptr_t)amsdu_add;
 
 		__skb_queue_tail(mpdus_skb, skb);
 		return 0;
@@ -596,11 +601,14 @@ segment:
 			ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes);
 
 		if (tcp_payload_len > mss) {
+			struct ieee80211_tx_info *skb_info =
+				IEEE80211_SKB_CB(tmp);
+
 			num_subframes = DIV_ROUND_UP(tcp_payload_len, mss);
-			info = IEEE80211_SKB_CB(tmp);
 			amsdu_add = num_subframes * sizeof(struct ethhdr) +
 				(num_subframes - 1) * (snap_ip_tcp + pad);
-			info->driver_data[0] = (void *)(uintptr_t)amsdu_add;
+			skb_info->driver_data[0] =
+				(void *)(uintptr_t)amsdu_add;
 			skb_shinfo(tmp)->gso_size = mss;
 		} else {
 			qc = ieee80211_get_qos_ctl((void *)tmp->data);
@@ -622,6 +630,7 @@ segment:
 }
 #else /* CONFIG_INET */
 static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
 			  struct ieee80211_sta *sta,
 			  struct sk_buff_head *mpdus_skb)
 {
@@ -636,10 +645,10 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
  * Sets the fields in the Tx cmd that are crypto related
  */
 static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
+			   struct ieee80211_tx_info *info,
 			   struct ieee80211_sta *sta)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct iwl_mvm_sta *mvmsta;
 	struct iwl_device_cmd *dev_cmd;
 	struct iwl_tx_cmd *tx_cmd;
@@ -660,7 +669,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
 	if (WARN_ON_ONCE(mvmsta->sta_id == IWL_MVM_STATION_COUNT))
 		return -1;
 
-	dev_cmd = iwl_mvm_set_tx_params(mvm, skb, hdrlen, sta, mvmsta->sta_id);
+	dev_cmd = iwl_mvm_set_tx_params(mvm, skb, info, hdrlen,
+					sta, mvmsta->sta_id);
 	if (!dev_cmd)
 		goto drop;
 
@@ -736,7 +746,8 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
 		   struct ieee80211_sta *sta)
 {
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *skb_info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info info;
 	struct sk_buff_head mpdus_skbs;
 	unsigned int payload_len;
 	int ret;
@@ -747,21 +758,23 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
 	if (WARN_ON_ONCE(mvmsta->sta_id == IWL_MVM_STATION_COUNT))
 		return -1;
 
+	memcpy(&info, skb->cb, sizeof(info));
+
 	/* This holds the amsdu headers length */
-	info->driver_data[0] = (void *)(uintptr_t)0;
+	skb_info->driver_data[0] = (void *)(uintptr_t)0;
 
 	if (!skb_is_gso(skb))
-		return iwl_mvm_tx_mpdu(mvm, skb, sta);
+		return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
 
 	payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) -
 		tcp_hdrlen(skb) + skb->data_len;
 
 	if (payload_len <= skb_shinfo(skb)->gso_size)
-		return iwl_mvm_tx_mpdu(mvm, skb, sta);
+		return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
 
 	__skb_queue_head_init(&mpdus_skbs);
 
-	ret = iwl_mvm_tx_tso(mvm, skb, sta, &mpdus_skbs);
+	ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs);
 	if (ret)
 		return ret;
 
@@ -771,7 +784,7 @@ int iwl_mvm_tx_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
 	while (!skb_queue_empty(&mpdus_skbs)) {
 		skb = __skb_dequeue(&mpdus_skbs);
 
-		ret = iwl_mvm_tx_mpdu(mvm, skb, sta);
+		ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
 		if (ret) {
 			__skb_queue_purge(&mpdus_skbs);
 			return ret;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index b42f26029225..4412a57ec862 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -711,6 +711,7 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
 		if (cons == end)
 			break;
 		RING_COPY_REQUEST(&queue->tx, cons++, txp);
+		extra_count = 0; /* only the first frag can have extras */
 	} while (1);
 	queue->tx.req_cons = cons;
 }
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 4429312e848d..2c447130b954 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -722,9 +722,11 @@ static int atmel_conf_pin_config_group_set(struct pinctrl_dev *pctldev,
 			break;
 		case PIN_CONFIG_BIAS_PULL_UP:
 			conf |= ATMEL_PIO_PUEN_MASK;
+			conf &= (~ATMEL_PIO_PDEN_MASK);
 			break;
 		case PIN_CONFIG_BIAS_PULL_DOWN:
 			conf |= ATMEL_PIO_PDEN_MASK;
+			conf &= (~ATMEL_PIO_PUEN_MASK);
 			break;
 		case PIN_CONFIG_DRIVE_OPEN_DRAIN:
 			if (arg == 0)
diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 40cd894e4df5..514a5e8fdbab 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -157,7 +157,9 @@ static struct regulator_ops axp20x_ops_sw = {
 static const struct regulator_linear_range axp20x_ldo4_ranges[] = {
 	REGULATOR_LINEAR_RANGE(1250000, 0x0, 0x0, 0),
 	REGULATOR_LINEAR_RANGE(1300000, 0x1, 0x8, 100000),
-	REGULATOR_LINEAR_RANGE(2500000, 0x9, 0xf, 100000),
+	REGULATOR_LINEAR_RANGE(2500000, 0x9, 0x9, 0),
+	REGULATOR_LINEAR_RANGE(2700000, 0xa, 0xb, 100000),
+	REGULATOR_LINEAR_RANGE(3000000, 0xc, 0xf, 100000),
 };
 
 static const struct regulator_desc axp20x_regulators[] = {
@@ -215,10 +217,14 @@ static const struct regulator_desc axp22x_regulators[] = {
 		 AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)),
 	AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100,
 		 AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)),
-	AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100,
+	/* Note the datasheet only guarantees reliable operation up to
+	 * 3.3V, this needs to be enforced via dts provided constraints */
+	AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3800, 100,
 		    AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07,
 		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
-	AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100,
+	/* Note the datasheet only guarantees reliable operation up to
+	 * 3.3V, this needs to be enforced via dts provided constraints */
+	AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3800, 100,
 		    AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07,
 		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
 	AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000),
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index ed9e7e96f877..c6af343f54ea 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -900,4 +900,4 @@ module_exit(da9063_regulator_cleanup);
 MODULE_AUTHOR("Krystian Garbaciak <krystian.garbaciak@diasemi.com>");
 MODULE_DESCRIPTION("DA9063 regulators driver");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("paltform:" DA9063_DRVNAME_REGULATORS);
+MODULE_ALIAS("platform:" DA9063_DRVNAME_REGULATORS);
diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c
index a8718e98674a..83e89e5d4752 100644
--- a/drivers/regulator/gpio-regulator.c
+++ b/drivers/regulator/gpio-regulator.c
@@ -162,6 +162,8 @@ of_get_gpio_regulator_config(struct device *dev, struct device_node *np,
 	of_property_read_u32(np, "startup-delay-us", &config->startup_delay);
 
 	config->enable_gpio = of_get_named_gpio(np, "enable-gpio", 0);
+	if (config->enable_gpio == -EPROBE_DEFER)
+		return ERR_PTR(-EPROBE_DEFER);
 
 	/* Fetch GPIOs. - optional property*/
 	ret = of_gpio_count(np);
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index d24e2c783dc5..6dfa3502e1f1 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -308,7 +308,7 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.enable_mask	= S2MPS11_ENABLE_MASK			\
 }
 
-#define regulator_desc_s2mps11_buck6_10(num, min, step) {	\
+#define regulator_desc_s2mps11_buck67810(num, min, step) {	\
 	.name		= "BUCK"#num,				\
 	.id		= S2MPS11_BUCK##num,			\
 	.ops		= &s2mps11_buck_ops,			\
@@ -324,6 +324,22 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.enable_mask	= S2MPS11_ENABLE_MASK			\
 }
 
+#define regulator_desc_s2mps11_buck9 {				\
+	.name		= "BUCK9",				\
+	.id		= S2MPS11_BUCK9,			\
+	.ops		= &s2mps11_buck_ops,			\
+	.type		= REGULATOR_VOLTAGE,			\
+	.owner		= THIS_MODULE,				\
+	.min_uV		= MIN_3000_MV,				\
+	.uV_step	= STEP_25_MV,				\
+	.n_voltages	= S2MPS11_BUCK9_N_VOLTAGES,		\
+	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
+	.vsel_reg	= S2MPS11_REG_B9CTRL2,			\
+	.vsel_mask	= S2MPS11_BUCK9_VSEL_MASK,		\
+	.enable_reg	= S2MPS11_REG_B9CTRL1,			\
+	.enable_mask	= S2MPS11_ENABLE_MASK			\
+}
+
 static const struct regulator_desc s2mps11_regulators[] = {
 	regulator_desc_s2mps11_ldo(1, STEP_25_MV),
 	regulator_desc_s2mps11_ldo(2, STEP_50_MV),
@@ -368,11 +384,11 @@ static const struct regulator_desc s2mps11_regulators[] = {
 	regulator_desc_s2mps11_buck1_4(3),
 	regulator_desc_s2mps11_buck1_4(4),
 	regulator_desc_s2mps11_buck5,
-	regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
-	regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV),
+	regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck9,
+	regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV),
 };
 
 static struct regulator_ops s2mps14_reg_ops;
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 8eaed0522aa3..a655cf29c16f 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -532,6 +532,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 		return SCSI_DH_DEV_TEMP_BUSY;
 
  retry:
+	err = 0;
 	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
 
 	if (retval) {
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 5d0ec42a9317..634254a52301 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -4214,7 +4214,7 @@ static struct scsi_host_template qla1280_driver_template = {
 	.eh_bus_reset_handler	= qla1280_eh_bus_reset,
 	.eh_host_reset_handler	= qla1280_eh_adapter_reset,
 	.bios_param		= qla1280_biosparam,
-	.can_queue		= 0xfffff,
+	.can_queue		= MAX_OUTSTANDING_COMMANDS,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.use_clustering		= ENABLE_CLUSTERING,
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 39412c9097c6..c1a2d747b246 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -385,8 +385,8 @@ static int dspi_transfer_one_message(struct spi_master *master,
 		dspi->cur_chip = spi_get_ctldata(spi);
 		dspi->cs = spi->chip_select;
 		dspi->cs_change = 0;
-		if (dspi->cur_transfer->transfer_list.next
-				== &dspi->cur_msg->transfers)
+		if (list_is_last(&dspi->cur_transfer->transfer_list,
+				 &dspi->cur_msg->transfers) || transfer->cs_change)
 			dspi->cs_change = 1;
 		dspi->void_write_data = dspi->cur_chip->void_write_data;
 
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 43a02e377b3b..0caa3c8bef46 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -423,12 +423,16 @@ static void omap2_mcspi_tx_dma(struct spi_device *spi,
 
 	if (mcspi_dma->dma_tx) {
 		struct dma_async_tx_descriptor *tx;
+		struct scatterlist sg;
 
 		dmaengine_slave_config(mcspi_dma->dma_tx, &cfg);
 
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, xfer->tx_sg.sgl,
-					     xfer->tx_sg.nents, DMA_MEM_TO_DEV,
-					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		sg_init_table(&sg, 1);
+		sg_dma_address(&sg) = xfer->tx_dma;
+		sg_dma_len(&sg) = xfer->len;
+
+		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, &sg, 1,
+		DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (tx) {
 			tx->callback = omap2_mcspi_tx_callback;
 			tx->callback_param = spi;
@@ -474,15 +478,20 @@ omap2_mcspi_rx_dma(struct spi_device *spi, struct spi_transfer *xfer,
 
 	if (mcspi_dma->dma_rx) {
 		struct dma_async_tx_descriptor *tx;
+		struct scatterlist sg;
 
 		dmaengine_slave_config(mcspi_dma->dma_rx, &cfg);
 
 		if ((l & OMAP2_MCSPI_CHCONF_TURBO) && mcspi->fifo_depth == 0)
 			dma_count -= es;
 
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx, xfer->rx_sg.sgl,
-					     xfer->rx_sg.nents, DMA_DEV_TO_MEM,
-					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		sg_init_table(&sg, 1);
+		sg_dma_address(&sg) = xfer->rx_dma;
+		sg_dma_len(&sg) = dma_count;
+
+		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx, &sg, 1,
+				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT |
+				DMA_CTRL_ACK);
 		if (tx) {
 			tx->callback = omap2_mcspi_rx_callback;
 			tx->callback_param = spi;
@@ -496,6 +505,8 @@ omap2_mcspi_rx_dma(struct spi_device *spi, struct spi_transfer *xfer,
 	omap2_mcspi_set_dma_req(spi, 1, 1);
 
 	wait_for_completion(&mcspi_dma->dma_rx_completion);
+	dma_unmap_single(mcspi->dev, xfer->rx_dma, count,
+			 DMA_FROM_DEVICE);
 
 	if (mcspi->fifo_depth > 0)
 		return count;
@@ -608,6 +619,8 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer)
 
 	if (tx != NULL) {
 		wait_for_completion(&mcspi_dma->dma_tx_completion);
+		dma_unmap_single(mcspi->dev, xfer->tx_dma, xfer->len,
+				 DMA_TO_DEVICE);
 
 		if (mcspi->fifo_depth > 0) {
 			irqstat_reg = mcspi->base + OMAP2_MCSPI_IRQSTATUS;
@@ -1074,16 +1087,6 @@ static void omap2_mcspi_cleanup(struct spi_device *spi)
 		gpio_free(spi->cs_gpio);
 }
 
-static bool omap2_mcspi_can_dma(struct spi_master *master,
-				struct spi_device *spi,
-				struct spi_transfer *xfer)
-{
-	if (xfer->len < DMA_MIN_BYTES)
-		return false;
-
-	return true;
-}
-
 static int omap2_mcspi_work_one(struct omap2_mcspi *mcspi,
 		struct spi_device *spi, struct spi_transfer *t)
 {
@@ -1265,6 +1268,32 @@ static int omap2_mcspi_transfer_one(struct spi_master *master,
 		return -EINVAL;
 	}
 
+	if (len < DMA_MIN_BYTES)
+		goto skip_dma_map;
+
+	if (mcspi_dma->dma_tx && tx_buf != NULL) {
+		t->tx_dma = dma_map_single(mcspi->dev, (void *) tx_buf,
+				len, DMA_TO_DEVICE);
+		if (dma_mapping_error(mcspi->dev, t->tx_dma)) {
+			dev_dbg(mcspi->dev, "dma %cX %d bytes error\n",
+					'T', len);
+			return -EINVAL;
+		}
+	}
+	if (mcspi_dma->dma_rx && rx_buf != NULL) {
+		t->rx_dma = dma_map_single(mcspi->dev, rx_buf, t->len,
+				DMA_FROM_DEVICE);
+		if (dma_mapping_error(mcspi->dev, t->rx_dma)) {
+			dev_dbg(mcspi->dev, "dma %cX %d bytes error\n",
+					'R', len);
+			if (tx_buf != NULL)
+				dma_unmap_single(mcspi->dev, t->tx_dma,
+						len, DMA_TO_DEVICE);
+			return -EINVAL;
+		}
+	}
+
+skip_dma_map:
 	return omap2_mcspi_work_one(mcspi, spi, t);
 }
 
@@ -1348,7 +1377,6 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
 	master->transfer_one = omap2_mcspi_transfer_one;
 	master->set_cs = omap2_mcspi_set_cs;
 	master->cleanup = omap2_mcspi_cleanup;
-	master->can_dma = omap2_mcspi_can_dma;
 	master->dev.of_node = node;
 	master->max_speed_hz = OMAP2_MCSPI_MAX_FREQ;
 	master->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 85e59a406a4c..86138e4101b0 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -126,7 +126,7 @@ static const struct lpss_config lpss_platforms[] = {
 		.reg_general = -1,
 		.reg_ssp = 0x20,
 		.reg_cs_ctrl = 0x24,
-		.reg_capabilities = 0xfc,
+		.reg_capabilities = -1,
 		.rx_threshold = 1,
 		.tx_threshold_lo = 32,
 		.tx_threshold_hi = 56,
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index eac3c960b2de..443f664534e1 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -94,6 +94,7 @@ struct ti_qspi {
 #define QSPI_FLEN(n)			((n - 1) << 0)
 #define QSPI_WLEN_MAX_BITS		128
 #define QSPI_WLEN_MAX_BYTES		16
+#define QSPI_WLEN_MASK			QSPI_WLEN(QSPI_WLEN_MAX_BITS)
 
 /* STATUS REGISTER */
 #define BUSY				0x01
@@ -235,16 +236,16 @@ static inline int ti_qspi_poll_wc(struct ti_qspi *qspi)
 	return  -ETIMEDOUT;
 }
 
-static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			  int count)
 {
-	int wlen, count, xfer_len;
+	int wlen, xfer_len;
 	unsigned int cmd;
 	const u8 *txbuf;
 	u32 data;
 
 	txbuf = t->tx_buf;
 	cmd = qspi->cmd | QSPI_WR_SNGL;
-	count = t->len;
 	wlen = t->bits_per_word >> 3;	/* in bytes */
 	xfer_len = wlen;
 
@@ -304,9 +305,10 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	return 0;
 }
 
-static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			 int count)
 {
-	int wlen, count;
+	int wlen;
 	unsigned int cmd;
 	u8 *rxbuf;
 
@@ -323,7 +325,6 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 		cmd |= QSPI_RD_SNGL;
 		break;
 	}
-	count = t->len;
 	wlen = t->bits_per_word >> 3;	/* in bytes */
 
 	while (count) {
@@ -354,12 +355,13 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	return 0;
 }
 
-static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			     int count)
 {
 	int ret;
 
 	if (t->tx_buf) {
-		ret = qspi_write_msg(qspi, t);
+		ret = qspi_write_msg(qspi, t, count);
 		if (ret) {
 			dev_dbg(qspi->dev, "Error while writing\n");
 			return ret;
@@ -367,7 +369,7 @@ static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	}
 
 	if (t->rx_buf) {
-		ret = qspi_read_msg(qspi, t);
+		ret = qspi_read_msg(qspi, t, count);
 		if (ret) {
 			dev_dbg(qspi->dev, "Error while reading\n");
 			return ret;
@@ -450,7 +452,8 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 	struct spi_device *spi = m->spi;
 	struct spi_transfer *t;
 	int status = 0, ret;
-	int frame_length;
+	unsigned int frame_len_words, transfer_len_words;
+	int wlen;
 
 	/* setup device control reg */
 	qspi->dc = 0;
@@ -462,14 +465,15 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 	if (spi->mode & SPI_CS_HIGH)
 		qspi->dc |= QSPI_CSPOL(spi->chip_select);
 
-	frame_length = (m->frame_length << 3) / spi->bits_per_word;
-
-	frame_length = clamp(frame_length, 0, QSPI_FRAME);
+	frame_len_words = 0;
+	list_for_each_entry(t, &m->transfers, transfer_list)
+		frame_len_words += t->len / (t->bits_per_word >> 3);
+	frame_len_words = min_t(unsigned int, frame_len_words, QSPI_FRAME);
 
 	/* setup command reg */
 	qspi->cmd = 0;
 	qspi->cmd |= QSPI_EN_CS(spi->chip_select);
-	qspi->cmd |= QSPI_FLEN(frame_length);
+	qspi->cmd |= QSPI_FLEN(frame_len_words);
 
 	ti_qspi_write(qspi, qspi->dc, QSPI_SPI_DC_REG);
 
@@ -479,16 +483,23 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 		ti_qspi_disable_memory_map(spi);
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		qspi->cmd |= QSPI_WLEN(t->bits_per_word);
+		qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) |
+			     QSPI_WLEN(t->bits_per_word));
+
+		wlen = t->bits_per_word >> 3;
+		transfer_len_words = min(t->len / wlen, frame_len_words);
 
-		ret = qspi_transfer_msg(qspi, t);
+		ret = qspi_transfer_msg(qspi, t, transfer_len_words * wlen);
 		if (ret) {
 			dev_dbg(qspi->dev, "transfer message failed\n");
 			mutex_unlock(&qspi->list_lock);
 			return -EINVAL;
 		}
 
-		m->actual_length += t->len;
+		m->actual_length += transfer_len_words * wlen;
+		frame_len_words -= transfer_len_words;
+		if (frame_len_words == 0)
+			break;
 	}
 
 	mutex_unlock(&qspi->list_lock);
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 983280e8d93f..e5a391aecde1 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -761,7 +761,7 @@ config FB_VESA
 
 config FB_EFI
 	bool "EFI-based Framebuffer Support"
-	depends on (FB = y) && X86 && EFI
+	depends on (FB = y) && !IA64 && EFI
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 95d293b7445a..f4c045c0051c 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -6,16 +6,14 @@
  *
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/efi.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
 #include <linux/platform_device.h>
 #include <linux/screen_info.h>
-#include <linux/dmi.h>
-#include <linux/pci.h>
 #include <video/vga.h>
-#include <asm/sysfb.h>
+#include <asm/efi.h>
 
 static bool request_mem_succeeded = false;
 
@@ -85,21 +83,13 @@ static struct fb_ops efifb_ops = {
 static int efifb_setup(char *options)
 {
 	char *this_opt;
-	int i;
 
 	if (options && *options) {
 		while ((this_opt = strsep(&options, ",")) != NULL) {
 			if (!*this_opt) continue;
 
-			for (i = 0; i < M_UNKNOWN; i++) {
-				if (efifb_dmi_list[i].base != 0 &&
-				    !strcmp(this_opt, efifb_dmi_list[i].optname)) {
-					screen_info.lfb_base = efifb_dmi_list[i].base;
-					screen_info.lfb_linelength = efifb_dmi_list[i].stride;
-					screen_info.lfb_width = efifb_dmi_list[i].width;
-					screen_info.lfb_height = efifb_dmi_list[i].height;
-				}
-			}
+			efifb_setup_from_dmi(&screen_info, this_opt);
+
 			if (!strncmp(this_opt, "base:", 5))
 				screen_info.lfb_base = simple_strtoul(this_opt+5, NULL, 0);
 			else if (!strncmp(this_opt, "stride:", 7))
@@ -338,5 +328,4 @@ static struct platform_driver efifb_driver = {
 	.remove = efifb_remove,
 };
 
-module_platform_driver(efifb_driver);
-MODULE_LICENSE("GPL");
+builtin_platform_driver(efifb_driver);
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index be7e56a338e8..e9d2135445c1 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -316,7 +316,6 @@ static const struct efi efi_xen __initconst = {
 	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
 	.reset_system             = NULL, /* Functionality provided by Xen. */
 	.set_virtual_address_map  = NULL, /* Not used under Xen. */
-	.memmap                   = NULL, /* Not used under Xen. */
 	.flags			  = 0     /* Initialized later. */
 };
 
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index feef8a9c4de7..f02404052b7b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -112,7 +112,6 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx)
 		.sb = inode->i_sb,
 	};
 	lower_file = ecryptfs_file_to_lower(file);
-	lower_file->f_pos = ctx->pos;
 	rc = iterate_dir(lower_file, &buf.ctx);
 	ctx->pos = buf.ctx.pos;
 	if (rc < 0)
@@ -223,14 +222,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
-	if (d_is_dir(ecryptfs_dentry)) {
-		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
-		mutex_lock(&crypt_stat->cs_mutex);
-		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
-		mutex_unlock(&crypt_stat->cs_mutex);
-		rc = 0;
-		goto out;
-	}
 	rc = read_or_initialize_metadata(ecryptfs_dentry);
 	if (rc)
 		goto out_put;
@@ -247,6 +238,45 @@ out:
 	return rc;
 }
 
+/**
+ * ecryptfs_dir_open
+ * @inode: inode speciying file to open
+ * @file: Structure to return filled in
+ *
+ * Opens the file specified by inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_dir_open(struct inode *inode, struct file *file)
+{
+	struct dentry *ecryptfs_dentry = file->f_path.dentry;
+	/* Private value of ecryptfs_dentry allocated in
+	 * ecryptfs_lookup() */
+	struct ecryptfs_file_info *file_info;
+	struct file *lower_file;
+
+	/* Released in ecryptfs_release or end of function if failure */
+	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
+	ecryptfs_set_file_private(file, file_info);
+	if (unlikely(!file_info)) {
+		ecryptfs_printk(KERN_ERR,
+				"Error attempting to allocate memory\n");
+		return -ENOMEM;
+	}
+	lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry),
+				 file->f_flags, current_cred());
+	if (IS_ERR(lower_file)) {
+		printk(KERN_ERR "%s: Error attempting to initialize "
+			"the lower file for the dentry with name "
+			"[%pd]; rc = [%ld]\n", __func__,
+			ecryptfs_dentry, PTR_ERR(lower_file));
+		kmem_cache_free(ecryptfs_file_info_cache, file_info);
+		return PTR_ERR(lower_file);
+	}
+	ecryptfs_set_file_lower(file, lower_file);
+	return 0;
+}
+
 static int ecryptfs_flush(struct file *file, fl_owner_t td)
 {
 	struct file *lower_file = ecryptfs_file_to_lower(file);
@@ -267,6 +297,19 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int ecryptfs_dir_release(struct inode *inode, struct file *file)
+{
+	fput(ecryptfs_file_to_lower(file));
+	kmem_cache_free(ecryptfs_file_info_cache,
+			ecryptfs_file_to_private(file));
+	return 0;
+}
+
+static loff_t ecryptfs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+	return vfs_llseek(ecryptfs_file_to_lower(file), offset, whence);
+}
+
 static int
 ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
@@ -346,20 +389,16 @@ const struct file_operations ecryptfs_dir_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-	.open = ecryptfs_open,
-	.flush = ecryptfs_flush,
-	.release = ecryptfs_release,
+	.open = ecryptfs_dir_open,
+	.release = ecryptfs_dir_release,
 	.fsync = ecryptfs_fsync,
-	.fasync = ecryptfs_fasync,
-	.splice_read = generic_file_splice_read,
-	.llseek = default_llseek,
+	.llseek = ecryptfs_dir_llseek,
 };
 
 const struct file_operations ecryptfs_main_fops = {
 	.llseek = generic_file_llseek,
 	.read_iter = ecryptfs_read_update_atime,
 	.write_iter = generic_file_write_iter,
-	.iterate = ecryptfs_readdir,
 	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index d48e0d261d78..5f22e74bbade 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -157,7 +157,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
 	return 0;
 }
 
-long
+static long
 efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
 {
 	void __user *arg = (void __user *)p;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 553c5d2db4a4..9cb54a38832d 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -216,8 +216,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	INIT_LIST_HEAD(&efivarfs_list);
 
-	err = efivar_init(efivarfs_callback, (void *)sb, false,
-			  true, &efivarfs_list);
+	err = efivar_init(efivarfs_callback, (void *)sb, true, &efivarfs_list);
 	if (err)
 		__efivar_entry_iter(efivarfs_destroy, &efivarfs_list, NULL, NULL);
 
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 5384ceb35b1c..98b3eb7d8eaf 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 	int retnamlen = 0;
 	int truncate = 0;
 	int ret = 0;
+	char *p;
+	int len;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
@@ -267,12 +269,17 @@ repeat:
 					rr->u.NM.flags);
 				break;
 			}
-			if ((strlen(retname) + rr->len - 5) >= 254) {
+			len = rr->len - 5;
+			if (retnamlen + len >= 254) {
 				truncate = 1;
 				break;
 			}
-			strncat(retname, rr->u.NM.name, rr->len - 5);
-			retnamlen += rr->len - 5;
+			p = memchr(rr->u.NM.name, '\0', len);
+			if (unlikely(p))
+				len = p - rr->u.NM.name;
+			memcpy(retname + retnamlen, rr->u.NM.name, len);
+			retnamlen += len;
+			retname[retnamlen] = '\0';
 			break;
 		case SIG('R', 'E'):
 			kfree(rs.buffer);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 03b688d19f69..37f9678ae4df 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -153,9 +153,9 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 	p = buf + len + nlen;
 	*p = '\0';
 	for (kn = kn_to; kn != common; kn = kn->parent) {
-		nlen = strlen(kn->name);
-		p -= nlen;
-		memcpy(p, kn->name, nlen);
+		size_t tmp = strlen(kn->name);
+		p -= tmp;
+		memcpy(p, kn->name, tmp);
 		*(--p) = '/';
 	}
 
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f73541fbe7af..3d670a3678f2 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
+#include <linux/seq_file.h>
 
 #include "kernfs-internal.h"
 
@@ -40,6 +41,19 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
 	return 0;
 }
 
+static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
+{
+	struct kernfs_node *node = dentry->d_fsdata;
+	struct kernfs_root *root = kernfs_root(node);
+	struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+	if (scops && scops->show_path)
+		return scops->show_path(sf, node, root);
+
+	seq_dentry(sf, dentry, " \t\n\\");
+	return 0;
+}
+
 const struct super_operations kernfs_sops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
@@ -47,6 +61,7 @@ const struct super_operations kernfs_sops = {
 
 	.remount_fs	= kernfs_sop_remount_fs,
 	.show_options	= kernfs_sop_show_options,
+	.show_path	= kernfs_sop_show_path,
 };
 
 /**
diff --git a/fs/namei.c b/fs/namei.c
index 1d9ca2d5dff6..42f8ca038254 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1794,30 +1794,49 @@ static inline unsigned int fold_hash(unsigned long hash)
 	return hash_64(hash, 32);
 }
 
+/*
+ * This is George Marsaglia's XORSHIFT generator.
+ * It implements a maximum-period LFSR in only a few
+ * instructions.  It also has the property (required
+ * by hash_name()) that mix_hash(0) = 0.
+ */
+static inline unsigned long mix_hash(unsigned long hash)
+{
+	hash ^= hash << 13;
+	hash ^= hash >> 7;
+	hash ^= hash << 17;
+	return hash;
+}
+
 #else	/* 32-bit case */
 
 #define fold_hash(x) (x)
 
+static inline unsigned long mix_hash(unsigned long hash)
+{
+	hash ^= hash << 13;
+	hash ^= hash >> 17;
+	hash ^= hash << 5;
+	return hash;
+}
+
 #endif
 
 unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 {
-	unsigned long a, mask;
-	unsigned long hash = 0;
+	unsigned long a, hash = 0;
 
 	for (;;) {
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
-		hash += a;
-		hash *= 9;
+		hash = mix_hash(hash + a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
 		if (!len)
 			goto done;
 	}
-	mask = bytemask_from_count(len);
-	hash += mask & a;
+	hash += a & bytemask_from_count(len);
 done:
 	return fold_hash(hash);
 }
@@ -1835,7 +1854,7 @@ static inline u64 hash_name(const char *name)
 	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
-		hash = (hash + a) * 9;
+		hash = mix_hash(hash + a);
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
 		b = a ^ REPEAT_BYTE('/');
@@ -2267,6 +2286,33 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 EXPORT_SYMBOL(vfs_path_lookup);
 
 /**
+ * lookup_hash - lookup single pathname component on already hashed name
+ * @name:	name and hash to lookup
+ * @base:	base directory to lookup from
+ *
+ * The name must have been verified and hashed (see lookup_one_len()).  Using
+ * this after just full_name_hash() is unsafe.
+ *
+ * This function also doesn't check for search permission on base directory.
+ *
+ * Use lookup_one_len_unlocked() instead, unless you really know what you are
+ * doing.
+ *
+ * Do not hold i_mutex; this helper takes i_mutex if necessary.
+ */
+struct dentry *lookup_hash(const struct qstr *name, struct dentry *base)
+{
+	struct dentry *ret;
+
+	ret = lookup_dcache(name, base, 0);
+	if (!ret)
+		ret = lookup_slow(name, base, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(lookup_hash);
+
+/**
  * lookup_one_len - filesystem helper to lookup single pathname component
  * @name:	pathname component to lookup
  * @base:	base directory to lookup from
@@ -2337,7 +2383,6 @@ struct dentry *lookup_one_len_unlocked(const char *name,
 	struct qstr this;
 	unsigned int c;
 	int err;
-	struct dentry *ret;
 
 	this.name = name;
 	this.len = len;
@@ -2369,10 +2414,7 @@ struct dentry *lookup_one_len_unlocked(const char *name,
 	if (err)
 		return ERR_PTR(err);
 
-	ret = lookup_dcache(&this, base, 0);
-	if (!ret)
-		ret = lookup_slow(&this, base, 0);
-	return ret;
+	return lookup_hash(&this, base);
 }
 EXPORT_SYMBOL(lookup_one_len_unlocked);
 
@@ -2942,22 +2984,10 @@ no_open:
 		dentry = lookup_real(dir, dentry, nd->flags);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
-
-		if (create_error) {
-			int open_flag = op->open_flag;
-
-			error = create_error;
-			if ((open_flag & O_EXCL)) {
-				if (!dentry->d_inode)
-					goto out;
-			} else if (!dentry->d_inode) {
-				goto out;
-			} else if ((open_flag & O_TRUNC) &&
-				   d_is_reg(dentry)) {
-				goto out;
-			}
-			/* will fail later, go on to get the right error */
-		}
+	}
+	if (create_error && !dentry->d_inode) {
+		error = create_error;
+		goto out;
 	}
 looked_up:
 	path->dentry = dentry;
@@ -4213,7 +4243,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	bool new_is_dir = false;
 	unsigned max_links = new_dir->i_sb->s_max_links;
 
-	if (source == target)
+	/*
+	 * Check source == target.
+	 * On overlayfs need to look at underlying inodes.
+	 */
+	if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
 		return 0;
 
 	error = may_delete(old_dir, old_dentry, is_dir);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0cdf497c91ef..2162434728c0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -322,3 +322,90 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
 	brelse(di_bh);
 	return acl;
 }
+
+int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl;
+	int ret;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return 0;
+
+	acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+	ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (ret)
+		return ret;
+	ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+			    acl, NULL, NULL);
+	posix_acl_release(acl);
+	return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+		   struct inode *inode,
+		   struct inode *dir,
+		   struct buffer_head *di_bh,
+		   struct buffer_head *dir_bh,
+		   struct ocfs2_alloc_context *meta_ac,
+		   struct ocfs2_alloc_context *data_ac)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl = NULL;
+	int ret = 0, ret2;
+	umode_t mode;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+			acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+						   dir_bh);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+		if (!acl) {
+			mode = inode->i_mode & ~current_umask();
+			ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+			if (ret) {
+				mlog_errno(ret);
+				goto cleanup;
+			}
+		}
+	}
+	if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+		if (S_ISDIR(inode->i_mode)) {
+			ret = ocfs2_set_acl(handle, inode, di_bh,
+					    ACL_TYPE_DEFAULT, acl,
+					    meta_ac, data_ac);
+			if (ret)
+				goto cleanup;
+		}
+		mode = inode->i_mode;
+		ret = __posix_acl_create(&acl, GFP_NOFS, &mode);
+		if (ret < 0)
+			return ret;
+
+		ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+		if (ret2) {
+			mlog_errno(ret2);
+			ret = ret2;
+			goto cleanup;
+		}
+		if (ret > 0) {
+			ret = ocfs2_set_acl(handle, inode,
+					    di_bh, ACL_TYPE_ACCESS,
+					    acl, meta_ac, data_ac);
+		}
+	}
+cleanup:
+	posix_acl_release(acl);
+	return ret;
+}
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 3fce68d08625..2783a75b3999 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -35,5 +35,10 @@ int ocfs2_set_acl(handle_t *handle,
 			 struct posix_acl *acl,
 			 struct ocfs2_alloc_context *meta_ac,
 			 struct ocfs2_alloc_context *data_ac);
+extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+			  struct buffer_head *, struct buffer_head *,
+			  struct ocfs2_alloc_context *,
+			  struct ocfs2_alloc_context *);
 
 #endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5308841756be..59cce53c91d8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1268,20 +1268,20 @@ bail_unlock_rw:
 	if (size_change)
 		ocfs2_rw_unlock(inode, 1);
 bail:
-	brelse(bh);
 
 	/* Release quota pointers in case we acquired them */
 	for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
 		dqput(transfer_to[qtype]);
 
 	if (!status && attr->ia_valid & ATTR_MODE) {
-		status = posix_acl_chmod(inode, inode->i_mode);
+		status = ocfs2_acl_chmod(inode, bh);
 		if (status < 0)
 			mlog_errno(status);
 	}
 	if (inode_locked)
 		ocfs2_inode_unlock(inode, 1);
 
+	brelse(bh);
 	return status;
 }
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6b3e87189a64..a8f1225e6d9b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir,
 	struct ocfs2_dir_lookup_result lookup = { NULL, };
 	sigset_t oldset;
 	int did_block_signals = 0;
-	struct posix_acl *default_acl = NULL, *acl = NULL;
 	struct ocfs2_dentry_lock *dl = NULL;
 
 	trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
@@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
-	if (status) {
-		mlog_errno(status);
-		goto leave;
-	}
-
 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
 							    S_ISDIR(mode),
 							    xattr_credits));
@@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir,
 		inc_nlink(dir);
 	}
 
-	if (default_acl) {
-		status = ocfs2_set_acl(handle, inode, new_fe_bh,
-				       ACL_TYPE_DEFAULT, default_acl,
-				       meta_ac, data_ac);
-	}
-	if (!status && acl) {
-		status = ocfs2_set_acl(handle, inode, new_fe_bh,
-				       ACL_TYPE_ACCESS, acl,
-				       meta_ac, data_ac);
-	}
+	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+			 meta_ac, data_ac);
 
 	if (status < 0) {
 		mlog_errno(status);
@@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir,
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
-	if (default_acl)
-		posix_acl_release(default_acl);
-	if (acl)
-		posix_acl_release(acl);
 	if (status < 0 && did_quota_inode)
 		dquot_free_inode(inode);
 	if (handle)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 744d5d90c363..92bbe93bfe10 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 	struct inode *inode = d_inode(old_dentry);
 	struct buffer_head *old_bh = NULL;
 	struct inode *new_orphan_inode = NULL;
-	struct posix_acl *default_acl, *acl;
-	umode_t mode;
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
 
-	mode = inode->i_mode;
-	error = posix_acl_create(dir, &mode, &default_acl, &acl);
-	if (error) {
-		mlog_errno(error);
-		return error;
-	}
 
-	error = ocfs2_create_inode_in_orphan(dir, mode,
+	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
 					     &new_orphan_inode);
 	if (error) {
 		mlog_errno(error);
@@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 	/* If the security isn't preserved, we need to re-initialize them. */
 	if (!preserve) {
 		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
-						    &new_dentry->d_name,
-						    default_acl, acl);
+						    &new_dentry->d_name);
 		if (error)
 			mlog_errno(error);
 	}
 out:
-	if (default_acl)
-		posix_acl_release(default_acl);
-	if (acl)
-		posix_acl_release(acl);
 	if (!error) {
 		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
 						       new_dentry);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 7d3d979f57d9..f19b7381a998 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7216,12 +7216,10 @@ out:
  */
 int ocfs2_init_security_and_acl(struct inode *dir,
 				struct inode *inode,
-				const struct qstr *qstr,
-				struct posix_acl *default_acl,
-				struct posix_acl *acl)
+				const struct qstr *qstr)
 {
-	struct buffer_head *dir_bh = NULL;
 	int ret = 0;
+	struct buffer_head *dir_bh = NULL;
 
 	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
 	if (ret) {
@@ -7234,11 +7232,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
 		mlog_errno(ret);
 		goto leave;
 	}
-
-	if (!ret && default_acl)
-		ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
-	if (!ret && acl)
-		ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
+	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+	if (ret)
+		mlog_errno(ret);
 
 	ocfs2_inode_unlock(dir, 0);
 	brelse(dir_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index f10d5b93c366..1633cc15ea1f 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 bool preserve_security);
 int ocfs2_init_security_and_acl(struct inode *dir,
 				struct inode *inode,
-				const struct qstr *qstr,
-				struct posix_acl *default_acl,
-				struct posix_acl *acl);
+				const struct qstr *qstr);
 #endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index 17cb6b1dab75..081d3d6df74b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path);
 int vfs_open(const struct path *path, struct file *file,
 	     const struct cred *cred)
 {
-	struct dentry *dentry = path->dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = vfs_select_inode(path->dentry, file->f_flags);
 
-	file->f_path = *path;
-	if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
-		inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
-		if (IS_ERR(inode))
-			return PTR_ERR(inode);
-	}
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
+	file->f_path = *path;
 	return do_dentry_open(file, inode, NULL, cred);
 }
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5d972e6cd3fe..791235e03d17 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -411,9 +411,7 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 {
 	struct dentry *dentry;
 
-	inode_lock(dir->d_inode);
-	dentry = lookup_one_len(name->name, dir, name->len);
-	inode_unlock(dir->d_inode);
+	dentry = lookup_hash(name, dir);
 
 	if (IS_ERR(dentry)) {
 		if (PTR_ERR(dentry) == -ENOENT)
diff --git a/fs/splice.c b/fs/splice.c
index b018eb485019..dd9bf7e410d2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1143,6 +1143,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
+	if (unlikely(len > MAX_RW_COUNT))
+		len = MAX_RW_COUNT;
+
 	if (in->f_op->splice_read)
 		splice_read = in->f_op->splice_read;
 	else
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index d6d5dc98d7da..3fc94a046bf5 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -53,7 +53,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline void __down_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
@@ -63,9 +63,16 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 		rwsem_down_write_failed(sem);
 }
 
-static inline void __down_write(struct rw_semaphore *sem)
+static inline int __down_write_killable(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	long tmp;
+
+	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+				     (atomic_long_t *)&sem->count);
+	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+	return 0;
 }
 
 static inline int __down_write_trylock(struct rw_semaphore *sem)
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 506c3531832e..e451534fe54d 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -560,11 +560,11 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 /**
  * atomic_fetch_or - perform *p |= mask and return old value of *p
- * @p: pointer to atomic_t
  * @mask: mask to OR on the atomic_t
+ * @p: pointer to atomic_t
  */
 #ifndef atomic_fetch_or
-static inline int atomic_fetch_or(atomic_t *p, int mask)
+static inline int atomic_fetch_or(int mask, atomic_t *p)
 {
 	int old, val = atomic_read(p);
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4bb4de8d95ea..7e9422cb5989 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -565,4 +565,16 @@ static inline struct dentry *d_real(struct dentry *dentry)
 		return dentry;
 }
 
+static inline struct inode *vfs_select_inode(struct dentry *dentry,
+					     unsigned open_flags)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE))
+		inode = dentry->d_op->d_select_inode(dentry, open_flags);
+
+	return inode;
+}
+
+
 #endif	/* __LINUX_DCACHE_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 1626474567ac..df7acb51f3cc 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -21,6 +21,7 @@
 #include <linux/pfn.h>
 #include <linux/pstore.h>
 #include <linux/reboot.h>
+#include <linux/screen_info.h>
 
 #include <asm/page.h>
 
@@ -124,6 +125,13 @@ typedef struct {
 } efi_capsule_header_t;
 
 /*
+ * EFI capsule flags
+ */
+#define EFI_CAPSULE_PERSIST_ACROSS_RESET	0x00010000
+#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
+#define EFI_CAPSULE_INITIATE_RESET		0x00040000
+
+/*
  * Allocation types for calls to boottime->allocate_pages.
  */
 #define EFI_ALLOCATE_ANY_PAGES		0
@@ -282,9 +290,10 @@ typedef struct {
 	efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
 	void *__reserved;
 	void *register_protocol_notify;
-	void *locate_handle;
+	efi_status_t (*locate_handle)(int, efi_guid_t *, void *,
+				      unsigned long *, efi_handle_t *);
 	void *locate_device_path;
-	void *install_configuration_table;
+	efi_status_t (*install_configuration_table)(efi_guid_t *, void *);
 	void *load_image;
 	void *start_image;
 	void *exit;
@@ -623,6 +632,27 @@ void efi_native_runtime_setup(void);
 	EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
 		 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
 
+#define EFI_MEMORY_ATTRIBUTES_TABLE_GUID \
+	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \
+		 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
+
+#define EFI_CONSOLE_OUT_DEVICE_GUID \
+	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \
+		 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+
+/*
+ * This GUID is used to pass to the kernel proper the struct screen_info
+ * structure that was populated by the stub based on the GOP protocol instance
+ * associated with ConOut
+ */
+#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID \
+	EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \
+		 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
+
+#define LINUX_EFI_LOADER_ENTRY_GUID \
+	EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, \
+		 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
@@ -847,6 +877,14 @@ typedef struct {
 
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
+typedef struct {
+	u32 version;
+	u32 num_entries;
+	u32 desc_size;
+	u32 reserved;
+	efi_memory_desc_t entry[0];
+} efi_memory_attributes_table_t;
+
 /*
  * All runtime access to EFI goes through this structure:
  */
@@ -868,6 +906,7 @@ extern struct efi {
 	unsigned long config_table;	/* config tables */
 	unsigned long esrt;		/* ESRT table */
 	unsigned long properties_table;	/* properties table */
+	unsigned long mem_attr_table;	/* memory attributes table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
@@ -883,7 +922,7 @@ extern struct efi {
 	efi_get_next_high_mono_count_t *get_next_high_mono_count;
 	efi_reset_system_t *reset_system;
 	efi_set_virtual_address_map_t *set_virtual_address_map;
-	struct efi_memory_map *memmap;
+	struct efi_memory_map memmap;
 	unsigned long flags;
 } efi;
 
@@ -945,7 +984,6 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource,
 extern void efi_get_time(struct timespec *now);
 extern void efi_reserve_boot_services(void);
 extern int efi_get_fdt_params(struct efi_fdt_params *params);
-extern struct efi_memory_map memmap;
 extern struct kobject *efi_kobj;
 
 extern int efi_reboot_quirk_mode;
@@ -957,12 +995,34 @@ extern void __init efi_fake_memmap(void);
 static inline void efi_fake_memmap(void) { }
 #endif
 
+/*
+ * efi_memattr_perm_setter - arch specific callback function passed into
+ *                           efi_memattr_apply_permissions() that updates the
+ *                           mapping permissions described by the second
+ *                           argument in the page tables referred to by the
+ *                           first argument.
+ */
+typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *);
+
+extern int efi_memattr_init(void);
+extern int efi_memattr_apply_permissions(struct mm_struct *mm,
+					 efi_memattr_perm_setter fn);
+
 /* Iterate through an efi_memory_map */
-#define for_each_efi_memory_desc(m, md)					   \
+#define for_each_efi_memory_desc_in_map(m, md)				   \
 	for ((md) = (m)->map;						   \
 	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
 	     (md) = (void *)(md) + (m)->desc_size)
 
+/**
+ * for_each_efi_memory_desc - iterate over descriptors in efi.memmap
+ * @md: the efi_memory_desc_t * iterator
+ *
+ * Once the loop finishes @md must not be accessed.
+ */
+#define for_each_efi_memory_desc(md) \
+	for_each_efi_memory_desc_in_map(&efi.memmap, md)
+
 /*
  * Format an EFI memory descriptor's type and attributes to a user-provided
  * character buffer, as per snprintf(), and return the buffer.
@@ -1000,7 +1060,6 @@ extern int __init efi_setup_pcdp_console(char *);
  * possible, remove EFI-related code altogether.
  */
 #define EFI_BOOT		0	/* Were we booted from EFI? */
-#define EFI_SYSTEM_TABLES	1	/* Can we use EFI system tables? */
 #define EFI_CONFIG_TABLES	2	/* Can we use EFI config tables? */
 #define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
 #define EFI_MEMMAP		4	/* Can we use EFI memory map? */
@@ -1026,8 +1085,16 @@ static inline bool efi_enabled(int feature)
 }
 static inline void
 efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
+
+static inline bool
+efi_capsule_pending(int *reset_type)
+{
+	return false;
+}
 #endif
 
+extern int efi_status_to_err(efi_status_t status);
+
 /*
  * Variable Attributes
  */
@@ -1180,6 +1247,80 @@ struct efi_simple_text_output_protocol {
 	void *test_string;
 };
 
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
+#define PIXEL_BIT_MASK					2
+#define PIXEL_BLT_ONLY					3
+#define PIXEL_FORMAT_MAX				4
+
+struct efi_pixel_bitmask {
+	u32 red_mask;
+	u32 green_mask;
+	u32 blue_mask;
+	u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+	u32 version;
+	u32 horizontal_resolution;
+	u32 vertical_resolution;
+	int pixel_format;
+	struct efi_pixel_bitmask pixel_information;
+	u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_32 {
+	u32 max_mode;
+	u32 mode;
+	u32 info;
+	u32 size_of_info;
+	u64 frame_buffer_base;
+	u32 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_64 {
+	u32 max_mode;
+	u32 mode;
+	u64 info;
+	u64 size_of_info;
+	u64 frame_buffer_base;
+	u64 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+	u32 max_mode;
+	u32 mode;
+	unsigned long info;
+	unsigned long size_of_info;
+	u64 frame_buffer_base;
+	unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_32 {
+	u32 query_mode;
+	u32 set_mode;
+	u32 blt;
+	u32 mode;
+};
+
+struct efi_graphics_output_protocol_64 {
+	u64 query_mode;
+	u64 set_mode;
+	u64 blt;
+	u64 mode;
+};
+
+struct efi_graphics_output_protocol {
+	unsigned long query_mode;
+	unsigned long set_mode;
+	unsigned long blt;
+	struct efi_graphics_output_protocol_mode *mode;
+};
+
+typedef efi_status_t (*efi_graphics_output_protocol_query_mode)(
+	struct efi_graphics_output_protocol *, u32, unsigned long *,
+	struct efi_graphics_output_mode_info **);
+
 extern struct list_head efivar_sysfs_list;
 
 static inline void
@@ -1195,8 +1336,7 @@ int efivars_unregister(struct efivars *efivars);
 struct kobject *efivars_kobject(void);
 
 int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
-		void *data, bool atomic, bool duplicates,
-		struct list_head *head);
+		void *data, bool duplicates, struct list_head *head);
 
 void efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
 void efivar_entry_remove(struct efivar_entry *entry);
@@ -1242,6 +1382,13 @@ int efivars_sysfs_init(void);
 #define EFIVARS_DATA_SIZE_MAX 1024
 
 #endif /* CONFIG_EFI_VARS */
+extern bool efi_capsule_pending(int *reset_type);
+
+extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
+				 size_t size, int *reset);
+
+extern int efi_capsule_update(efi_capsule_header_t *capsule,
+			      struct page **pages);
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
@@ -1319,5 +1466,9 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 efi_status_t efi_parse_options(char *cmdline);
 
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+			   struct screen_info *si, efi_guid_t *proto,
+			   unsigned long size);
+
 bool efi_runtime_disabled(void);
 #endif /* _LINUX_EFI_H */
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index c06c44242f39..30f089ebe0a4 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -152,6 +152,8 @@ struct kernfs_syscall_ops {
 	int (*rmdir)(struct kernfs_node *kn);
 	int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
 		      const char *new_name);
+	int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
+			 struct kernfs_root *root);
 };
 
 struct kernfs_root {
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d10ef06971b5..f75222ea7f16 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -446,6 +446,18 @@ do {								\
 	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
 } while (0)
 
+#define LOCK_CONTENDED_RETURN(_lock, try, lock)			\
+({								\
+	int ____err = 0;					\
+	if (!try(_lock)) {					\
+		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
+		____err = lock(_lock);				\
+	}							\
+	if (!____err)						\
+		lock_acquired(&(_lock)->dep_map, _RET_IP_);	\
+	____err;						\
+})
+
 #else /* CONFIG_LOCK_STAT */
 
 #define lock_contended(lockdep_map, ip) do {} while (0)
@@ -454,6 +466,9 @@ do {								\
 #define LOCK_CONTENDED(_lock, try, lock) \
 	lock(_lock)
 
+#define LOCK_CONTENDED_RETURN(_lock, try, lock) \
+	lock(_lock)
+
 #endif /* CONFIG_LOCK_STAT */
 
 #ifdef CONFIG_LOCKDEP
diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h
index b288965e8101..2c14eeca46f0 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -173,10 +173,12 @@ enum s2mps11_regulators {
 
 #define S2MPS11_LDO_VSEL_MASK	0x3F
 #define S2MPS11_BUCK_VSEL_MASK	0xFF
+#define S2MPS11_BUCK9_VSEL_MASK	0x1F
 #define S2MPS11_ENABLE_MASK	(0x03 << S2MPS11_ENABLE_SHIFT)
 #define S2MPS11_ENABLE_SHIFT	0x06
 #define S2MPS11_LDO_N_VOLTAGES	(S2MPS11_LDO_VSEL_MASK + 1)
 #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1)
+#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1)
 #define S2MPS11_RAMP_DELAY	25000		/* uV/us */
 
 #define S2MPS11_CTRL1_PWRHOLD_MASK	BIT(4)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 864d7221de84..8f468e0d2534 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -500,11 +500,20 @@ static inline int page_mapcount(struct page *page)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int total_mapcount(struct page *page);
+int page_trans_huge_mapcount(struct page *page, int *total_mapcount);
 #else
 static inline int total_mapcount(struct page *page)
 {
 	return page_mapcount(page);
 }
+static inline int page_trans_huge_mapcount(struct page *page,
+					   int *total_mapcount)
+{
+	int mapcount = page_mapcount(page);
+	if (total_mapcount)
+		*total_mapcount = mapcount;
+	return mapcount;
+}
 #endif
 
 static inline struct page *virt_to_head_page(const void *x)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 77d01700daf7..ec5ec2818a28 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,6 +79,8 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
+struct qstr;
+extern struct dentry *lookup_hash(const struct qstr *, struct dentry *);
 
 extern int follow_down_one(struct path *);
 extern int follow_down(struct path *);
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
deleted file mode 100644
index 21221338ad18..000000000000
--- a/include/linux/proportions.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * FLoating proportions
- *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * This file contains the public data structure and API definitions.
- */
-
-#ifndef _LINUX_PROPORTIONS_H
-#define _LINUX_PROPORTIONS_H
-
-#include <linux/percpu_counter.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/gfp.h>
-
-struct prop_global {
-	/*
-	 * The period over which we differentiate
-	 *
-	 *   period = 2^shift
-	 */
-	int shift;
-	/*
-	 * The total event counter aka 'time'.
-	 *
-	 * Treated as an unsigned long; the lower 'shift - 1' bits are the
-	 * counter bits, the remaining upper bits the period counter.
-	 */
-	struct percpu_counter events;
-};
-
-/*
- * global proportion descriptor
- *
- * this is needed to consistently flip prop_global structures.
- */
-struct prop_descriptor {
-	int index;
-	struct prop_global pg[2];
-	struct mutex mutex;		/* serialize the prop_global switch */
-};
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp);
-void prop_change_shift(struct prop_descriptor *pd, int new_shift);
-
-/*
- * ----- PERCPU ------
- */
-
-struct prop_local_percpu {
-	/*
-	 * the local events counter
-	 */
-	struct percpu_counter events;
-
-	/*
-	 * snapshot of the last seen global state
-	 */
-	int shift;
-	unsigned long period;
-	raw_spinlock_t lock;		/* protect the snapshot state */
-};
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp);
-void prop_local_destroy_percpu(struct prop_local_percpu *pl);
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
-void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
-		long *numerator, long *denominator);
-
-static inline
-void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__prop_inc_percpu(pd, pl);
-	local_irq_restore(flags);
-}
-
-/*
- * Limit the time part in order to ensure there are some bits left for the
- * cycle counter and fraction multiply.
- */
-#if BITS_PER_LONG == 32
-#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
-#else
-#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
-#endif
-
-#define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
-#define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)
-
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
-			   struct prop_local_percpu *pl, long frac);
-
-
-/*
- * ----- SINGLE ------
- */
-
-struct prop_local_single {
-	/*
-	 * the local events counter
-	 */
-	unsigned long events;
-
-	/*
-	 * snapshot of the last seen global state
-	 * and a lock protecting this state
-	 */
-	unsigned long period;
-	int shift;
-	raw_spinlock_t lock;		/* protect the snapshot state */
-};
-
-#define INIT_PROP_LOCAL_SINGLE(name)			\
-{	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
-}
-
-int prop_local_init_single(struct prop_local_single *pl);
-void prop_local_destroy_single(struct prop_local_single *pl);
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
-void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
-		long *numerator, long *denominator);
-
-static inline
-void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__prop_inc_single(pd, pl);
-	local_irq_restore(flags);
-}
-
-#endif /* _LINUX_PROPORTIONS_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2657aff2725b..5f1533e3d032 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -508,14 +508,7 @@ int rcu_read_lock_bh_held(void);
  * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
  * critical section unless it can prove otherwise.
  */
-#ifdef CONFIG_PREEMPT_COUNT
 int rcu_read_lock_sched_held(void);
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-	return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -532,18 +525,10 @@ static inline int rcu_read_lock_bh_held(void)
 	return 1;
 }
 
-#ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
 {
-	return preempt_count() != 0 || irqs_disabled();
-}
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-	return 1;
+	return !preemptible();
 }
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
@@ -1144,4 +1129,17 @@ static inline void rcu_sysidle_force_exit(void)
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 
 
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+	\
+	if (!atomic_read(&___rfd_beenhere) && \
+	    !atomic_xchg(&___rfd_beenhere, 1)) \
+		ftrace_dump(oops_dump_mode); \
+} while (0)
+
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 64809aea661c..93aea75029fb 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void)
 	return 0;
 }
 
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return 0;
+}
+
 static inline void rcu_force_quiescent_state(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index ad1eda9fa4da..5043cb823fb2 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void);
 unsigned long rcu_batches_completed(void);
 unsigned long rcu_batches_completed_bh(void);
 unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
 void show_rcu_gp_kthreads(void);
 
 void rcu_force_quiescent_state(void);
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 561e8615528d..ae0528b834cd 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -34,7 +34,7 @@ struct rw_semaphore {
 extern void __down_read(struct rw_semaphore *sem);
 extern int __down_read_trylock(struct rw_semaphore *sem);
 extern void __down_write(struct rw_semaphore *sem);
-extern void __down_write_nested(struct rw_semaphore *sem, int subclass);
+extern int __must_check __down_write_killable(struct rw_semaphore *sem);
 extern int __down_write_trylock(struct rw_semaphore *sem);
 extern void __up_read(struct rw_semaphore *sem);
 extern void __up_write(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 8f498cdde280..d1c12d160ace 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
+#include <linux/err.h>
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 #include <linux/osq_lock.h>
 #endif
@@ -43,6 +44,7 @@ struct rw_semaphore {
 
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
 extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
 
@@ -116,6 +118,7 @@ extern int down_read_trylock(struct rw_semaphore *sem);
  * lock for writing
  */
 extern void down_write(struct rw_semaphore *sem);
+extern int __must_check down_write_killable(struct rw_semaphore *sem);
 
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52c4847b05e2..e8dfa6f0d843 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -40,7 +40,6 @@ struct sched_param {
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
-#include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
@@ -1596,6 +1595,7 @@ struct task_struct {
 
 	unsigned long sas_ss_sp;
 	size_t sas_ss_size;
+	unsigned sas_ss_flags;
 
 	struct callback_head *task_works;
 
@@ -2575,6 +2575,18 @@ static inline int kill_cad_pid(int sig, int priv)
  */
 static inline int on_sig_stack(unsigned long sp)
 {
+	/*
+	 * If the signal stack is SS_AUTODISARM then, by construction, we
+	 * can't be on the signal stack unless user code deliberately set
+	 * SS_AUTODISARM when we were already on it.
+	 *
+	 * This improves reliability: if user state gets corrupted such that
+	 * the stack pointer points very close to the end of the signal stack,
+	 * then this check will enable the signal to be handled anyway.
+	 */
+	if (current->sas_ss_flags & SS_AUTODISARM)
+		return 0;
+
 #ifdef CONFIG_STACK_GROWSUP
 	return sp >= current->sas_ss_sp &&
 		sp - current->sas_ss_sp < current->sas_ss_size;
@@ -2592,6 +2604,13 @@ static inline int sas_ss_flags(unsigned long sp)
 	return on_sig_stack(sp) ? SS_ONSTACK : 0;
 }
 
+static inline void sas_ss_reset(struct task_struct *p)
+{
+	p->sas_ss_sp = 0;
+	p->sas_ss_size = 0;
+	p->sas_ss_flags = SS_DISABLE;
+}
+
 static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
 {
 	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 92557bbce7e7..3fbe81444d31 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -432,8 +432,10 @@ int __save_altstack(stack_t __user *, unsigned long);
 	stack_t __user *__uss = uss; \
 	struct task_struct *t = current; \
 	put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \
-	put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+	put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
 	put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+	if (t->sas_ss_flags & SS_AUTODISARM) \
+		sas_ss_reset(t); \
 } while (0);
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0a4cd4703f40..ad220359f1b0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -418,7 +418,7 @@ extern sector_t swapdev_block(int, pgoff_t);
 extern int page_swapcount(struct page *);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
-extern int reuse_swap_page(struct page *);
+extern bool reuse_swap_page(struct page *, int *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
@@ -513,8 +513,8 @@ static inline int swp_swapcount(swp_entry_t entry)
 	return 0;
 }
 
-#define reuse_swap_page(page) \
-	(!PageTransCompound(page) && page_mapcount(page) == 1)
+#define reuse_swap_page(page, total_mapcount) \
+	(page_trans_huge_mapcount(page, total_mapcount) == 1)
 
 static inline int try_to_free_swap(struct page *page)
 {
diff --git a/include/linux/uio.h b/include/linux/uio.h
index fd9bcfedad42..1b5d1cd796e2 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -87,6 +87,7 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
+unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ef72c4aada56..d3e756539d44 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -172,6 +172,77 @@ TRACE_EVENT(rcu_grace_period_init,
 );
 
 /*
+ * Tracepoint for expedited grace-period events.  Takes a string identifying
+ * the RCU flavor, the expedited grace-period sequence number, and a string
+ * identifying the grace-period-related event as follows:
+ *
+ *	"snap": Captured snapshot of expedited grace period sequence number.
+ *	"start": Started a real expedited grace period.
+ *	"end": Ended a real expedited grace period.
+ *	"endwake": Woke piggybackers up.
+ *	"done": Someone else did the expedited grace period for us.
+ */
+TRACE_EVENT(rcu_exp_grace_period,
+
+	TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent),
+
+	TP_ARGS(rcuname, gpseq, gpevent),
+
+	TP_STRUCT__entry(
+		__field(const char *, rcuname)
+		__field(unsigned long, gpseq)
+		__field(const char *, gpevent)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->gpseq = gpseq;
+		__entry->gpevent = gpevent;
+	),
+
+	TP_printk("%s %lu %s",
+		  __entry->rcuname, __entry->gpseq, __entry->gpevent)
+);
+
+/*
+ * Tracepoint for expedited grace-period funnel-locking events.  Takes a
+ * string identifying the RCU flavor, an integer identifying the rcu_node
+ * combining-tree level, another pair of integers identifying the lowest-
+ * and highest-numbered CPU associated with the current rcu_node structure,
+ * and a string.  identifying the grace-period-related event as follows:
+ *
+ *	"nxtlvl": Advance to next level of rcu_node funnel
+ *	"wait": Wait for someone else to do expedited GP
+ */
+TRACE_EVENT(rcu_exp_funnel_lock,
+
+	TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi,
+		 const char *gpevent),
+
+	TP_ARGS(rcuname, level, grplo, grphi, gpevent),
+
+	TP_STRUCT__entry(
+		__field(const char *, rcuname)
+		__field(u8, level)
+		__field(int, grplo)
+		__field(int, grphi)
+		__field(const char *, gpevent)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->level = level;
+		__entry->grplo = grplo;
+		__entry->grphi = grphi;
+		__entry->gpevent = gpevent;
+	),
+
+	TP_printk("%s %d %d %d %s",
+		  __entry->rcuname, __entry->level, __entry->grplo,
+		  __entry->grphi, __entry->gpevent)
+);
+
+/*
  * Tracepoint for RCU no-CBs CPU callback handoffs.  This event is intended
  * to assist debugging of these handoffs.
  *
@@ -704,11 +775,15 @@ TRACE_EVENT(rcu_barrier,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
-				    qsmask) do { } while (0)
 #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+				    qsmask) do { } while (0)
+#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \
+	do { } while (0)
+#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
+	do { } while (0)
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index f80277569f24..e601c8c3bdc7 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -19,14 +19,20 @@
 #ifndef _LINUX_IF_H
 #define _LINUX_IF_H
 
+#include <linux/libc-compat.h>          /* for compatibility with glibc */
 #include <linux/types.h>		/* for "__kernel_caddr_t" et al	*/
 #include <linux/socket.h>		/* for "struct sockaddr" et al	*/
 #include <linux/compiler.h>		/* for "__user" et al           */
 
+#if __UAPI_DEF_IF_IFNAMSIZ
 #define	IFNAMSIZ	16
+#endif /* __UAPI_DEF_IF_IFNAMSIZ */
 #define	IFALIASZ	256
 #include <linux/hdlc/ioctl.h>
 
+/* For glibc compatibility. An empty enum does not compile. */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \
+    __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0
 /**
  * enum net_device_flags - &struct net_device flags
  *
@@ -68,6 +74,8 @@
  * @IFF_ECHO: echo sent packets. Volatile.
  */
 enum net_device_flags {
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
 	IFF_UP				= 1<<0,  /* sysfs */
 	IFF_BROADCAST			= 1<<1,  /* volatile */
 	IFF_DEBUG			= 1<<2,  /* sysfs */
@@ -84,11 +92,17 @@ enum net_device_flags {
 	IFF_PORTSEL			= 1<<13, /* sysfs */
 	IFF_AUTOMEDIA			= 1<<14, /* sysfs */
 	IFF_DYNAMIC			= 1<<15, /* sysfs */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 	IFF_LOWER_UP			= 1<<16, /* volatile */
 	IFF_DORMANT			= 1<<17, /* volatile */
 	IFF_ECHO			= 1<<18, /* volatile */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
 };
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
 #define IFF_UP				IFF_UP
 #define IFF_BROADCAST			IFF_BROADCAST
 #define IFF_DEBUG			IFF_DEBUG
@@ -105,9 +119,13 @@ enum net_device_flags {
 #define IFF_PORTSEL			IFF_PORTSEL
 #define IFF_AUTOMEDIA			IFF_AUTOMEDIA
 #define IFF_DYNAMIC			IFF_DYNAMIC
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 #define IFF_LOWER_UP			IFF_LOWER_UP
 #define IFF_DORMANT			IFF_DORMANT
 #define IFF_ECHO			IFF_ECHO
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
 
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
@@ -166,6 +184,8 @@ enum {
  *	being very small might be worth keeping for clean configuration.
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFMAP
 struct ifmap {
 	unsigned long mem_start;
 	unsigned long mem_end;
@@ -175,6 +195,7 @@ struct ifmap {
 	unsigned char port;
 	/* 3 bytes spare */
 };
+#endif /* __UAPI_DEF_IF_IFMAP */
 
 struct if_settings {
 	unsigned int type;	/* Type of physical device or protocol */
@@ -200,6 +221,8 @@ struct if_settings {
  * remainder may be interface specific.
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFREQ
 struct ifreq {
 #define IFHWADDRLEN	6
 	union
@@ -223,6 +246,7 @@ struct ifreq {
 		struct	if_settings ifru_settings;
 	} ifr_ifru;
 };
+#endif /* __UAPI_DEF_IF_IFREQ */
 
 #define ifr_name	ifr_ifrn.ifrn_name	/* interface name 	*/
 #define ifr_hwaddr	ifr_ifru.ifru_hwaddr	/* MAC address 		*/
@@ -249,6 +273,8 @@ struct ifreq {
  * must know all networks accessible).
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFCONF
 struct ifconf  {
 	int	ifc_len;			/* size of buffer	*/
 	union {
@@ -256,6 +282,8 @@ struct ifconf  {
 		struct ifreq __user *ifcu_req;
 	} ifc_ifcu;
 };
+#endif /* __UAPI_DEF_IF_IFCONF */
+
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 7d024ceb075d..d5e38c73377c 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -51,6 +51,40 @@
 /* We have included glibc headers... */
 #if defined(__GLIBC__)
 
+/* Coordinate with glibc net/if.h header. */
+#if defined(_NET_IF_H)
+
+/* GLIBC headers included first so don't define anything
+ * that would already be defined. */
+
+#define __UAPI_DEF_IF_IFCONF 0
+#define __UAPI_DEF_IF_IFMAP 0
+#define __UAPI_DEF_IF_IFNAMSIZ 0
+#define __UAPI_DEF_IF_IFREQ 0
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+
+#else /* _NET_IF_H */
+
+/* Linux headers included first, and we must define everything
+ * we need. The expectation is that glibc will check the
+ * __UAPI_DEF_* defines and adjust appropriately. */
+
+#define __UAPI_DEF_IF_IFCONF 1
+#define __UAPI_DEF_IF_IFMAP 1
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#define __UAPI_DEF_IF_IFREQ 1
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+
+#endif /* _NET_IF_H */
+
 /* Coordinate with glibc netinet/in.h header. */
 #if defined(_NETINET_IN_H)
 
@@ -117,6 +151,16 @@
  * that we need. */
 #else /* !defined(__GLIBC__) */
 
+/* Definitions for if.h */
+#define __UAPI_DEF_IF_IFCONF 1
+#define __UAPI_DEF_IF_IFMAP 1
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#define __UAPI_DEF_IF_IFREQ 1
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+
 /* Definitions for in.h */
 #define __UAPI_DEF_IN_ADDR		1
 #define __UAPI_DEF_IN_IPPROTO		1
diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h
index e1bd50c29ded..cd0804b6bfa2 100644
--- a/include/uapi/linux/signal.h
+++ b/include/uapi/linux/signal.h
@@ -7,4 +7,9 @@
 #define SS_ONSTACK	1
 #define SS_DISABLE	2
 
+/* bit-flags */
+#define SS_AUTODISARM	(1U << 31)	/* disable sas during sighandling */
+/* mask for all SS_xxx flags */
+#define SS_FLAG_BITS	SS_AUTODISARM
+
 #endif /* _UAPI_LINUX_SIGNAL_H */
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
index 242cf0c6e33d..e3969bd939e4 100644
--- a/include/uapi/linux/tc_act/Kbuild
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -10,3 +10,4 @@ header-y += tc_skbedit.h
 header-y += tc_vlan.h
 header-y += tc_bpf.h
 header-y += tc_connmark.h
+header-y += tc_ife.h
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a7d31ffd3..86cb5c6e8932 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	cgroup_free_root(root);
 }
 
+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+	struct cgroup *res = NULL;
+	struct css_set *cset;
+
+	lockdep_assert_held(&css_set_lock);
+
+	rcu_read_lock();
+
+	cset = current->nsproxy->cgroup_ns->root_cset;
+	if (cset == &init_css_set) {
+		res = &root->cgrp;
+	} else {
+		struct cgrp_cset_link *link;
+
+		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+			struct cgroup *c = link->cgrp;
+
+			if (c->root == root) {
+				res = c;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	BUG_ON(!res);
+	return res;
+}
+
 /* look up cgroup associated with given css_set on the specified hierarchy */
 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
 					    struct cgroup_root *root)
@@ -1593,6 +1628,33 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
 	return 0;
 }
 
+static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
+			    struct kernfs_root *kf_root)
+{
+	int len = 0;
+	char *buf = NULL;
+	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
+	struct cgroup *ns_cgroup;
+
+	buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	spin_lock_bh(&css_set_lock);
+	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
+	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
+	spin_unlock_bh(&css_set_lock);
+
+	if (len >= PATH_MAX)
+		len = -ERANGE;
+	else if (len > 0) {
+		seq_escape(sf, buf, " \t\n\\");
+		len = 0;
+	}
+	kfree(buf);
+	return len;
+}
+
 static int cgroup_show_options(struct seq_file *seq,
 			       struct kernfs_root *kf_root)
 {
@@ -5433,6 +5495,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
 	.mkdir			= cgroup_mkdir,
 	.rmdir			= cgroup_rmdir,
 	.rename			= cgroup_rename,
+	.show_path		= cgroup_show_path,
 };
 
 static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
diff --git a/kernel/fork.c b/kernel/fork.c
index d277e83ed3e0..3e8451527cbe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1494,7 +1494,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
 	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
-		p->sas_ss_sp = p->sas_ss_size = 0;
+		sas_ss_reset(p);
 
 	/*
 	 * Syscall tracing and stepping should be turned off in the
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0ee6dc1..874d53eaf389 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -708,7 +708,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
  * yet. Otherwise we look it up. We cache the result in the lock object
  * itself, so actual lookup of the hash should be once per lock object.
  */
-static inline struct lock_class *
+static struct lock_class *
 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 {
 	struct lockdep_subclass_key *key;
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919d63b2..f8c5af52a131 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -75,12 +75,7 @@ struct lock_stress_stats {
 	long n_lock_acquired;
 };
 
-#if defined(MODULE)
-#define LOCKTORTURE_RUNNABLE_INIT 1
-#else
-#define LOCKTORTURE_RUNNABLE_INIT 0
-#endif
-int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
+int torture_runnable = IS_ENABLED(MODULE);
 module_param(torture_runnable, int, 0444);
 MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
 
@@ -394,12 +389,12 @@ static void torture_rtmutex_boost(struct torture_random_state *trsp)
 
 	if (!rt_task(current)) {
 		/*
-		 * (1) Boost priority once every ~50k operations. When the
+		 * Boost priority once every ~50k operations. When the
 		 * task tries to take the lock, the rtmutex it will account
 		 * for the new priority, and do any corresponding pi-dance.
 		 */
-		if (!(torture_random(trsp) %
-		      (cxt.nrealwriters_stress * factor))) {
+		if (trsp && !(torture_random(trsp) %
+			      (cxt.nrealwriters_stress * factor))) {
 			policy = SCHED_FIFO;
 			param.sched_priority = MAX_RT_PRIO - 1;
 		} else /* common case, do nothing */
@@ -748,6 +743,15 @@ static void lock_torture_cleanup(void)
 	if (torture_cleanup_begin())
 		return;
 
+	/*
+	 * Indicates early cleanup, meaning that the test has not run,
+	 * such as when passing bogus args when loading the module. As
+	 * such, only perform the underlying torture-specific cleanups,
+	 * and avoid anything related to locktorture.
+	 */
+	if (!cxt.lwsa)
+		goto end;
+
 	if (writer_tasks) {
 		for (i = 0; i < cxt.nrealwriters_stress; i++)
 			torture_stop_kthread(lock_torture_writer,
@@ -776,6 +780,7 @@ static void lock_torture_cleanup(void)
 	else
 		lock_torture_print_module_parms(cxt.cur_ops,
 						"End of test: SUCCESS");
+end:
 	torture_cleanup_end();
 }
 
@@ -870,6 +875,7 @@ static int __init lock_torture_init(void)
 			VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
 			firsterr = -ENOMEM;
 			kfree(cxt.lwsa);
+			cxt.lwsa = NULL;
 			goto unwind;
 		}
 
@@ -878,6 +884,7 @@ static int __init lock_torture_init(void)
 			cxt.lrsa[i].n_lock_acquired = 0;
 		}
 	}
+
 	lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
 
 	/* Prepare torture context. */
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d734b7502001..22e025309845 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -191,8 +191,6 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf,
 
 		for (i = 0 ; i < qstat_num; i++)
 			WRITE_ONCE(ptr[i], 0);
-		for (i = 0 ; i < qstat_num; i++)
-			WRITE_ONCE(ptr[i], 0);
 	}
 	return count;
 }
@@ -214,10 +212,8 @@ static int __init init_qspinlock_stat(void)
 	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
 	int i;
 
-	if (!d_qstat) {
-		pr_warn("Could not create 'qlockstat' debugfs directory\n");
-		return 0;
-	}
+	if (!d_qstat)
+		goto out;
 
 	/*
 	 * Create the debugfs files
@@ -227,12 +223,20 @@ static int __init init_qspinlock_stat(void)
 	 * performance.
 	 */
 	for (i = 0; i < qstat_num; i++)
-		debugfs_create_file(qstat_names[i], 0400, d_qstat,
-				   (void *)(long)i, &fops_qstat);
+		if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
+					 (void *)(long)i, &fops_qstat))
+			goto fail_undo;
+
+	if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
+				 (void *)(long)qstat_reset_cnts, &fops_qstat))
+		goto fail_undo;
 
-	debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
-			   (void *)(long)qstat_reset_cnts, &fops_qstat);
 	return 0;
+fail_undo:
+	debugfs_remove_recursive(d_qstat);
+out:
+	pr_warn("Could not create 'qlockstat' debugfs entries\n");
+	return -ENOMEM;
 }
 fs_initcall(init_qspinlock_stat);
 
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 3a5048572065..1591f6b3539f 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -191,11 +191,12 @@ int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * get a write lock on the semaphore
  */
-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
+int __sched __down_write_common(struct rw_semaphore *sem, int state)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
 	unsigned long flags;
+	int ret = 0;
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
@@ -215,21 +216,33 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 		 */
 		if (sem->count == 0)
 			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (signal_pending_state(state, current)) {
+			ret = -EINTR;
+			goto out;
+		}
+		set_task_state(tsk, state);
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		schedule();
 		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
 	/* got the lock */
 	sem->count = -1;
+out:
 	list_del(&waiter.list);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	return ret;
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+	return __down_write_common(sem, TASK_KILLABLE);
 }
 
 /*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de05b2d1..09e30c6225e5 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -433,12 +433,13 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 /*
  * Wait until we successfully acquire the write lock
  */
-__visible
-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore *
+__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 {
 	long count;
 	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
+	struct rw_semaphore *ret = sem;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -478,7 +479,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	set_current_state(state);
 	while (true) {
 		if (rwsem_try_write_lock(count, sem))
 			break;
@@ -486,21 +487,48 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
 		/* Block until there are no active lockers. */
 		do {
+			if (signal_pending_state(state, current))
+				goto out_nolock;
+
 			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			set_current_state(state);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
 	__set_current_state(TASK_RUNNING);
+	list_del(&waiter.list);
+	raw_spin_unlock_irq(&sem->wait_lock);
 
+	return ret;
+
+out_nolock:
+	__set_current_state(TASK_RUNNING);
+	raw_spin_lock_irq(&sem->wait_lock);
 	list_del(&waiter.list);
+	if (list_empty(&sem->wait_list))
+		rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+	else
+		__rwsem_do_wake(sem, RWSEM_WAKE_ANY);
 	raw_spin_unlock_irq(&sem->wait_lock);
 
-	return sem;
+	return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(rwsem_down_write_failed);
 
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed_killable(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_write_failed_killable);
+
 /*
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 205be0ce34de..c817216c1615 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -55,6 +55,25 @@ void __sched down_write(struct rw_semaphore *sem)
 EXPORT_SYMBOL(down_write);
 
 /*
+ * lock for writing
+ */
+int __sched down_write_killable(struct rw_semaphore *sem)
+{
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+		rwsem_release(&sem->dep_map, 1, _RET_IP_);
+		return -EINTR;
+	}
+
+	rwsem_set_owner(sem);
+	return 0;
+}
+
+EXPORT_SYMBOL(down_write_killable);
+
+/*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
 int down_write_trylock(struct rw_semaphore *sem)
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 032b2c015beb..18dfc485225c 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -5,6 +5,7 @@ KCOV_INSTRUMENT := n
 obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
 obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_PREEMPT_RCU) += tree.o
 obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
new file mode 100644
index 000000000000..3cee0d8393ed
--- /dev/null
+++ b/kernel/rcu/rcuperf.c
@@ -0,0 +1,655 @@
+/*
+ * Read-Copy Update module-based performance-test facility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2015
+ *
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <linux/torture.h>
+#include <linux/vmalloc.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
+
+#define PERF_FLAG "-perf:"
+#define PERFOUT_STRING(s) \
+	pr_alert("%s" PERF_FLAG s "\n", perf_type)
+#define VERBOSE_PERFOUT_STRING(s) \
+	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
+#define VERBOSE_PERFOUT_ERRSTRING(s) \
+	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
+
+torture_param(bool, gp_exp, true, "Use expedited GP wait primitives");
+torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, nreaders, -1, "Number of RCU reader threads");
+torture_param(int, nwriters, -1, "Number of RCU updater threads");
+torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
+torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+
+static char *perf_type = "rcu";
+module_param(perf_type, charp, 0444);
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+
+static int nrealreaders;
+static int nrealwriters;
+static struct task_struct **writer_tasks;
+static struct task_struct **reader_tasks;
+static struct task_struct *shutdown_task;
+
+static u64 **writer_durations;
+static int *writer_n_durations;
+static atomic_t n_rcu_perf_reader_started;
+static atomic_t n_rcu_perf_writer_started;
+static atomic_t n_rcu_perf_writer_finished;
+static wait_queue_head_t shutdown_wq;
+static u64 t_rcu_perf_writer_started;
+static u64 t_rcu_perf_writer_finished;
+static unsigned long b_rcu_perf_writer_started;
+static unsigned long b_rcu_perf_writer_finished;
+
+static int rcu_perf_writer_state;
+#define RTWS_INIT		0
+#define RTWS_EXP_SYNC		1
+#define RTWS_SYNC		2
+#define RTWS_IDLE		2
+#define RTWS_STOPPING		3
+
+#define MAX_MEAS 10000
+#define MIN_MEAS 100
+
+#if defined(MODULE) || defined(CONFIG_RCU_PERF_TEST_RUNNABLE)
+#define RCUPERF_RUNNABLE_INIT 1
+#else
+#define RCUPERF_RUNNABLE_INIT 0
+#endif
+static int perf_runnable = RCUPERF_RUNNABLE_INIT;
+module_param(perf_runnable, int, 0444);
+MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
+
+/*
+ * Operations vector for selecting different types of tests.
+ */
+
+struct rcu_perf_ops {
+	int ptype;
+	void (*init)(void);
+	void (*cleanup)(void);
+	int (*readlock)(void);
+	void (*readunlock)(int idx);
+	unsigned long (*started)(void);
+	unsigned long (*completed)(void);
+	unsigned long (*exp_completed)(void);
+	void (*sync)(void);
+	void (*exp_sync)(void);
+	const char *name;
+};
+
+static struct rcu_perf_ops *cur_ops;
+
+/*
+ * Definitions for rcu perf testing.
+ */
+
+static int rcu_perf_read_lock(void) __acquires(RCU)
+{
+	rcu_read_lock();
+	return 0;
+}
+
+static void rcu_perf_read_unlock(int idx) __releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static unsigned long __maybe_unused rcu_no_completed(void)
+{
+	return 0;
+}
+
+static void rcu_sync_perf_init(void)
+{
+}
+
+static struct rcu_perf_ops rcu_ops = {
+	.ptype		= RCU_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= rcu_perf_read_lock,
+	.readunlock	= rcu_perf_read_unlock,
+	.started	= rcu_batches_started,
+	.completed	= rcu_batches_completed,
+	.exp_completed	= rcu_exp_batches_completed,
+	.sync		= synchronize_rcu,
+	.exp_sync	= synchronize_rcu_expedited,
+	.name		= "rcu"
+};
+
+/*
+ * Definitions for rcu_bh perf testing.
+ */
+
+static int rcu_bh_perf_read_lock(void) __acquires(RCU_BH)
+{
+	rcu_read_lock_bh();
+	return 0;
+}
+
+static void rcu_bh_perf_read_unlock(int idx) __releases(RCU_BH)
+{
+	rcu_read_unlock_bh();
+}
+
+static struct rcu_perf_ops rcu_bh_ops = {
+	.ptype		= RCU_BH_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= rcu_bh_perf_read_lock,
+	.readunlock	= rcu_bh_perf_read_unlock,
+	.started	= rcu_batches_started_bh,
+	.completed	= rcu_batches_completed_bh,
+	.exp_completed	= rcu_exp_batches_completed_sched,
+	.sync		= synchronize_rcu_bh,
+	.exp_sync	= synchronize_rcu_bh_expedited,
+	.name		= "rcu_bh"
+};
+
+/*
+ * Definitions for srcu perf testing.
+ */
+
+DEFINE_STATIC_SRCU(srcu_ctl_perf);
+static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;
+
+static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
+{
+	return srcu_read_lock(srcu_ctlp);
+}
+
+static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
+{
+	srcu_read_unlock(srcu_ctlp, idx);
+}
+
+static unsigned long srcu_perf_completed(void)
+{
+	return srcu_batches_completed(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize(void)
+{
+	synchronize_srcu(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize_expedited(void)
+{
+	synchronize_srcu_expedited(srcu_ctlp);
+}
+
+static struct rcu_perf_ops srcu_ops = {
+	.ptype		= SRCU_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= srcu_perf_read_lock,
+	.readunlock	= srcu_perf_read_unlock,
+	.started	= NULL,
+	.completed	= srcu_perf_completed,
+	.exp_completed	= srcu_perf_completed,
+	.sync		= srcu_perf_synchronize,
+	.exp_sync	= srcu_perf_synchronize_expedited,
+	.name		= "srcu"
+};
+
+/*
+ * Definitions for sched perf testing.
+ */
+
+static int sched_perf_read_lock(void)
+{
+	preempt_disable();
+	return 0;
+}
+
+static void sched_perf_read_unlock(int idx)
+{
+	preempt_enable();
+}
+
+static struct rcu_perf_ops sched_ops = {
+	.ptype		= RCU_SCHED_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= sched_perf_read_lock,
+	.readunlock	= sched_perf_read_unlock,
+	.started	= rcu_batches_started_sched,
+	.completed	= rcu_batches_completed_sched,
+	.exp_completed	= rcu_exp_batches_completed_sched,
+	.sync		= synchronize_sched,
+	.exp_sync	= synchronize_sched_expedited,
+	.name		= "sched"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Definitions for RCU-tasks perf testing.
+ */
+
+static int tasks_perf_read_lock(void)
+{
+	return 0;
+}
+
+static void tasks_perf_read_unlock(int idx)
+{
+}
+
+static struct rcu_perf_ops tasks_ops = {
+	.ptype		= RCU_TASKS_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= tasks_perf_read_lock,
+	.readunlock	= tasks_perf_read_unlock,
+	.started	= rcu_no_completed,
+	.completed	= rcu_no_completed,
+	.sync		= synchronize_rcu_tasks,
+	.exp_sync	= synchronize_rcu_tasks,
+	.name		= "tasks"
+};
+
+#define RCUPERF_TASKS_OPS &tasks_ops,
+
+static bool __maybe_unused torturing_tasks(void)
+{
+	return cur_ops == &tasks_ops;
+}
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+
+#define RCUPERF_TASKS_OPS
+
+static bool __maybe_unused torturing_tasks(void)
+{
+	return false;
+}
+
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
+/*
+ * If performance tests complete, wait for shutdown to commence.
+ */
+static void rcu_perf_wait_shutdown(void)
+{
+	cond_resched_rcu_qs();
+	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
+		return;
+	while (!torture_must_stop())
+		schedule_timeout_uninterruptible(1);
+}
+
+/*
+ * RCU perf reader kthread.  Repeatedly does empty RCU read-side
+ * critical section, minimizing update-side interference.
+ */
+static int
+rcu_perf_reader(void *arg)
+{
+	unsigned long flags;
+	int idx;
+	long me = (long)arg;
+
+	VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
+	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_user_nice(current, MAX_NICE);
+	atomic_inc(&n_rcu_perf_reader_started);
+
+	do {
+		local_irq_save(flags);
+		idx = cur_ops->readlock();
+		cur_ops->readunlock(idx);
+		local_irq_restore(flags);
+		rcu_perf_wait_shutdown();
+	} while (!torture_must_stop());
+	torture_kthread_stopping("rcu_perf_reader");
+	return 0;
+}
+
+/*
+ * RCU perf writer kthread.  Repeatedly does a grace period.
+ */
+static int
+rcu_perf_writer(void *arg)
+{
+	int i = 0;
+	int i_max;
+	long me = (long)arg;
+	struct sched_param sp;
+	bool started = false, done = false, alldone = false;
+	u64 t;
+	u64 *wdp;
+	u64 *wdpp = writer_durations[me];
+
+	VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
+	WARN_ON(rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp);
+	WARN_ON(rcu_gp_is_normal() && gp_exp);
+	WARN_ON(!wdpp);
+	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	sp.sched_priority = 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+
+	if (holdoff)
+		schedule_timeout_uninterruptible(holdoff * HZ);
+
+	t = ktime_get_mono_fast_ns();
+	if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
+		t_rcu_perf_writer_started = t;
+		if (gp_exp) {
+			b_rcu_perf_writer_started =
+				cur_ops->exp_completed() / 2;
+		} else {
+			b_rcu_perf_writer_started =
+				cur_ops->completed();
+		}
+	}
+
+	do {
+		wdp = &wdpp[i];
+		*wdp = ktime_get_mono_fast_ns();
+		if (gp_exp) {
+			rcu_perf_writer_state = RTWS_EXP_SYNC;
+			cur_ops->exp_sync();
+		} else {
+			rcu_perf_writer_state = RTWS_SYNC;
+			cur_ops->sync();
+		}
+		rcu_perf_writer_state = RTWS_IDLE;
+		t = ktime_get_mono_fast_ns();
+		*wdp = t - *wdp;
+		i_max = i;
+		if (!started &&
+		    atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
+			started = true;
+		if (!done && i >= MIN_MEAS) {
+			done = true;
+			sp.sched_priority = 0;
+			sched_setscheduler_nocheck(current,
+						   SCHED_NORMAL, &sp);
+			pr_alert("%s" PERF_FLAG
+				 "rcu_perf_writer %ld has %d measurements\n",
+				 perf_type, me, MIN_MEAS);
+			if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
+			    nrealwriters) {
+				schedule_timeout_interruptible(10);
+				rcu_ftrace_dump(DUMP_ALL);
+				PERFOUT_STRING("Test complete");
+				t_rcu_perf_writer_finished = t;
+				if (gp_exp) {
+					b_rcu_perf_writer_finished =
+						cur_ops->exp_completed() / 2;
+				} else {
+					b_rcu_perf_writer_finished =
+						cur_ops->completed();
+				}
+				if (shutdown) {
+					smp_mb(); /* Assign before wake. */
+					wake_up(&shutdown_wq);
+				}
+			}
+		}
+		if (done && !alldone &&
+		    atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
+			alldone = true;
+		if (started && !alldone && i < MAX_MEAS - 1)
+			i++;
+		rcu_perf_wait_shutdown();
+	} while (!torture_must_stop());
+	rcu_perf_writer_state = RTWS_STOPPING;
+	writer_n_durations[me] = i_max;
+	torture_kthread_stopping("rcu_perf_writer");
+	return 0;
+}
+
+static inline void
+rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
+{
+	pr_alert("%s" PERF_FLAG
+		 "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
+		 perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+}
+
+static void
+rcu_perf_cleanup(void)
+{
+	int i;
+	int j;
+	int ngps = 0;
+	u64 *wdp;
+	u64 *wdpp;
+
+	if (torture_cleanup_begin())
+		return;
+
+	if (reader_tasks) {
+		for (i = 0; i < nrealreaders; i++)
+			torture_stop_kthread(rcu_perf_reader,
+					     reader_tasks[i]);
+		kfree(reader_tasks);
+	}
+
+	if (writer_tasks) {
+		for (i = 0; i < nrealwriters; i++) {
+			torture_stop_kthread(rcu_perf_writer,
+					     writer_tasks[i]);
+			if (!writer_n_durations)
+				continue;
+			j = writer_n_durations[i];
+			pr_alert("%s%s writer %d gps: %d\n",
+				 perf_type, PERF_FLAG, i, j);
+			ngps += j;
+		}
+		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+			 perf_type, PERF_FLAG,
+			 t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
+			 t_rcu_perf_writer_finished -
+			 t_rcu_perf_writer_started,
+			 ngps,
+			 b_rcu_perf_writer_finished -
+			 b_rcu_perf_writer_started);
+		for (i = 0; i < nrealwriters; i++) {
+			if (!writer_durations)
+				break;
+			if (!writer_n_durations)
+				continue;
+			wdpp = writer_durations[i];
+			if (!wdpp)
+				continue;
+			for (j = 0; j <= writer_n_durations[i]; j++) {
+				wdp = &wdpp[j];
+				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+					perf_type, PERF_FLAG,
+					i, j, *wdp);
+				if (j % 100 == 0)
+					schedule_timeout_uninterruptible(1);
+			}
+			kfree(writer_durations[i]);
+		}
+		kfree(writer_tasks);
+		kfree(writer_durations);
+		kfree(writer_n_durations);
+	}
+
+	/* Do flavor-specific cleanup operations.  */
+	if (cur_ops->cleanup != NULL)
+		cur_ops->cleanup();
+
+	torture_cleanup_end();
+}
+
+/*
+ * Return the number if non-negative.  If -1, the number of CPUs.
+ * If less than -1, that much less than the number of CPUs, but
+ * at least one.
+ */
+static int compute_real(int n)
+{
+	int nr;
+
+	if (n >= 0) {
+		nr = n;
+	} else {
+		nr = num_online_cpus() + 1 + n;
+		if (nr <= 0)
+			nr = 1;
+	}
+	return nr;
+}
+
+/*
+ * RCU perf shutdown kthread.  Just waits to be awakened, then shuts
+ * down system.
+ */
+static int
+rcu_perf_shutdown(void *arg)
+{
+	do {
+		wait_event(shutdown_wq,
+			   atomic_read(&n_rcu_perf_writer_finished) >=
+			   nrealwriters);
+	} while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
+	smp_mb(); /* Wake before output. */
+	rcu_perf_cleanup();
+	kernel_power_off();
+	return -EINVAL;
+}
+
+static int __init
+rcu_perf_init(void)
+{
+	long i;
+	int firsterr = 0;
+	static struct rcu_perf_ops *perf_ops[] = {
+		&rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+		RCUPERF_TASKS_OPS
+	};
+
+	if (!torture_init_begin(perf_type, verbose, &perf_runnable))
+		return -EBUSY;
+
+	/* Process args and tell the world that the perf'er is on the job. */
+	for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
+		cur_ops = perf_ops[i];
+		if (strcmp(perf_type, cur_ops->name) == 0)
+			break;
+	}
+	if (i == ARRAY_SIZE(perf_ops)) {
+		pr_alert("rcu-perf: invalid perf type: \"%s\"\n",
+			 perf_type);
+		pr_alert("rcu-perf types:");
+		for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
+			pr_alert(" %s", perf_ops[i]->name);
+		pr_alert("\n");
+		firsterr = -EINVAL;
+		goto unwind;
+	}
+	if (cur_ops->init)
+		cur_ops->init();
+
+	nrealwriters = compute_real(nwriters);
+	nrealreaders = compute_real(nreaders);
+	atomic_set(&n_rcu_perf_reader_started, 0);
+	atomic_set(&n_rcu_perf_writer_started, 0);
+	atomic_set(&n_rcu_perf_writer_finished, 0);
+	rcu_perf_print_module_parms(cur_ops, "Start of test");
+
+	/* Start up the kthreads. */
+
+	if (shutdown) {
+		init_waitqueue_head(&shutdown_wq);
+		firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
+						  shutdown_task);
+		if (firsterr)
+			goto unwind;
+		schedule_timeout_uninterruptible(1);
+	}
+	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
+			       GFP_KERNEL);
+	if (reader_tasks == NULL) {
+		VERBOSE_PERFOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+	for (i = 0; i < nrealreaders; i++) {
+		firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
+						  reader_tasks[i]);
+		if (firsterr)
+			goto unwind;
+	}
+	while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
+		schedule_timeout_uninterruptible(1);
+	writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]),
+			       GFP_KERNEL);
+	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
+				   GFP_KERNEL);
+	writer_n_durations =
+		kcalloc(nrealwriters, sizeof(*writer_n_durations),
+			GFP_KERNEL);
+	if (!writer_tasks || !writer_durations || !writer_n_durations) {
+		VERBOSE_PERFOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+	for (i = 0; i < nrealwriters; i++) {
+		writer_durations[i] =
+			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
+				GFP_KERNEL);
+		if (!writer_durations[i])
+			goto unwind;
+		firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
+						  writer_tasks[i]);
+		if (firsterr)
+			goto unwind;
+	}
+	torture_init_end();
+	return 0;
+
+unwind:
+	torture_init_end();
+	rcu_perf_cleanup();
+	return firsterr;
+}
+
+module_init(rcu_perf_init);
+module_exit(rcu_perf_cleanup);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 250ea67c1615..084a28a732eb 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -130,8 +130,8 @@ static struct rcu_torture __rcu *rcu_torture_current;
 static unsigned long rcu_torture_current_version;
 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
 static DEFINE_SPINLOCK(rcu_torture_lock);
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 };
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 };
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
 static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
 static atomic_t n_rcu_torture_alloc;
 static atomic_t n_rcu_torture_alloc_fail;
@@ -916,7 +916,7 @@ rcu_torture_fqs(void *arg)
 static int
 rcu_torture_writer(void *arg)
 {
-	bool can_expedite = !rcu_gp_is_expedited();
+	bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
 	int expediting = 0;
 	unsigned long gp_snap;
 	bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
@@ -932,7 +932,7 @@ rcu_torture_writer(void *arg)
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
 	if (!can_expedite) {
 		pr_alert("%s" TORTURE_FLAG
-			 " Grace periods expedited from boot/sysfs for %s,\n",
+			 " GP expediting controlled from boot/sysfs for %s,\n",
 			 torture_type, cur_ops->name);
 		pr_alert("%s" TORTURE_FLAG
 			 " Disabled dynamic grace-period expediting.\n",
@@ -1082,17 +1082,6 @@ rcu_torture_fakewriter(void *arg)
 	return 0;
 }
 
-static void rcutorture_trace_dump(void)
-{
-	static atomic_t beenhere = ATOMIC_INIT(0);
-
-	if (atomic_read(&beenhere))
-		return;
-	if (atomic_xchg(&beenhere, 1) != 0)
-		return;
-	ftrace_dump(DUMP_ALL);
-}
-
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -1142,7 +1131,7 @@ static void rcu_torture_timer(unsigned long unused)
 	if (pipe_count > 1) {
 		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
 					  started, completed);
-		rcutorture_trace_dump();
+		rcu_ftrace_dump(DUMP_ALL);
 	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
 	completed = completed - started;
@@ -1215,7 +1204,7 @@ rcu_torture_reader(void *arg)
 		if (pipe_count > 1) {
 			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
 						  ts, started, completed);
-			rcutorture_trace_dump();
+			rcu_ftrace_dump(DUMP_ALL);
 		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
 		completed = completed - started;
@@ -1333,7 +1322,7 @@ rcu_torture_stats_print(void)
 			 rcu_torture_writer_state,
 			 gpnum, completed, flags);
 		show_rcu_gp_kthreads();
-		rcutorture_trace_dump();
+		rcu_ftrace_dump(DUMP_ALL);
 	}
 	rtcv_snap = rcu_torture_current_version;
 }
@@ -1489,7 +1478,9 @@ static int rcu_torture_barrier_cbs(void *arg)
 		 * The above smp_load_acquire() ensures barrier_phase load
 		 * is ordered before the folloiwng ->call().
 		 */
+		local_irq_disable(); /* Just to test no-irq call_rcu(). */
 		cur_ops->call(&rcu, rcu_torture_barrier_cbf);
+		local_irq_enable();
 		if (atomic_dec_and_test(&barrier_cbs_count))
 			wake_up(&barrier_wq);
 	} while (!torture_must_stop());
@@ -1596,7 +1587,7 @@ static int rcutorture_cpu_notify(struct notifier_block *self,
 {
 	long cpu = (long)hcpu;
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
 		(void)rcutorture_booster_init(cpu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9a535a86e732..c7f1bc4f817c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -102,6 +102,8 @@ struct rcu_state sname##_state = { \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
+	.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
+	.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
 }
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
@@ -370,6 +372,21 @@ void rcu_all_qs(void)
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
+	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
+		/*
+		 * Yes, we just checked a per-CPU variable with preemption
+		 * enabled, so we might be migrated to some other CPU at
+		 * this point.  That is OK because in that case, the
+		 * migration will supply the needed quiescent state.
+		 * We might end up needlessly disabling preemption and
+		 * invoking rcu_sched_qs() on the destination CPU, but
+		 * the probability and cost are both quite low, so this
+		 * should not be a problem in practice.
+		 */
+		preempt_disable();
+		rcu_sched_qs();
+		preempt_enable();
+	}
 	this_cpu_inc(rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
@@ -385,9 +402,11 @@ module_param(qlowmark, long, 0444);
 
 static ulong jiffies_till_first_fqs = ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
+static bool rcu_kick_kthreads;
 
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
+module_param(rcu_kick_kthreads, bool, 0644);
 
 /*
  * How long the grace period must be before we start recruiting
@@ -460,6 +479,28 @@ unsigned long rcu_batches_completed_bh(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 
 /*
+ * Return the number of RCU expedited batches completed thus far for
+ * debug & stats.  Odd numbers mean that a batch is in progress, even
+ * numbers mean idle.  The value returned will thus be roughly double
+ * the cumulative batches since boot.
+ */
+unsigned long rcu_exp_batches_completed(void)
+{
+	return rcu_state_p->expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
+
+/*
+ * Return the number of RCU-sched expedited batches completed thus far
+ * for debug & stats.  Similar to rcu_exp_batches_completed().
+ */
+unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return rcu_sched_state.expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
+
+/*
  * Force a quiescent state.
  */
 void rcu_force_quiescent_state(void)
@@ -637,7 +678,7 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
 			idle_task(smp_processor_id());
 
 		trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
-		ftrace_dump(DUMP_ORIG);
+		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -799,7 +840,7 @@ static void rcu_eqs_exit_common(long long oldval, int user)
 
 		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
 				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ORIG);
+		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -1224,8 +1265,10 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
 		       rsp->gp_flags,
 		       gp_state_getname(rsp->gp_state), rsp->gp_state,
 		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
-		if (rsp->gp_kthread)
+		if (rsp->gp_kthread) {
 			sched_show_task(rsp->gp_kthread);
+			wake_up_process(rsp->gp_kthread);
+		}
 	}
 }
 
@@ -1249,6 +1292,25 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 	}
 }
 
+/*
+ * If too much time has passed in the current grace period, and if
+ * so configured, go kick the relevant kthreads.
+ */
+static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
+{
+	unsigned long j;
+
+	if (!rcu_kick_kthreads)
+		return;
+	j = READ_ONCE(rsp->jiffies_kick_kthreads);
+	if (time_after(jiffies, j) && rsp->gp_kthread) {
+		WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
+		rcu_ftrace_dump(DUMP_ALL);
+		wake_up_process(rsp->gp_kthread);
+		WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ);
+	}
+}
+
 static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 {
 	int cpu;
@@ -1260,6 +1322,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
 
+	/* Kick and suppress, if so configured. */
+	rcu_stall_kick_kthreads(rsp);
+	if (rcu_cpu_stall_suppress)
+		return;
+
 	/* Only let one CPU complain about others per time interval. */
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -1333,6 +1400,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
 
+	/* Kick and suppress, if so configured. */
+	rcu_stall_kick_kthreads(rsp);
+	if (rcu_cpu_stall_suppress)
+		return;
+
 	/*
 	 * OK, time to rat on ourselves...
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
@@ -1377,8 +1449,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	unsigned long js;
 	struct rcu_node *rnp;
 
-	if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
+	if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+	    !rcu_gp_in_progress(rsp))
 		return;
+	rcu_stall_kick_kthreads(rsp);
 	j = jiffies;
 
 	/*
@@ -2117,8 +2191,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
 		}
 		ret = 0;
 		for (;;) {
-			if (!ret)
+			if (!ret) {
 				rsp->jiffies_force_qs = jiffies + j;
+				WRITE_ONCE(rsp->jiffies_kick_kthreads,
+					   jiffies + 3 * j);
+			}
 			trace_rcu_grace_period(rsp->name,
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
@@ -2144,6 +2221,15 @@ static int __noreturn rcu_gp_kthread(void *arg)
 						       TPS("fqsend"));
 				cond_resched_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
+				ret = 0; /* Force full wait till next FQS. */
+				j = jiffies_till_next_fqs;
+				if (j > HZ) {
+					j = HZ;
+					jiffies_till_next_fqs = HZ;
+				} else if (j < 1) {
+					j = 1;
+					jiffies_till_next_fqs = 1;
+				}
 			} else {
 				/* Deal with stray signal. */
 				cond_resched_rcu_qs();
@@ -2152,14 +2238,12 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				trace_rcu_grace_period(rsp->name,
 						       READ_ONCE(rsp->gpnum),
 						       TPS("fqswaitsig"));
-			}
-			j = jiffies_till_next_fqs;
-			if (j > HZ) {
-				j = HZ;
-				jiffies_till_next_fqs = HZ;
-			} else if (j < 1) {
-				j = 1;
-				jiffies_till_next_fqs = 1;
+				ret = 1; /* Keep old FQS timing. */
+				j = jiffies;
+				if (time_after(jiffies, rsp->jiffies_force_qs))
+					j = 1;
+				else
+					j = rsp->jiffies_force_qs - j;
 			}
 		}
 
@@ -3376,8 +3460,12 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
 }
 static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
 {
+	unsigned long s;
+
 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
-	return rcu_seq_snap(&rsp->expedited_sequence);
+	s = rcu_seq_snap(&rsp->expedited_sequence);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
+	return s;
 }
 static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 {
@@ -3469,7 +3557,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
  * for the current expedited grace period.  Works only for preemptible
  * RCU -- other RCU implementation use other means.
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
  */
 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
@@ -3485,8 +3573,8 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  * recursively up the tree.  (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex and the
- * specified rcu_node structure's ->lock.
+ * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
+ * structure's ->lock.
  */
 static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 				 bool wake, unsigned long flags)
@@ -3523,7 +3611,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  * Report expedited quiescent state for specified node.  This is a
  * lock-acquisition wrapper function for __rcu_report_exp_rnp().
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
  */
 static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
 					      struct rcu_node *rnp, bool wake)
@@ -3536,8 +3624,8 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
 
 /*
  * Report expedited quiescent state for multiple CPUs, all covered by the
- * specified leaf rcu_node structure.  Caller must hold the root
- * rcu_node's exp_funnel_mutex.
+ * specified leaf rcu_node structure.  Caller must hold the rcu_state's
+ * exp_mutex.
  */
 static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 				    unsigned long mask, bool wake)
@@ -3555,7 +3643,6 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 
 /*
  * Report expedited quiescent state for specified rcu_data (CPU).
- * Caller must hold the root rcu_node's exp_funnel_mutex.
  */
 static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
 			       bool wake)
@@ -3564,15 +3651,11 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
 }
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
-static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
-			       struct rcu_data *rdp,
-			       atomic_long_t *stat, unsigned long s)
+static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
+			       unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
-		if (rnp)
-			mutex_unlock(&rnp->exp_funnel_mutex);
-		else if (rdp)
-			mutex_unlock(&rdp->exp_funnel_mutex);
+		trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
 		/* Ensure test happens before caller kfree(). */
 		smp_mb__before_atomic(); /* ^^^ */
 		atomic_long_inc(stat);
@@ -3582,59 +3665,65 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
 }
 
 /*
- * Funnel-lock acquisition for expedited grace periods.  Returns a
- * pointer to the root rcu_node structure, or NULL if some other
- * task did the expedited grace period for us.
+ * Funnel-lock acquisition for expedited grace periods.  Returns true
+ * if some other task completed an expedited grace period that this task
+ * can piggy-back on, and with no mutex held.  Otherwise, returns false
+ * with the mutex held, indicating that the caller must actually do the
+ * expedited grace period.
  */
-static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
+static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
-	struct rcu_node *rnp0;
-	struct rcu_node *rnp1 = NULL;
+	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+	/* Low-contention fastpath. */
+	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
+	    (rnp == rnp_root ||
+	     ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
+	    !mutex_is_locked(&rsp->exp_mutex) &&
+	    mutex_trylock(&rsp->exp_mutex))
+		goto fastpath;
 
 	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare.  We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
+	 * Each pass through the following loop works its way up
+	 * the rcu_node tree, returning if others have done the work or
+	 * otherwise falls through to acquire rsp->exp_mutex.  The mapping
+	 * from CPU to rcu_node structure can be inexact, as it is just
+	 * promoting locality and is not strictly needed for correctness.
 	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
+	for (; rnp != NULL; rnp = rnp->parent) {
+		if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
+			return true;
+
+		/* Work not done, either wait here or go up. */
+		spin_lock(&rnp->exp_lock);
+		if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
+
+			/* Someone else doing GP, so wait for them. */
+			spin_unlock(&rnp->exp_lock);
+			trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
+						  rnp->grplo, rnp->grphi,
+						  TPS("wait"));
+			wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+				   sync_exp_work_done(rsp,
+						      &rdp->exp_workdone2, s));
+			return true;
 		}
+		rnp->exp_seq_rq = s; /* Followers can wait on us. */
+		spin_unlock(&rnp->exp_lock);
+		trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
+					  rnp->grphi, TPS("nxtlvl"));
 	}
-
-	/*
-	 * Each pass through the following loop works its way
-	 * up the rcu_node tree, returning if others have done the
-	 * work or otherwise falls through holding the root rnp's
-	 * ->exp_funnel_mutex.  The mapping from CPU to rcu_node structure
-	 * can be inexact, as it is just promoting locality and is not
-	 * strictly needed for correctness.
-	 */
-	if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
-		return NULL;
-	mutex_lock(&rdp->exp_funnel_mutex);
-	rnp0 = rdp->mynode;
-	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
-		if (sync_exp_work_done(rsp, rnp1, rdp,
-				       &rdp->expedited_workdone2, s))
-			return NULL;
-		mutex_lock(&rnp0->exp_funnel_mutex);
-		if (rnp1)
-			mutex_unlock(&rnp1->exp_funnel_mutex);
-		else
-			mutex_unlock(&rdp->exp_funnel_mutex);
-		rnp1 = rnp0;
+	mutex_lock(&rsp->exp_mutex);
+fastpath:
+	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
+		mutex_unlock(&rsp->exp_mutex);
+		return true;
 	}
-	if (sync_exp_work_done(rsp, rnp1, rdp,
-			       &rdp->expedited_workdone3, s))
-		return NULL;
-	return rnp1;
+	rcu_exp_gp_seq_start(rsp);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
+	return false;
 }
 
 /* Invoked on each online non-idle CPU for expedited quiescent state. */
@@ -3649,6 +3738,11 @@ static void sync_sched_exp_handler(void *data)
 	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
 	    __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
 		return;
+	if (rcu_is_cpu_rrupt_from_idle()) {
+		rcu_report_exp_rdp(&rcu_sched_state,
+				   this_cpu_ptr(&rcu_sched_data), true);
+		return;
+	}
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
 	resched_cpu(smp_processor_id());
 }
@@ -3773,7 +3867,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 		       rsp->name);
 		ndetected = 0;
 		rcu_for_each_leaf_node(rsp, rnp) {
-			ndetected = rcu_print_task_exp_stall(rnp);
+			ndetected += rcu_print_task_exp_stall(rnp);
 			mask = 1;
 			for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
 				struct rcu_data *rdp;
@@ -3783,7 +3877,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 				ndetected++;
 				rdp = per_cpu_ptr(rsp->rda, cpu);
 				pr_cont(" %d-%c%c%c", cpu,
-					"O."[cpu_online(cpu)],
+					"O."[!!cpu_online(cpu)],
 					"o."[!!(rdp->grpmask & rnp->expmaskinit)],
 					"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
 			}
@@ -3792,7 +3886,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 		pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
 			jiffies - jiffies_start, rsp->expedited_sequence,
 			rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
-		if (!ndetected) {
+		if (ndetected) {
 			pr_err("blocking rcu_node structures:");
 			rcu_for_each_node_breadth_first(rsp, rnp) {
 				if (rnp == rnp_root)
@@ -3818,6 +3912,41 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 	}
 }
 
+/*
+ * Wait for the current expedited grace period to complete, and then
+ * wake up everyone who piggybacked on the just-completed expedited
+ * grace period.  Also update all the ->exp_seq_rq counters as needed
+ * in order to avoid counter-wrap problems.
+ */
+static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
+{
+	struct rcu_node *rnp;
+
+	synchronize_sched_expedited_wait(rsp);
+	rcu_exp_gp_seq_end(rsp);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
+
+	/*
+	 * Switch over to wakeup mode, allowing the next GP, but -only- the
+	 * next GP, to proceed.
+	 */
+	mutex_lock(&rsp->exp_wake_mutex);
+	mutex_unlock(&rsp->exp_mutex);
+
+	rcu_for_each_node_breadth_first(rsp, rnp) {
+		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
+			spin_lock(&rnp->exp_lock);
+			/* Recheck, avoid hang in case someone just arrived. */
+			if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
+				rnp->exp_seq_rq = s;
+			spin_unlock(&rnp->exp_lock);
+		}
+		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
+	}
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
+	mutex_unlock(&rsp->exp_wake_mutex);
+}
+
 /**
  * synchronize_sched_expedited - Brute-force RCU-sched grace period
  *
@@ -3837,7 +3966,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 void synchronize_sched_expedited(void)
 {
 	unsigned long s;
-	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_sched_state;
 
 	/* If only one CPU, this is automatically a grace period. */
@@ -3852,17 +3980,14 @@ void synchronize_sched_expedited(void)
 
 	/* Take a snapshot of the sequence number.  */
 	s = rcu_exp_gp_seq_snap(rsp);
-
-	rnp = exp_funnel_lock(rsp, s);
-	if (rnp == NULL)
+	if (exp_funnel_lock(rsp, s))
 		return;  /* Someone else did our work for us. */
 
-	rcu_exp_gp_seq_start(rsp);
+	/* Initialize the rcu_node tree in preparation for the wait. */
 	sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-	synchronize_sched_expedited_wait(rsp);
 
-	rcu_exp_gp_seq_end(rsp);
-	mutex_unlock(&rnp->exp_funnel_mutex);
+	/* Wait and clean up, including waking everyone. */
+	rcu_exp_wait_wake(rsp, s);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
@@ -4162,7 +4287,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
-	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
@@ -4420,10 +4544,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 {
 	static const char * const buf[] = RCU_NODE_NAME_INIT;
 	static const char * const fqs[] = RCU_FQS_NAME_INIT;
-	static const char * const exp[] = RCU_EXP_NAME_INIT;
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-	static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
 	static u8 fl_mask = 0x1;
 
 	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
@@ -4482,9 +4604,11 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 			rnp->level = i;
 			INIT_LIST_HEAD(&rnp->blkd_tasks);
 			rcu_init_one_nocb(rnp);
-			mutex_init(&rnp->exp_funnel_mutex);
-			lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
-						   &rcu_exp_class[i], exp[i]);
+			init_waitqueue_head(&rnp->exp_wq[0]);
+			init_waitqueue_head(&rnp->exp_wq[1]);
+			init_waitqueue_head(&rnp->exp_wq[2]);
+			init_waitqueue_head(&rnp->exp_wq[3]);
+			spin_lock_init(&rnp->exp_lock);
 		}
 	}
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df668c0f9e64..e3959f5e6ddf 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -70,7 +70,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
 #elif NR_CPUS <= RCU_FANOUT_2
 #  define RCU_NUM_LVLS	      2
 #  define NUM_RCU_LVL_0	      1
@@ -79,7 +78,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
 #elif NR_CPUS <= RCU_FANOUT_3
 #  define RCU_NUM_LVLS	      3
 #  define NUM_RCU_LVL_0	      1
@@ -89,7 +87,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
 #elif NR_CPUS <= RCU_FANOUT_4
 #  define RCU_NUM_LVLS	      4
 #  define NUM_RCU_LVL_0	      1
@@ -100,7 +97,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
@@ -252,7 +248,9 @@ struct rcu_node {
 				/* Counts of upcoming no-CB GP requests. */
 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
-	struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
+	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
+	unsigned long exp_seq_rq;
+	wait_queue_head_t exp_wq[4];
 } ____cacheline_internodealigned_in_smp;
 
 /*
@@ -387,11 +385,9 @@ struct rcu_data {
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
-	atomic_long_t expedited_workdone1;	/* # done by others #1. */
-	atomic_long_t expedited_workdone2;	/* # done by others #2. */
-	atomic_long_t expedited_workdone3;	/* # done by others #3. */
+	atomic_long_t exp_workdone1;	/* # done by others #1. */
+	atomic_long_t exp_workdone2;	/* # done by others #2. */
+	atomic_long_t exp_workdone3;	/* # done by others #3. */
 
 	/* 7) Callback offloading. */
 #ifdef CONFIG_RCU_NOCB_CPU
@@ -505,6 +501,8 @@ struct rcu_state {
 						/*  _rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
+	struct mutex exp_mutex;			/* Serialize expedited GP. */
+	struct mutex exp_wake_mutex;		/* Serialize wakeup. */
 	unsigned long expedited_sequence;	/* Take a ticket. */
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
@@ -513,6 +511,8 @@ struct rcu_state {
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
+	unsigned long jiffies_kick_kthreads;	/* Time at which to kick */
+						/*  kthreads, if configured. */
 	unsigned long n_force_qs;		/* Number of calls to */
 						/*  force_quiescent_state(). */
 	unsigned long n_force_qs_lh;		/* ~Number of calls leaving */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index efdf7b61ce12..ff1cd4e1188d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -722,18 +722,22 @@ static void sync_rcu_exp_handler(void *info)
  * synchronize_rcu_expedited - Brute-force RCU grace period
  *
  * Wait for an RCU-preempt grace period, but expedite it.  The basic
- * idea is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blkd_tasks lists and wait for this list to drain.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.
- * In fact, if you are using synchronize_rcu_expedited() in a loop,
- * please restructure your code to batch your updates, and then Use a
- * single synchronize_rcu() instead.
+ * idea is to IPI all non-idle non-nohz online CPUs.  The IPI handler
+ * checks whether the CPU is in an RCU-preempt critical section, and
+ * if so, it sets a flag that causes the outermost rcu_read_unlock()
+ * to report the quiescent state.  On the other hand, if the CPU is
+ * not in an RCU read-side critical section, the IPI handler reports
+ * the quiescent state immediately.
+ *
+ * Although this is a greate improvement over previous expedited
+ * implementations, it is still unfriendly to real-time workloads, so is
+ * thus not recommended for any sort of common-case code.  In fact, if
+ * you are using synchronize_rcu_expedited() in a loop, please restructure
+ * your code to batch your updates, and then Use a single synchronize_rcu()
+ * instead.
  */
 void synchronize_rcu_expedited(void)
 {
-	struct rcu_node *rnp;
-	struct rcu_node *rnp_unlock;
 	struct rcu_state *rsp = rcu_state_p;
 	unsigned long s;
 
@@ -744,23 +748,14 @@ void synchronize_rcu_expedited(void)
 	}
 
 	s = rcu_exp_gp_seq_snap(rsp);
-
-	rnp_unlock = exp_funnel_lock(rsp, s);
-	if (rnp_unlock == NULL)
+	if (exp_funnel_lock(rsp, s))
 		return;  /* Someone else did our work for us. */
 
-	rcu_exp_gp_seq_start(rsp);
-
 	/* Initialize the rcu_node tree in preparation for the wait. */
 	sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
 
-	/* Wait for snapshotted ->blkd_tasks lists to drain. */
-	rnp = rcu_get_root(rsp);
-	synchronize_sched_expedited_wait(rsp);
-
-	/* Clean up and exit. */
-	rcu_exp_gp_seq_end(rsp);
-	mutex_unlock(&rnp_unlock->exp_funnel_mutex);
+	/* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
+	rcu_exp_wait_wake(rsp, s);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 1088e64f01ad..86782f9a4604 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
-		s1 += atomic_long_read(&rdp->expedited_workdone1);
-		s2 += atomic_long_read(&rdp->expedited_workdone2);
-		s3 += atomic_long_read(&rdp->expedited_workdone3);
+		s1 += atomic_long_read(&rdp->exp_workdone1);
+		s2 += atomic_long_read(&rdp->exp_workdone2);
+		s3 += atomic_long_read(&rdp->exp_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ca828b41c938..3ccdc8eebc5a 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -67,7 +67,7 @@ static int rcu_normal_after_boot;
 module_param(rcu_normal_after_boot, int, 0);
 #endif /* #ifndef CONFIG_TINY_RCU */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 /**
  * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  *
@@ -111,7 +111,7 @@ int rcu_read_lock_sched_held(void)
 		return 0;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
+	return lockdep_opinion || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 40748dc8ea3e..e7dd0ec169be 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3188,25 +3188,17 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
-	bool curr = cfs_rq->curr == se;
-
 	/*
-	 * If we're the current task, we must renormalise before calling
-	 * update_curr().
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through calling update_curr().
 	 */
-	if (renorm && curr)
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
 		se->vruntime += cfs_rq->min_vruntime;
 
-	update_curr(cfs_rq);
-
 	/*
-	 * Otherwise, renormalise after, such that we're placed at the current
-	 * moment in time, instead of some random moment in the past.
+	 * Update run-time statistics of the 'current'.
 	 */
-	if (renorm && !curr)
-		se->vruntime += cfs_rq->min_vruntime;
-
+	update_curr(cfs_rq);
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3222,7 +3214,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (!curr)
+	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 
diff --git a/kernel/signal.c b/kernel/signal.c
index aa9bf00749c1..ab122a2cee41 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3099,12 +3099,14 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 
 	oss.ss_sp = (void __user *) current->sas_ss_sp;
 	oss.ss_size = current->sas_ss_size;
-	oss.ss_flags = sas_ss_flags(sp);
+	oss.ss_flags = sas_ss_flags(sp) |
+		(current->sas_ss_flags & SS_FLAG_BITS);
 
 	if (uss) {
 		void __user *ss_sp;
 		size_t ss_size;
-		int ss_flags;
+		unsigned ss_flags;
+		int ss_mode;
 
 		error = -EFAULT;
 		if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
@@ -3119,18 +3121,13 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 		if (on_sig_stack(sp))
 			goto out;
 
+		ss_mode = ss_flags & ~SS_FLAG_BITS;
 		error = -EINVAL;
-		/*
-		 * Note - this code used to test ss_flags incorrectly:
-		 *  	  old code may have been written using ss_flags==0
-		 *	  to mean ss_flags==SS_ONSTACK (as this was the only
-		 *	  way that worked) - this fix preserves that older
-		 *	  mechanism.
-		 */
-		if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
+		if (ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK &&
+				ss_mode != 0)
 			goto out;
 
-		if (ss_flags == SS_DISABLE) {
+		if (ss_mode == SS_DISABLE) {
 			ss_size = 0;
 			ss_sp = NULL;
 		} else {
@@ -3141,6 +3138,7 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 
 		current->sas_ss_sp = (unsigned long) ss_sp;
 		current->sas_ss_size = ss_size;
+		current->sas_ss_flags = ss_flags;
 	}
 
 	error = 0;
@@ -3171,9 +3169,14 @@ int restore_altstack(const stack_t __user *uss)
 int __save_altstack(stack_t __user *uss, unsigned long sp)
 {
 	struct task_struct *t = current;
-	return  __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
-		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+	int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+		__put_user(t->sas_ss_flags, &uss->ss_flags) |
 		__put_user(t->sas_ss_size, &uss->ss_size);
+	if (err)
+		return err;
+	if (t->sas_ss_flags & SS_AUTODISARM)
+		sas_ss_reset(t);
+	return 0;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 58e3310c9b21..3daa49ff0719 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -262,7 +262,7 @@ static void tick_nohz_dep_set_all(atomic_t *dep,
 {
 	int prev;
 
-	prev = atomic_fetch_or(dep, BIT(bit));
+	prev = atomic_fetch_or(BIT(bit), dep);
 	if (!prev)
 		tick_nohz_full_kick_all();
 }
@@ -292,7 +292,7 @@ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
 
 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
+	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
 	if (!prev) {
 		preempt_disable();
 		/* Perf needs local kick that is NMI safe */
diff --git a/kernel/torture.c b/kernel/torture.c
index 44aa462d033f..fa0bdeee17ac 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -451,6 +451,7 @@ static int torture_shutdown(void *arg)
 		torture_shutdown_hook();
 	else
 		VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
+	ftrace_dump(DUMP_ALL);
 	kernel_power_off();	/* Shut down the system. */
 	return 0;
 }
@@ -602,8 +603,9 @@ bool torture_init_begin(char *ttype, bool v, int *runnable)
 {
 	mutex_lock(&fullstop_mutex);
 	if (torture_type != NULL) {
-		pr_alert("torture_init_begin: refusing %s init: %s running",
+		pr_alert("torture_init_begin: Refusing %s init: %s running.\n",
 			 ttype, torture_type);
+		pr_alert("torture_init_begin: One torture test at a time!\n");
 		mutex_unlock(&fullstop_mutex);
 		return false;
 	}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3bfdff06eea7..5f5068e94003 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4554,6 +4554,17 @@ static void rebind_workers(struct worker_pool *pool)
 						  pool->attrs->cpumask) < 0);
 
 	spin_lock_irq(&pool->lock);
+
+	/*
+	 * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
+	 * w/o preceding DOWN_PREPARE.  Work around it.  CPU hotplug is
+	 * being reworked and this can go away in time.
+	 */
+	if (!(pool->flags & POOL_DISASSOCIATED)) {
+		spin_unlock_irq(&pool->lock);
+		return;
+	}
+
 	pool->flags &= ~POOL_DISASSOCIATED;
 
 	for_each_pool_worker(worker, pool) {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1e9a607534ca..f4b797a690ba 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1289,6 +1289,39 @@ config TORTURE_TEST
 	tristate
 	default n
 
+config RCU_PERF_TEST
+	tristate "performance tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs performance
+	  tests on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU performance tests to be built into
+	  the kernel.
+	  Say M if you want the RCU performance tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_PERF_TEST_RUNNABLE
+	bool "performance tests for RCU runnable by default"
+	depends on RCU_PERF_TEST = y
+	default n
+	help
+	  This option provides a way to build the RCU performance tests
+	  directly into the kernel without them starting up at boot time.
+	  You can use /sys/module to manually override this setting.
+	  This /proc file is available only when the RCU performance
+	  tests have been built into the kernel.
+
+	  Say Y here if you want the RCU performance tests to start during
+	  boot (you probably don't).
+	  Say N here if you want the RCU performance tests to start only
+	  after being manually enabled via /sys/module.
+
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd436c97..a65e9a861535 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o md5.o irq_regs.o argv_split.o \
-	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
+	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o
 
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 2b3f46c049d4..554522934c44 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -74,7 +74,7 @@ next_tag:
 
 	/* Extract a tag from the data */
 	tag = data[dp++];
-	if (tag == 0) {
+	if (tag == ASN1_EOC) {
 		/* It appears to be an EOC. */
 		if (data[dp++] != 0)
 			goto invalid_eoc;
@@ -96,10 +96,8 @@ next_tag:
 
 	/* Extract the length */
 	len = data[dp++];
-	if (len <= 0x7f) {
-		dp += len;
-		goto next_tag;
-	}
+	if (len <= 0x7f)
+		goto check_length;
 
 	if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
 		/* Indefinite length */
@@ -110,14 +108,18 @@ next_tag:
 	}
 
 	n = len - 0x80;
-	if (unlikely(n > sizeof(size_t) - 1))
+	if (unlikely(n > sizeof(len) - 1))
 		goto length_too_long;
 	if (unlikely(n > datalen - dp))
 		goto data_overrun_error;
-	for (len = 0; n > 0; n--) {
+	len = 0;
+	for (; n > 0; n--) {
 		len <<= 8;
 		len |= data[dp++];
 	}
+check_length:
+	if (len > datalen - dp)
+		goto data_overrun_error;
 	dp += len;
 	goto next_tag;
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5fecddc32b1b..ca5316e0087b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -569,6 +569,25 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_alignment);
 
+unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
+{
+        unsigned long res = 0;
+	size_t size = i->count;
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v,
+		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
+			(size != v.iov_len ? size : 0), 0),
+		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
+			(size != v.bv_len ? size : 0)),
+		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
+			(size != v.iov_len ? size : 0))
+		);
+		return res;
+}
+EXPORT_SYMBOL(iov_iter_gap_alignment);
+
 ssize_t iov_iter_get_pages(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
diff --git a/lib/proportions.c b/lib/proportions.c
deleted file mode 100644
index efa54f259ea9..000000000000
--- a/lib/proportions.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Floating proportions
- *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- *   \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Further more, if we measure time (t) in the same events as x; so that:
- *
- *   t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- *   \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- *   if (++x_{j}, ++t > period)
- *     t /= 2;
- *     for_each (j)
- *       x_{j} /= 2;
- *
- * so that:
- *
- *   p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- *   if (++t > period) t /= 2:
- *
- * Can be approximated by:
- *
- *   period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- *   binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- *   c = t / (period/2)
- *
- * [ Its monotonic increasing property can be applied to mitigate the wrap-
- *   around issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- *   c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- *   x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
-
-#include <linux/proportions.h>
-#include <linux/rcupdate.h>
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
-{
-	int err;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	pd->index = 0;
-	pd->pg[0].shift = shift;
-	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
-	if (err)
-		goto out;
-
-	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
-	if (err)
-		percpu_counter_destroy(&pd->pg[0].events);
-
-out:
-	return err;
-}
-
-/*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
-void prop_change_shift(struct prop_descriptor *pd, int shift)
-{
-	int index;
-	int offset;
-	u64 events;
-	unsigned long flags;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	mutex_lock(&pd->mutex);
-
-	index = pd->index ^ 1;
-	offset = pd->pg[pd->index].shift - shift;
-	if (!offset)
-		goto out;
-
-	pd->pg[index].shift = shift;
-
-	local_irq_save(flags);
-	events = percpu_counter_sum(&pd->pg[pd->index].events);
-	if (offset < 0)
-		events <<= -offset;
-	else
-		events >>= offset;
-	percpu_counter_set(&pd->pg[index].events, events);
-
-	/*
-	 * ensure the new pg is fully written before the switch
-	 */
-	smp_wmb();
-	pd->index = index;
-	local_irq_restore(flags);
-
-	synchronize_rcu();
-
-out:
-	mutex_unlock(&pd->mutex);
-}
-
-/*
- * wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
-static struct prop_global *prop_get_global(struct prop_descriptor *pd)
-__acquires(RCU)
-{
-	int index;
-
-	rcu_read_lock();
-	index = pd->index;
-	/*
-	 * match the wmb from vcd_flip()
-	 */
-	smp_rmb();
-	return &pd->pg[index];
-}
-
-static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
-__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static void
-prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
-{
-	int offset = *pl_shift - new_shift;
-
-	if (!offset)
-		return;
-
-	if (offset < 0)
-		*pl_period <<= -offset;
-	else
-		*pl_period >>= offset;
-
-	*pl_shift = new_shift;
-}
-
-/*
- * PERCPU
- */
-
-#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0, gfp);
-}
-
-void prop_local_destroy_percpu(struct prop_local_percpu *pl)
-{
-	percpu_counter_destroy(&pl->events);
-}
-
-/*
- * Catch up with missed period expirations.
- *
- *   until (c_{j} == c)
- *     x_{j} -= x_{j}/2;
- *     c_{j}++;
- */
-static
-void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-
-	/*
-	 * For each missed period, we half the local counter.
-	 * basically:
-	 *   pl->events >> (global_period - pl->period);
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (period < BITS_PER_LONG) {
-		s64 val = percpu_counter_read(&pl->events);
-
-		if (val < (nr_cpu_ids * PROP_BATCH))
-			val = percpu_counter_sum(&pl->events);
-
-		__percpu_counter_add(&pl->events, -val + (val >> period),
-					PROP_BATCH);
-	} else
-		percpu_counter_set(&pl->events, 0);
-
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- *   ++x_{j}, ++t
- */
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
-			   struct prop_local_percpu *pl, long frac)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-
-	if (unlikely(frac != PROP_FRAC_BASE)) {
-		unsigned long period_2 = 1UL << (pg->shift - 1);
-		unsigned long counter_mask = period_2 - 1;
-		unsigned long global_count;
-		long numerator, denominator;
-
-		numerator = percpu_counter_read_positive(&pl->events);
-		global_count = percpu_counter_read(&pg->events);
-		denominator = period_2 + (global_count & counter_mask);
-
-		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
-			goto out_put;
-	}
-
-	percpu_counter_add(&pl->events, 1);
-	percpu_counter_add(&pg->events, 1);
-
-out_put:
-	prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_percpu(struct prop_descriptor *pd,
-		struct prop_local_percpu *pl,
-		long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_percpu(pg, pl);
-	*numerator = percpu_counter_read_positive(&pl->events);
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}
-
-/*
- * SINGLE
- */
-
-int prop_local_init_single(struct prop_local_single *pl)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	pl->events = 0;
-	return 0;
-}
-
-void prop_local_destroy_single(struct prop_local_single *pl)
-{
-}
-
-/*
- * Catch up with missed period expirations.
- */
-static
-void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-	/*
-	 * For each missed period, we half the local counter.
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (likely(period < BITS_PER_LONG))
-		pl->events >>= period;
-	else
-		pl->events = 0;
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- *   ++x_{j}, ++t
- */
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_single(pg, pl);
-	pl->events++;
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_single(struct prop_descriptor *pd,
-	       	struct prop_local_single *pl,
-		long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_single(pg, pl);
-	*numerator = pl->events;
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7daa7de8f48..b49ee126d4d1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
 	/*
 	 * We can only reuse the page if nobody else maps the huge page or it's
-	 * part. We can do it by checking page_mapcount() on each sub-page, but
-	 * it's expensive.
-	 * The cheaper way is to check page_count() to be equal 1: every
-	 * mapcount takes page reference reference, so this way we can
-	 * guarantee, that the PMD is the only mapping.
-	 * This can give false negative if somebody pinned the page, but that's
-	 * fine.
+	 * part.
 	 */
-	if (page_mapcount(page) == 1 && page_count(page) == 1) {
+	if (page_trans_huge_mapcount(page, NULL) == 1) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -2079,7 +2073,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (pte_write(pteval)) {
 			writable = true;
 		} else {
-			if (PageSwapCache(page) && !reuse_swap_page(page)) {
+			if (PageSwapCache(page) &&
+			    !reuse_swap_page(page, NULL)) {
 				unlock_page(page);
 				result = SCAN_SWAP_CACHE_PAGE;
 				goto out;
@@ -3223,6 +3218,64 @@ int total_mapcount(struct page *page)
 }
 
 /*
+ * This calculates accurately how many mappings a transparent hugepage
+ * has (unlike page_mapcount() which isn't fully accurate). This full
+ * accuracy is primarily needed to know if copy-on-write faults can
+ * reuse the page and change the mapping to read-write instead of
+ * copying them. At the same time this returns the total_mapcount too.
+ *
+ * The function returns the highest mapcount any one of the subpages
+ * has. If the return value is one, even if different processes are
+ * mapping different subpages of the transparent hugepage, they can
+ * all reuse it, because each process is reusing a different subpage.
+ *
+ * The total_mapcount is instead counting all virtual mappings of the
+ * subpages. If the total_mapcount is equal to "one", it tells the
+ * caller all mappings belong to the same "mm" and in turn the
+ * anon_vma of the transparent hugepage can become the vma->anon_vma
+ * local one as no other process may be mapping any of the subpages.
+ *
+ * It would be more accurate to replace page_mapcount() with
+ * page_trans_huge_mapcount(), however we only use
+ * page_trans_huge_mapcount() in the copy-on-write faults where we
+ * need full accuracy to avoid breaking page pinning, because
+ * page_trans_huge_mapcount() is slower than page_mapcount().
+ */
+int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
+{
+	int i, ret, _total_mapcount, mapcount;
+
+	/* hugetlbfs shouldn't call it */
+	VM_BUG_ON_PAGE(PageHuge(page), page);
+
+	if (likely(!PageTransCompound(page))) {
+		mapcount = atomic_read(&page->_mapcount) + 1;
+		if (total_mapcount)
+			*total_mapcount = mapcount;
+		return mapcount;
+	}
+
+	page = compound_head(page);
+
+	_total_mapcount = ret = 0;
+	for (i = 0; i < HPAGE_PMD_NR; i++) {
+		mapcount = atomic_read(&page[i]._mapcount) + 1;
+		ret = max(ret, mapcount);
+		_total_mapcount += mapcount;
+	}
+	if (PageDoubleMap(page)) {
+		ret -= 1;
+		_total_mapcount -= HPAGE_PMD_NR;
+	}
+	mapcount = compound_mapcount(page);
+	ret += mapcount;
+	_total_mapcount += mapcount;
+	if (total_mapcount)
+		*total_mapcount = _total_mapcount;
+	return ret;
+}
+
+/*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
  *
diff --git a/mm/ksm.c b/mm/ksm.c
index b99e828172f6..4786b4150f62 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -783,6 +783,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 		}
 
 		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
+		up_read(&mm->mmap_sem);
 
 		spin_lock(&ksm_mmlist_lock);
 		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -794,12 +795,9 @@ static int unmerge_and_remove_all_rmap_items(void)
 
 			free_mm_slot(mm_slot);
 			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
-			up_read(&mm->mmap_sem);
 			mmdrop(mm);
-		} else {
+		} else
 			spin_unlock(&ksm_mmlist_lock);
-			up_read(&mm->mmap_sem);
-		}
 	}
 
 	/* Clean up stable nodes, but don't worry if some are still busy */
@@ -1663,8 +1661,15 @@ next_mm:
 		up_read(&mm->mmap_sem);
 		mmdrop(mm);
 	} else {
-		spin_unlock(&ksm_mmlist_lock);
 		up_read(&mm->mmap_sem);
+		/*
+		 * up_read(&mm->mmap_sem) first because after
+		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
+		 * already have been freed under us by __ksm_exit()
+		 * because the "mm_slot" is still hashed and
+		 * ksm_scan.mm_slot doesn't point to it anymore.
+		 */
+		spin_unlock(&ksm_mmlist_lock);
 	}
 
 	/* Repeat until we've completed scanning the whole list */
diff --git a/mm/memory.c b/mm/memory.c
index 52c218e2b724..07493e34ab7e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2373,6 +2373,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page) && !PageKsm(old_page)) {
+		int total_mapcount;
 		if (!trylock_page(old_page)) {
 			get_page(old_page);
 			pte_unmap_unlock(page_table, ptl);
@@ -2387,13 +2388,18 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 			put_page(old_page);
 		}
-		if (reuse_swap_page(old_page)) {
-			/*
-			 * The page is all ours.  Move it to our anon_vma so
-			 * the rmap code will not search our parent or siblings.
-			 * Protected against the rmap code by the page lock.
-			 */
-			page_move_anon_rmap(old_page, vma, address);
+		if (reuse_swap_page(old_page, &total_mapcount)) {
+			if (total_mapcount == 1) {
+				/*
+				 * The page is all ours. Move it to
+				 * our anon_vma so the rmap code will
+				 * not search our parent or siblings.
+				 * Protected against the rmap code by
+				 * the page lock.
+				 */
+				page_move_anon_rmap(compound_head(old_page),
+						    vma, address);
+			}
 			unlock_page(old_page);
 			return wp_page_reuse(mm, vma, address, page_table, ptl,
 					     orig_pte, old_page, 0, 0);
@@ -2617,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	dec_mm_counter_fast(mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 83874eced5bf..031713ab40ce 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -922,18 +922,19 @@ out:
  * to it.  And as a side-effect, free up its swap: because the old content
  * on disk will never be read, and seeking back there to write new content
  * later would only waste time away from clustering.
+ *
+ * NOTE: total_mapcount should not be relied upon by the caller if
+ * reuse_swap_page() returns false, but it may be always overwritten
+ * (see the other implementation for CONFIG_SWAP=n).
  */
-int reuse_swap_page(struct page *page)
+bool reuse_swap_page(struct page *page, int *total_mapcount)
 {
 	int count;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	if (unlikely(PageKsm(page)))
-		return 0;
-	/* The page is part of THP and cannot be reused */
-	if (PageTransCompound(page))
-		return 0;
-	count = page_mapcount(page);
+		return false;
+	count = page_trans_huge_mapcount(page, total_mapcount);
 	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
 		if (count == 1 && !PageWriteback(page)) {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d97268e8ff10..2b68418c7198 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			val = 65535 - 40;
 		if (type == RTAX_MTU && val > 65535 - 15)
 			val = 65535 - 15;
+		if (type == RTAX_HOPLIMIT && val > 255)
+			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			return -EINVAL;
 		fi->fib_metrics[type - 1] = val;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 205a2b8a5a84..4cc84212cce1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -398,7 +398,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
 				  iph->saddr, iph->daddr, tpi->key);
 
 	if (tunnel) {
-		skb_pop_mac_header(skb);
+		if (tunnel->dev->type != ARPHRD_NONE)
+			skb_pop_mac_header(skb);
+		else
+			skb_reset_mac_header(skb);
 		if (tunnel->collect_md) {
 			__be16 flags;
 			__be64 tun_id;
@@ -1031,6 +1034,8 @@ static void ipgre_netlink_parms(struct net_device *dev,
 		struct ip_tunnel *t = netdev_priv(dev);
 
 		t->collect_md = true;
+		if (dev->type == ARPHRD_IPGRE)
+			dev->type = ARPHRD_NONE;
 	}
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 441ae9da3a23..79a03b87a771 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2640,8 +2640,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
 		     skb_headroom(skb) >= 0xFFFF)) {
-		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
-						   GFP_ATOMIC);
+		struct sk_buff *nskb;
+
+		skb_mstamp_get(&skb->skb_mstamp);
+		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
 		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
 			     -ENOBUFS;
 	} else {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d916d6ab9ad2..6f32944e0223 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1750,6 +1750,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 		} else {
 			val = nla_get_u32(nla);
 		}
+		if (type == RTAX_HOPLIMIT && val > 255)
+			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			goto err;
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index afde5f5e728a..e27fd17c6743 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
-static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -1778,6 +1778,7 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
+	static atomic64_t unique_id;
 	int ret = -ENOMEM;
 	int cpu;
 
@@ -1800,7 +1801,8 @@ int nf_conntrack_init_net(struct net *net)
 	if (!net->ct.stat)
 		goto err_pcpu_lists;
 
-	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+				(u64)atomic64_inc_return(&unique_id));
 	if (!net->ct.slabname)
 		goto err_slabname;
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 4c2b4c0c4d5f..dbd0803b1827 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
 			return -EINVAL;
 		if (flags & NFACCT_F_OVERQUOTA)
 			return -EINVAL;
+		if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA])
+			return -EINVAL;
 
 		size += sizeof(u64);
 	}
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 29d2c31f406c..daf45da448fa 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
 
 		list_del(&info->timer->entry);
 		del_timer_sync(&info->timer->timer);
+		cancel_work_sync(&info->timer->work);
 		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
 		kfree(info->timer->attr.attr.name);
 		kfree(info->timer);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index b5fea1101faa..10c84d882881 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -776,6 +776,19 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 			return -EINVAL;
 		}
 
+		/* Userspace may decide to perform a ct lookup without a helper
+		 * specified followed by a (recirculate and) commit with one.
+		 * Therefore, for unconfirmed connections which we will commit,
+		 * we need to attach the helper here.
+		 */
+		if (!nf_ct_is_confirmed(ct) && info->commit &&
+		    info->helper && !nfct_help(ct)) {
+			int err = __nf_ct_try_assign_helper(ct, info->ct,
+							    GFP_ATOMIC);
+			if (err)
+				return err;
+		}
+
 		/* Call the helper only if:
 		 * - nf_conntrack_in() was executed above ("!cached") for a
 		 *   confirmed connection, or
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c589a9ba506a..343d011aa818 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -423,7 +423,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	u16 ife_type = 0;
 	u8 *daddr = NULL;
 	u8 *saddr = NULL;
-	int ret = 0;
+	int ret = 0, exists = 0;
 	int err;
 
 	err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
@@ -435,25 +435,29 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_IFE_PARMS]);
 
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
 	if (parm->flags & IFE_ENCODE) {
 		/* Until we get issued the ethertype, we cant have
 		 * a default..
 		**/
 		if (!tb[TCA_IFE_TYPE]) {
+			if (exists)
+				tcf_hash_release(a, bind);
 			pr_info("You MUST pass etherype for encoding\n");
 			return -EINVAL;
 		}
 	}
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife),
 				      bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		if (bind)	/* dont override defaults */
-			return 0;
 		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
@@ -495,6 +499,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 				       NULL);
 		if (err) {
 metadata_parse_err:
+			if (exists)
+				tcf_hash_release(a, bind);
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 350e134cffb3..8b5270008a6e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -96,7 +96,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
 	struct tcf_ipt *ipt;
 	struct xt_entry_target *td, *t;
 	char *tname;
-	int ret = 0, err;
+	int ret = 0, err, exists = 0;
 	u32 hook = 0;
 	u32 index = 0;
 
@@ -107,18 +107,23 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_IPT_HOOK] == NULL)
-		return -EINVAL;
-	if (tb[TCA_IPT_TARG] == NULL)
+	if (tb[TCA_IPT_INDEX] != NULL)
+		index = nla_get_u32(tb[TCA_IPT_INDEX]);
+
+	exists = tcf_hash_check(tn, index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
+		if (exists)
+			tcf_hash_release(a, bind);
 		return -EINVAL;
+	}
 
 	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
 		return -EINVAL;
 
-	if (tb[TCA_IPT_INDEX] != NULL)
-		index = nla_get_u32(tb[TCA_IPT_INDEX]);
-
 	if (!tcf_hash_check(tn, index, a, bind)) {
 		ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
 				      false);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e8a760cf7775..8f3948dd38b8 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -61,7 +61,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	struct tc_mirred *parm;
 	struct tcf_mirred *m;
 	struct net_device *dev;
-	int ret, ok_push = 0;
+	int ret, ok_push = 0, exists = 0;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -71,17 +71,27 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_MIRRED_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
 	switch (parm->eaction) {
 	case TCA_EGRESS_MIRROR:
 	case TCA_EGRESS_REDIR:
 		break;
 	default:
+		if (exists)
+			tcf_hash_release(a, bind);
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
 		dev = __dev_get_by_index(net, parm->ifindex);
-		if (dev == NULL)
+		if (dev == NULL) {
+			if (exists)
+				tcf_hash_release(a, bind);
 			return -ENODEV;
+		}
 		switch (dev->type) {
 		case ARPHRD_TUNNEL:
 		case ARPHRD_TUNNEL6:
@@ -99,7 +109,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		dev = NULL;
 	}
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!exists) {
 		if (dev == NULL)
 			return -EINVAL;
 		ret = tcf_hash_create(tn, parm->index, est, a,
@@ -108,9 +118,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		if (bind)
-			return 0;
-
 		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 75b2be13fbcc..3a33fb648a6d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -87,7 +87,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	struct tc_defact *parm;
 	struct tcf_defact *d;
 	char *defdata;
-	int ret = 0, err;
+	int ret = 0, err, exists = 0;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -99,13 +99,21 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_DEF_PARMS] == NULL)
 		return -EINVAL;
 
-	if (tb[TCA_DEF_DATA] == NULL)
-		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (tb[TCA_DEF_DATA] == NULL) {
+		if (exists)
+			tcf_hash_release(a, bind);
+		return -EINVAL;
+	}
+
 	defdata = nla_data(tb[TCA_DEF_DATA]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, est, a,
 				      sizeof(*d), bind, false);
 		if (ret)
@@ -122,8 +130,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(a);
 
-		if (bind)
-			return 0;
 		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfcdbdc00c9b..69da5a8f0034 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -69,7 +69,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL;
 	u16 *queue_mapping = NULL;
-	int ret = 0, err;
+	int ret = 0, err, exists = 0;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -96,12 +96,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		mark = nla_data(tb[TCA_SKBEDIT_MARK]);
 	}
 
-	if (!flags)
-		return -EINVAL;
-
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!flags) {
+		tcf_hash_release(a, bind);
+		return -EINVAL;
+	}
+
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, est, a,
 				      sizeof(*d), bind, false);
 		if (ret)
@@ -111,8 +117,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(a);
-		if (bind)
-			return 0;
 		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index bab8ae0cefc0..c45f926dafb9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -77,7 +77,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	int action;
 	__be16 push_vid = 0;
 	__be16 push_proto = 0;
-	int ret = 0;
+	int ret = 0, exists = 0;
 	int err;
 
 	if (!nla)
@@ -90,15 +90,25 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
 	switch (parm->v_action) {
 	case TCA_VLAN_ACT_POP:
 		break;
 	case TCA_VLAN_ACT_PUSH:
-		if (!tb[TCA_VLAN_PUSH_VLAN_ID])
+		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
+			if (exists)
+				tcf_hash_release(a, bind);
 			return -EINVAL;
+		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
-		if (push_vid >= VLAN_VID_MASK)
+		if (push_vid >= VLAN_VID_MASK) {
+			if (exists)
+				tcf_hash_release(a, bind);
 			return -ERANGE;
+		}
 
 		if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) {
 			push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]);
@@ -114,11 +124,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		}
 		break;
 	default:
+		if (exists)
+			tcf_hash_release(a, bind);
 		return -EINVAL;
 	}
 	action = parm->v_action;
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, est, a,
 				      sizeof(*v), bind, false);
 		if (ret)
@@ -126,8 +138,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 
 		ret = ACT_P_CREATED;
 	} else {
-		if (bind)
-			return 0;
 		tcf_hash_release(a, bind);
 		if (!ovr)
 			return -EEXIST;
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 
 	memset(&theirs, 0, sizeof(theirs));
 	memcpy(new, ours, sizeof(*new));
+	memset(dte, 0, sizeof(*dte));
 
 	len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
 	if (len < 0)
diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index 64e0d1d81ca5..9739fce9e032 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c
@@ -141,14 +141,6 @@ static int reconfig_codec(struct hda_codec *codec)
 	err = snd_hda_codec_configure(codec);
 	if (err < 0)
 		goto error;
-	/* rebuild PCMs */
-	err = snd_hda_codec_build_pcms(codec);
-	if (err < 0)
-		goto error;
-	/* rebuild mixers */
-	err = snd_hda_codec_build_controls(codec);
-	if (err < 0)
-		goto error;
 	err = snd_card_register(codec->card);
  error:
 	snd_hda_power_down(codec);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 1483f85999ec..a010d704e0e2 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3401,6 +3401,9 @@ static int patch_atihdmi(struct hda_codec *codec)
 	spec->ops.pin_hbr_setup = atihdmi_pin_hbr_setup;
 	spec->ops.setup_stream = atihdmi_setup_stream;
 
+	spec->chmap.ops.pin_get_slot_channel = atihdmi_pin_get_slot_channel;
+	spec->chmap.ops.pin_set_slot_channel = atihdmi_pin_set_slot_channel;
+
 	if (!has_amd_full_remap_support(codec)) {
 		/* override to ATI/AMD-specific versions with pairwise mapping */
 		spec->chmap.ops.chmap_cea_alloc_validate_get_type =
@@ -3408,10 +3411,6 @@ static int patch_atihdmi(struct hda_codec *codec)
 		spec->chmap.ops.cea_alloc_to_tlv_chmap =
 				atihdmi_paired_cea_alloc_to_tlv_chmap;
 		spec->chmap.ops.chmap_validate = atihdmi_paired_chmap_validate;
-		spec->chmap.ops.pin_get_slot_channel =
-				atihdmi_pin_get_slot_channel;
-		spec->chmap.ops.pin_set_slot_channel =
-				atihdmi_pin_set_slot_channel;
 	}
 
 	/* ATI/AMD converters do not advertise all of their capabilities */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ac4490a96863..4918ffa5ba68 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6426,6 +6426,7 @@ enum {
 	ALC668_FIXUP_DELL_DISABLE_AAMIX,
 	ALC668_FIXUP_DELL_XPS13,
 	ALC662_FIXUP_ASUS_Nx50,
+	ALC668_FIXUP_ASUS_Nx51,
 };
 
 static const struct hda_fixup alc662_fixups[] = {
@@ -6672,6 +6673,15 @@ static const struct hda_fixup alc662_fixups[] = {
 		.chained = true,
 		.chain_id = ALC662_FIXUP_BASS_1A
 	},
+	[ALC668_FIXUP_ASUS_Nx51] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{0x1a, 0x90170151}, /* bass speaker */
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC662_FIXUP_BASS_CHMAP,
+	},
 };
 
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -6694,11 +6704,14 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+	SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
 	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
+	SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51),
+	SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51),
 	SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
 	SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 0adfd9537cf7..6adde457b602 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1137,8 +1137,11 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+	case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+	case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
 	case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
+	case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
 	case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
 		return true;
 	}
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 0144b3d1bb77..88cccea3ca99 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1164,11 +1164,11 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 		current_op = current_exp;
 
 	ret = collapse_tree(current_op, parg, error_str);
+	/* collapse_tree() may free current_op, and updates parg accordingly */
+	current_op = NULL;
 	if (ret < 0)
 		goto fail;
 
-	*parg = current_op;
-
 	free(token);
 	return 0;
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5645a8361de6..e459b685a4e9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -536,6 +536,7 @@ static int __run_perf_stat(int argc, const char **argv)
 		perf_evlist__set_leader(evsel_list);
 
 	evlist__for_each(evsel_list, counter) {
+try_again:
 		if (create_perf_stat_counter(counter) < 0) {
 			/*
 			 * PPC returns ENXIO for HW counters until 2.6.37
@@ -552,7 +553,11 @@ static int __run_perf_stat(int argc, const char **argv)
 				if ((counter->leader != counter) ||
 				    !(counter->leader->nr_members > 1))
 					continue;
-			}
+			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+                                if (verbose)
+                                        ui__warning("%s\n", msg);
+                                goto try_again;
+                        }
 
 			perf_evsel__open_strerror(counter, &target,
 						  errno, msg, sizeof(msg));
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a23f54793e51..964c7c3602c0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2274,6 +2274,8 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 			  char *msg, size_t msgsize)
 {
+	int paranoid;
+
 	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
 	    evsel->attr.type   == PERF_TYPE_HARDWARE &&
 	    evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
@@ -2293,6 +2295,22 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 
 		zfree(&evsel->name);
 		return true;
+	} else if (err == EACCES && !evsel->attr.exclude_kernel &&
+		   (paranoid = perf_event_paranoid()) > 1) {
+		const char *name = perf_evsel__name(evsel);
+		char *new_name;
+
+		if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0)
+			return false;
+
+		if (evsel->name)
+			free(evsel->name);
+		evsel->name = new_name;
+		scnprintf(msg, msgsize,
+"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
+		evsel->attr.exclude_kernel = 1;
+
+		return true;
 	}
 
 	return false;
@@ -2311,12 +2329,13 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
 		 "which controls use of the performance events system by\n"
 		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
-		 "The default value is 1:\n\n"
+		 "The current value is %d:\n\n"
 		 "  -1: Allow use of (almost) all events by all users\n"
 		 ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
 		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
 		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
-				 target->system_wide ? "system-wide " : "");
+				 target->system_wide ? "system-wide " : "",
+				 perf_event_paranoid());
 	case ENOENT:
 		return scnprintf(msg, size, "The %s event is not supported.",
 				 perf_evsel__name(evsel));
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b04afc3295df..ff9e5f20a5a7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -19,6 +19,7 @@ TARGETS += powerpc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += seccomp
+TARGETS += sigaltstack
 TARGETS += size
 TARGETS += static_keys
 TARGETS += sysctl
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 000000000000..3633828375e3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs.  The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+	# Check for done.
+	t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+	if test "$t" -gt "$duration"
+	then
+		exit 0;
+	fi
+
+	# Set affinity to randomly selected CPU
+	cpus=`ls /sys/devices/system/cpu/*/online |
+		sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+		grep -v '^0*$'`
+	cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+		srand(n + me + systime());
+		ncpus = split(cpus, ca);
+		curcpu = ca[int(rand() * ncpus + 1)];
+		mask = lshift(1, curcpu);
+		if (mask + 0 <= 0)
+			mask = 1;
+		printf("%#x\n", mask);
+	}' < /dev/null`
+	n=$(($n+1))
+	if ! taskset -p $cpumask $$ > /dev/null 2>&1
+	then
+		echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+		exit 1
+	fi
+
+	# Sleep a random duration
+	sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * sleepmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	sleep .$sleeptime
+
+	# Spin a random duration
+	limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * spinmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	for i in {1..$limit}
+	do
+		echo > /dev/null
+	done
+done
+
+exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 000000000000..f79b0e9e84fc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data.  Exits with 0 if data was found, analyzed, and
+# printed.  Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+	exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+	if (starttask != "")
+		nlost++;
+	starttask = $1;
+	starttime = $3;
+	startseq = $7;
+}
+
+$8 == "end" {
+	if (starttask == $1 && startseq == $7) {
+		curgpdur = $3 - starttime;
+		gptimes[++n] = curgpdur;
+		gptaskcnt[starttask]++;
+		sum += curgpdur;
+		if (curgpdur > 1000)
+			print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+		starttask = "";
+	} else {
+		# Lost a message or some such, reset.
+		starttask = "";
+		nlost++;
+	}
+}
+
+$8 == "done" {
+	piggybackcnt[$1]++;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No ftrace records found???"
+		exit 10;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Distribution of grace periods across tasks:";
+	for (i in gptaskcnt) {
+		print "\t" i, gptaskcnt[i];
+		nbatches += gptaskcnt[i];
+	}
+	ngps = nbatches;
+	print "Distribution of piggybacking across tasks:";
+	for (i in piggybackcnt) {
+		print "\t" i, piggybackcnt[i];
+		ngps += piggybackcnt[i];
+	}
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+	print "Computed from ftrace data.";
+}'
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 000000000000..8f3121afc716
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d $i
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+	# ftrace data was successfully analyzed, call it good!
+	exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+	ngps = $9;
+	nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+	gptimes[++n] = $5 / 1000.;
+	sum += $5 / 1000.;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No rcuperf records found???"
+		exit;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+	print "Computed from rcuperf printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index d86bdd6b6cc2..f659346d3358 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,7 +48,10 @@ do
 				cat $i/Make.oldconfig.err
 			fi
 			parse-build.sh $i/Make.out $configfile
-			parse-torture.sh $i/console.log $configfile
+			if test "$TORTURE_SUITE" != rcuperf
+			then
+				parse-torture.sh $i/console.log $configfile
+			fi
 			parse-console.sh $i/console.log $configfile
 			if test -r $i/Warnings
 			then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0f80eefb0bfd..4109f306d855 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -6,7 +6,7 @@
 # Execute this in the source tree.  Do not run it as a background task
 # because qemu does not seem to like that much.
 #
-# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
 #
 # qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with
 #			arguments specifying the number of CPUs and other
@@ -91,25 +91,33 @@ fi
 # CONFIG_PCMCIA=n
 # CONFIG_CARDBUS=n
 # CONFIG_YENTA=n
-if kvm-build.sh $config_template $builddir $T
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
 then
+	# Rerunning previous test, so use that test's kernel.
+	QEMU="`identify_qemu $base_resdir/vmlinux`"
+	KERNEL=$base_resdir/bzImage
+	ln -s $base_resdir/Make*.out $resdir  # for kvm-recheck.sh
+	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
+elif kvm-build.sh $config_template $builddir $T
+then
+	# Had to build a kernel for this test.
 	QEMU="`identify_qemu $builddir/vmlinux`"
 	BOOT_IMAGE="`identify_boot_image $QEMU`"
 	cp $builddir/Make*.out $resdir
+	cp $builddir/vmlinux $resdir
 	cp $builddir/.config $resdir
 	if test -n "$BOOT_IMAGE"
 	then
 		cp $builddir/$BOOT_IMAGE $resdir
+		KERNEL=$resdir/bzImage
 	else
 		echo No identifiable boot image, not running KVM, see $resdir.
 		echo Do the torture scripts know about your architecture?
 	fi
 	parse-build.sh $resdir/Make.out $title
-	if test -f $builddir.wait
-	then
-		mv $builddir.wait $builddir.ready
-	fi
 else
+	# Build failed.
 	cp $builddir/Make*.out $resdir
 	cp $builddir/.config $resdir || :
 	echo Build failed, not running KVM, see $resdir.
@@ -119,12 +127,15 @@ else
 	fi
 	exit 1
 fi
+if test -f $builddir.wait
+then
+	mv $builddir.wait $builddir.ready
+fi
 while test -f $builddir.ready
 do
 	sleep 1
 done
-minutes=$4
-seconds=$(($minutes * 60))
+seconds=$4
 qemu_args=$5
 boot_args=$6
 
@@ -167,15 +178,26 @@ then
 	exit 0
 fi
 echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
-qemu_pid=$!
+echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
 commandcompleted=0
-echo Monitoring qemu job at pid $qemu_pid
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+	echo Monitoring qemu job at pid $qemu_pid
+else
+	qemu_pid=""
+	echo Monitoring qemu job at yet-as-unknown pid
+fi
 while :
 do
+	if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+	fi
 	kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
-	if kill -0 $qemu_pid > /dev/null 2>&1
+	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
 	then
 		if test $kruntime -ge $seconds
 		then
@@ -195,12 +217,16 @@ do
 				ps -fp $killpid >> $resdir/Warnings 2>&1
 			fi
 		else
-			echo ' ---' `date`: Kernel done
+			echo ' ---' `date`: "Kernel done"
 		fi
 		break
 	fi
 done
-if test $commandcompleted -eq 0
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
 then
 	echo Grace period for qemu job at pid $qemu_pid
 	while :
@@ -220,6 +246,9 @@ then
 		fi
 		sleep 1
 	done
+elif test -z "$qemu_pid"
+then
+	echo Unknown PID, cannot kill qemu command
 fi
 
 parse-torture.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 4a431767f77a..0d598145873e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -34,7 +34,7 @@ T=/tmp/kvm.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-dur=30
+dur=$((30*60))
 dryrun=""
 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
 PATH=${KVM}/bin:$PATH; export PATH
@@ -48,6 +48,7 @@ resdir=""
 configs=""
 cpus=0
 ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter=0
 
 . functions.sh
 
@@ -63,6 +64,7 @@ usage () {
 	echo "       --dryrun sched|script"
 	echo "       --duration minutes"
 	echo "       --interactive"
+	echo "       --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
 	echo "       --kmake-arg kernel-make-arguments"
 	echo "       --mac nn:nn:nn:nn:nn:nn"
 	echo "       --no-initrd"
@@ -116,12 +118,17 @@ do
 		;;
 	--duration)
 		checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
-		dur=$2
+		dur=$(($2*60))
 		shift
 		;;
 	--interactive)
 		TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
 		;;
+	--jitter)
+		checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+		jitter="$2"
+		shift
+		;;
 	--kmake-arg)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
@@ -156,7 +163,7 @@ do
 		shift
 		;;
 	--torture)
-		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
+		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
 		TORTURE_SUITE=$2
 		shift
 		;;
@@ -299,6 +306,7 @@ awk < $T/cfgcpu.pack \
 	-v CONFIGDIR="$CONFIGFRAG/" \
 	-v KVM="$KVM" \
 	-v ncpus=$cpus \
+	-v jitter="$jitter" \
 	-v rd=$resdir/$ds/ \
 	-v dur=$dur \
 	-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
@@ -359,6 +367,16 @@ function dump(first, pastlast, batchnum)
 		print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log";
 		print "fi"
 	}
+	njitter = 0;
+	split(jitter, ja);
+	if (ja[1] == -1 && ncpus == 0)
+		njitter = 1;
+	else if (ja[1] == -1)
+		njitter = ncpus;
+	else
+		njitter = ja[1];
+	for (j = 0; j < njitter; j++)
+		print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
 	print "wait"
 	print "if test -z \"$TORTURE_BUILDONLY\""
 	print "then"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 39a2c6d7d7ec..17cbe098b115 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -14,7 +14,7 @@ CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
+CONFIG_RCU_FANOUT_LEAF=3
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 0fc8a3428938..e34c33430447 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 000000000000..c9f56cf20775
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 000000000000..a09816b8c0f3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 000000000000..a312f671a29a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 000000000000..985fb170d13c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 000000000000..34f2a1b35ee5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcuperf_param_nreaders bootparam-string
+#
+# Adds nreaders rcuperf module parameter if not already specified.
+rcuperf_param_nreaders () {
+	if ! echo "$1" | grep -q "rcuperf.nreaders"
+	then
+		echo rcuperf.nreaders=-1
+	fi
+}
+
+# rcuperf_param_nwriters bootparam-string
+#
+# Adds nwriters rcuperf module parameter if not already specified.
+rcuperf_param_nwriters () {
+	if ! echo "$1" | grep -q "rcuperf.nwriters"
+	then
+		echo rcuperf.nwriters=-1
+	fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 `rcuperf_param_nreaders "$1"` \
+		`rcuperf_param_nwriters "$1"` \
+		rcuperf.perf_runnable=1 \
+		rcuperf.shutdown=1 \
+		rcuperf.verbose=1
+}
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 000000000000..56af56eda6fa
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -Wall
+BINARIES = sas
+all: $(BINARIES)
+
+include ../lib.mk
+
+clean:
+	rm -rf $(BINARIES)
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 000000000000..1bb01258e559
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,176 @@
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * test sigaltstack(SS_ONSTACK | SS_AUTODISARM)
+ * If that succeeds, then swapcontext() can be used inside sighandler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM  (1U << 31)
+#endif
+
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+	char msg[128];
+	int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	int err;
+	stack_t stk;
+	struct stk_data *p;
+
+	register unsigned long sp asm("sp");
+
+	if (sp < (unsigned long)sstack ||
+			sp >= (unsigned long)sstack + SIGSTKSZ) {
+		printf("[FAIL]\tSP is not on sigaltstack\n");
+		exit(EXIT_FAILURE);
+	}
+	/* put some data on stack. other sighandler will try to overwrite it */
+	aa = alloca(1024);
+	assert(aa);
+	p = (struct stk_data *)(aa + 512);
+	strcpy(p->msg, msg);
+	p->flag = 1;
+	printf("[RUN]\tsignal USR1\n");
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_DISABLE)
+		printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+				stk.ss_flags);
+	else
+		printf("[OK]\tsigaltstack is disabled in sighandler\n");
+	swapcontext(&sc, &uc);
+	printf("%s\n", p->msg);
+	if (!p->flag) {
+		printf("[RUN]\tAborting\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	struct stk_data *p;
+
+	printf("[RUN]\tsignal USR2\n");
+	aa = alloca(1024);
+	/* dont run valgrind on this */
+	/* try to find the data stored by previous sighandler */
+	p = memmem(aa, 1024, msg, strlen(msg));
+	if (p) {
+		printf("[FAIL]\tsigaltstack re-used\n");
+		/* corrupt the data */
+		strcpy(p->msg, msg2);
+		/* tell other sighandler that his data is corrupted */
+		p->flag = 0;
+	}
+}
+
+static void switch_fn(void)
+{
+	printf("[RUN]\tswitched to user ctx\n");
+	raise(SIGUSR2);
+	setcontext(&sc);
+}
+
+int main(void)
+{
+	struct sigaction act;
+	stack_t stk;
+	int err;
+
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+	act.sa_sigaction = my_usr1;
+	sigaction(SIGUSR1, &act, NULL);
+	act.sa_sigaction = my_usr2;
+	sigaction(SIGUSR2, &act, NULL);
+	sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (sstack == MAP_FAILED) {
+		perror("mmap()");
+		return EXIT_FAILURE;
+	}
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags == SS_DISABLE) {
+		printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
+	} else {
+		printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+		return EXIT_FAILURE;
+	}
+
+	stk.ss_sp = sstack;
+	stk.ss_size = SIGSTKSZ;
+	stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+	err = sigaltstack(&stk, NULL);
+	if (err) {
+		if (errno == EINVAL) {
+			printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+			/*
+			 * If test cases for the !SS_AUTODISARM variant were
+			 * added, we could still run them.  We don't have any
+			 * test cases like that yet, so just exit and report
+			 * success.
+			 */
+			return 0;
+		} else {
+			perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)");
+			return EXIT_FAILURE;
+		}
+	}
+
+	ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (ustack == MAP_FAILED) {
+		perror("mmap()");
+		return EXIT_FAILURE;
+	}
+	getcontext(&uc);
+	uc.uc_link = NULL;
+	uc.uc_stack.ss_sp = ustack;
+	uc.uc_stack.ss_size = SIGSTKSZ;
+	makecontext(&uc, switch_fn, 0);
+	raise(SIGUSR1);
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_AUTODISARM) {
+		printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+				stk.ss_flags);
+		exit(EXIT_FAILURE);
+	}
+	printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n");
+
+	printf("[OK]\tTest passed\n");
+	return 0;
+}