summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/atomic_t.txt2
-rw-r--r--Documentation/memory-barriers.txt22
-rw-r--r--tools/memory-model/Documentation/explanation.txt39
-rw-r--r--tools/memory-model/linux-kernel.bell6
-rw-r--r--tools/memory-model/linux-kernel.cat5
-rw-r--r--tools/memory-model/litmus-tests/dep+plain.litmus31
6 files changed, 90 insertions, 15 deletions
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index 0f1ffa03db09..d7adc6d543db 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -324,7 +324,7 @@ atomic operations.
Specifically 'simple' cmpxchg() loops are expected to not starve one another
indefinitely. However, this is not evident on LL/SC architectures, because
-while an LL/SC architecure 'can/should/must' provide forward progress
+while an LL/SC architecture 'can/should/must' provide forward progress
guarantees between competing LL/SC sections, such a guarantee does not
transfer to cmpxchg() implemented using LL/SC. Consider:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index cc621decd943..06e14efd8662 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1910,7 +1910,8 @@ There are some more advanced barrier functions:
These are for use with consistent memory to guarantee the ordering
of writes or reads of shared memory accessible to both the CPU and a
- DMA capable device.
+ DMA capable device. See Documentation/core-api/dma-api.rst file for more
+ information about consistent memory.
For example, consider a device driver that shares memory with a device
and uses a descriptor status value to indicate if the descriptor belongs
@@ -1931,22 +1932,21 @@ There are some more advanced barrier functions:
/* assign ownership */
desc->status = DEVICE_OWN;
- /* notify device of new descriptors */
+ /* Make descriptor status visible to the device followed by
+ * notify device of new descriptor
+ */
writel(DESC_NOTIFY, doorbell);
}
- The dma_rmb() allows us guarantee the device has released ownership
+ The dma_rmb() allows us to guarantee that the device has released ownership
before we read the data from the descriptor, and the dma_wmb() allows
us to guarantee the data is written to the descriptor before the device
can see it now has ownership. The dma_mb() implies both a dma_rmb() and
- a dma_wmb(). Note that, when using writel(), a prior wmb() is not needed
- to guarantee that the cache coherent memory writes have completed before
- writing to the MMIO region. The cheaper writel_relaxed() does not provide
- this guarantee and must not be used here.
-
- See the subsection "Kernel I/O barrier effects" for more information on
- relaxed I/O accessors and the Documentation/core-api/dma-api.rst file for
- more information on consistent memory.
+ a dma_wmb().
+
+ Note that the dma_*() barriers do not provide any ordering guarantees for
+ accesses to MMIO regions. See the later "KERNEL I/O BARRIER EFFECTS"
+ subsection for more information about I/O accessors and MMIO ordering.
(*) pmem_wmb();
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 11a1d2d4f681..8e7085238470 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1007,6 +1007,36 @@ order. Equivalently,
where the rmw relation links the read and write events making up each
atomic update. This is what the LKMM's "atomic" axiom says.
+Atomic rmw updates play one more role in the LKMM: They can form "rmw
+sequences". An rmw sequence is simply a bunch of atomic updates where
+each update reads from the previous one. Written using events, it
+looks like this:
+
+ Z0 ->rf Y1 ->rmw Z1 ->rf ... ->rf Yn ->rmw Zn,
+
+where Z0 is some store event and n can be any number (even 0, in the
+degenerate case). We write this relation as: Z0 ->rmw-sequence Zn.
+Note that this implies Z0 and Zn are stores to the same variable.
+
+Rmw sequences have a special property in the LKMM: They can extend the
+cumul-fence relation. That is, if we have:
+
+ U ->cumul-fence X -> rmw-sequence Y
+
+then also U ->cumul-fence Y. Thinking about this in terms of the
+operational model, U ->cumul-fence X says that the store U propagates
+to each CPU before the store X does. Then the fact that X and Y are
+linked by an rmw sequence means that U also propagates to each CPU
+before Y does. In an analogous way, rmw sequences can also extend
+the w-post-bounded relation defined below in the PLAIN ACCESSES AND
+DATA RACES section.
+
+(The notion of rmw sequences in the LKMM is similar to, but not quite
+the same as, that of release sequences in the C11 memory model. They
+were added to the LKMM to fix an obscure bug; without them, atomic
+updates with full-barrier semantics did not always guarantee ordering
+at least as strong as atomic updates with release-barrier semantics.)
+
THE PRESERVED PROGRAM ORDER RELATION: ppo
-----------------------------------------
@@ -2545,7 +2575,7 @@ smp_store_release() -- which is basically how the Linux kernel treats
them.
Although we said that plain accesses are not linked by the ppo
-relation, they do contribute to it indirectly. Namely, when there is
+relation, they do contribute to it indirectly. Firstly, when there is
an address dependency from a marked load R to a plain store W,
followed by smp_wmb() and then a marked store W', the LKMM creates a
ppo link from R to W'. The reasoning behind this is perhaps a little
@@ -2554,6 +2584,13 @@ for this source code in which W' could execute before R. Just as with
pre-bounding by address dependencies, it is possible for the compiler
to undermine this relation if sufficient care is not taken.
+Secondly, plain accesses can carry dependencies: If a data dependency
+links a marked load R to a store W, and the store is read by a load R'
+from the same thread, then the data loaded by R' depends on the data
+loaded originally by R. Thus, if R' is linked to any access X by a
+dependency, R is also linked to access X by the same dependency, even
+if W' or R' (or both!) are plain.
+
There are a few oddball fences which need special treatment:
smp_mb__before_atomic(), smp_mb__after_atomic(), and
smp_mb__after_spinlock(). The LKMM uses fence events with special
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 5be86b1025e8..70a9073dec3e 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -82,3 +82,9 @@ flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
LKR | LKW | UL | LF | RL | RU
let Plain = M \ Marked
+
+(* Redefine dependencies to include those carried through plain accesses *)
+let carry-dep = (data ; rfi)*
+let addr = carry-dep ; addr
+let ctrl = carry-dep ; ctrl
+let data = carry-dep ; data
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index d70315fddef6..07f884f9b2bf 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -74,8 +74,9 @@ let ppo = to-r | to-w | fence | (po-unlock-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
+let rmw-sequence = (rf ; rmw)*
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
- po-unlock-lock-po) ; [Marked]
+ po-unlock-lock-po) ; [Marked] ; rmw-sequence
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
[Marked] ; rfe? ; [Marked]
@@ -174,7 +175,7 @@ let vis = cumul-fence* ; rfe? ; [Marked] ;
let w-pre-bounded = [Marked] ; (addr | fence)?
let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
-let w-post-bounded = fence? ; [Marked]
+let w-post-bounded = fence? ; [Marked] ; rmw-sequence
let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
[Marked]
diff --git a/tools/memory-model/litmus-tests/dep+plain.litmus b/tools/memory-model/litmus-tests/dep+plain.litmus
new file mode 100644
index 000000000000..ebf84daa9a59
--- /dev/null
+++ b/tools/memory-model/litmus-tests/dep+plain.litmus
@@ -0,0 +1,31 @@
+C dep+plain
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that in LKMM, plain accesses
+ * carry dependencies much like accesses to registers:
+ * The data stored to *z1 and *z2 by P0() originates from P0()'s
+ * READ_ONCE(), and therefore using that data to compute the
+ * conditional of P0()'s if-statement creates a control dependency
+ * from that READ_ONCE() to P0()'s WRITE_ONCE().
+ *)
+
+{}
+
+P0(int *x, int *y, int *z1, int *z2)
+{
+ int a = READ_ONCE(*x);
+ *z1 = a;
+ *z2 = *z1;
+ if (*z2 == 1)
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r = smp_load_acquire(y);
+ smp_store_release(x, r);
+}
+
+exists (x=1 /\ y=1)