33 files changed, 1815 insertions, 80 deletions
diff --git a/Documentation/devicetree/bindings/arm/apple.yaml b/Documentation/devicetree/bindings/arm/apple.yaml
new file mode 100644
index 000000000000..1e772c85206c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/apple.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/apple.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple ARM Machine Device Tree Bindings
+
+maintainers:
+  - Hector Martin <marcan@marcan.st>
+
+description: |
+  ARM platforms using SoCs designed by Apple Inc., branded "Apple Silicon".
+
+  This currently includes devices based on the "M1" SoC, starting with the
+  three Mac models released in late 2020:
+
+  - Mac mini (M1, 2020)
+  - MacBook Pro (13-inch, M1, 2020)
+  - MacBook Air (M1, 2020)
+
+  The compatible property should follow this format:
+
+  compatible = "apple,<targettype>", "apple,<socid>", "apple,arm-platform";
+
+  <targettype> represents the board/device and comes from the `target-type`
+  property of the root node of the Apple Device Tree, lowercased. It can be
+  queried on macOS using the following command:
+
+  $ ioreg -d2 -l | grep target-type
+
+  <socid> is the lowercased SoC ID. Apple uses at least *five* different
+  names for their SoCs:
+
+  - Marketing name ("M1")
+  - Internal name ("H13G")
+  - Codename ("Tonga")
+  - SoC ID ("T8103")
+  - Package/IC part number ("APL1102")
+
+  Devicetrees should use the lowercased SoC ID, to avoid confusion if
+  multiple SoCs share the same marketing name. This can be obtained from
+  the `compatible` property of the arm-io node of the Apple Device Tree,
+  which can be queried as follows on macOS:
+
+  $ ioreg -n arm-io | grep compatible
+
+properties:
+  $nodename:
+    const: "/"
+  compatible:
+    oneOf:
+      - description: Apple M1 SoC based platforms
+        items:
+          - enum:
+              - apple,j274 # Mac mini (M1, 2020)
+              - apple,j293 # MacBook Pro (13-inch, M1, 2020)
+              - apple,j313 # MacBook Air (M1, 2020)
+          - const: apple,t8103
+          - const: apple,arm-platform
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 26b886b20b27..c299423dc7cb 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -85,6 +85,8 @@ properties:
 
   compatible:
     enum:
+      - apple,icestorm
+      - apple,firestorm
       - arm,arm710t
       - arm,arm720t
       - arm,arm740t
diff --git a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
index eaf8c54fcf50..c2499a7906f5 100644
--- a/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
+++ b/Documentation/devicetree/bindings/display/simple-framebuffer.yaml
@@ -54,6 +54,7 @@ properties:
   compatible:
     items:
       - enum:
+          - apple,simple-framebuffer
           - allwinner,simple-framebuffer
           - amlogic,simple-framebuffer
       - const: simple-framebuffer
@@ -84,9 +85,13 @@ properties:
       Format of the framebuffer:
         * `a8b8g8r8` - 32-bit pixels, d[31:24]=a, d[23:16]=b, d[15:8]=g, d[7:0]=r
         * `r5g6b5` - 16-bit pixels, d[15:11]=r, d[10:5]=g, d[4:0]=b
+        * `x2r10g10b10` - 32-bit pixels, d[29:20]=r, d[19:10]=g, d[9:0]=b
+        * `x8r8g8b8` - 32-bit pixels, d[23:16]=r, d[15:8]=g, d[7:0]=b
     enum:
       - a8b8g8r8
       - r5g6b5
+      - x2r10g10b10
+      - x8r8g8b8
 
   display:
     $ref: /schemas/types.yaml#/definitions/phandle
diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
new file mode 100644
index 000000000000..cf6c091a07b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/apple,aic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple Interrupt Controller
+
+maintainers:
+  - Hector Martin <marcan@marcan.st>
+
+description: |
+  The Apple Interrupt Controller is a simple interrupt controller present on
+  Apple ARM SoC platforms, including various iPhone and iPad devices and the
+  "Apple Silicon" Macs.
+
+  It provides the following features:
+
+  - Level-triggered hardware IRQs wired to SoC blocks
+    - Single mask bit per IRQ
+    - Per-IRQ affinity setting
+    - Automatic masking on event delivery (auto-ack)
+    - Software triggering (ORed with hw line)
+  - 2 per-CPU IPIs (meant as "self" and "other", but they are interchangeable
+    if not symmetric)
+  - Automatic prioritization (single event/ack register per CPU, lower IRQs =
+    higher priority)
+  - Automatic masking on ack
+  - Default "this CPU" register view and explicit per-CPU views
+
+  This device also represents the FIQ interrupt sources on platforms using AIC,
+  which do not go through a discrete interrupt controller.
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+  compatible:
+    items:
+      - const: apple,t8103-aic
+      - const: apple,aic
+
+  interrupt-controller: true
+
+  '#interrupt-cells':
+    const: 3
+    description: |
+      The 1st cell contains the interrupt type:
+        - 0: Hardware IRQ
+        - 1: FIQ
+
+      The 2nd cell contains the interrupt number.
+        - HW IRQs: interrupt number
+        - FIQs:
+          - 0: physical HV timer
+          - 1: virtual HV timer
+          - 2: physical guest timer
+          - 3: virtual guest timer
+
+      The 3rd cell contains the interrupt flags. This is normally
+      IRQ_TYPE_LEVEL_HIGH (4).
+
+  reg:
+    description: |
+      Specifies base physical address and size of the AIC registers.
+    maxItems: 1
+
+required:
+  - compatible
+  - '#interrupt-cells'
+  - interrupt-controller
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        aic: interrupt-controller@23b100000 {
+            compatible = "apple,t8103-aic", "apple,aic";
+            #interrupt-cells = <3>;
+            interrupt-controller;
+            reg = <0x2 0x3b100000 0x0 0x8000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml
index 2c75105c1398..7f5e3af58255 100644
--- a/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml
+++ b/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml
@@ -34,11 +34,30 @@ properties:
               - arm,armv8-timer
 
   interrupts:
+    minItems: 1
+    maxItems: 5
     items:
       - description: secure timer irq
       - description: non-secure timer irq
       - description: virtual timer irq
       - description: hypervisor timer irq
+      - description: hypervisor virtual timer irq
+
+  interrupt-names:
+    oneOf:
+      - minItems: 2
+        items:
+          - const: phys
+          - const: virt
+          - const: hyp-phys
+          - const: hyp-virt
+      - minItems: 3
+        items:
+          - const: sec-phys
+          - const: phys
+          - const: virt
+          - const: hyp-phys
+          - const: hyp-virt
 
   clock-frequency:
     description: The frequency of the main counter, in Hz. Should be present
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 276c0ef09435..b6e71a7c56bb 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -103,6 +103,8 @@ patternProperties:
     description: Anvo-Systems Dresden GmbH
   "^apm,.*":
     description: Applied Micro Circuits Corporation (APM)
+  "^apple,.*":
+    description: Apple Inc.
   "^aptina,.*":
     description: Aptina Imaging
   "^arasan,.*":
diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst
index 764963876d08..e9f04b1815d1 100644
--- a/Documentation/driver-api/device-io.rst
+++ b/Documentation/driver-api/device-io.rst
@@ -146,6 +146,362 @@ There are also equivalents to memcpy. The ins() and
 outs() functions copy bytes, words or longs to the given
 port.
 
+__iomem pointer tokens
+======================
+
+The data type for an MMIO address is an ``__iomem`` qualified pointer, such as
+``void __iomem *reg``. On most architectures it is a regular pointer that
+points to a virtual memory address and can be offset or dereferenced, but in
+portable code, it must only be passed from and to functions that explicitly
+operated on an ``__iomem`` token, in particular the ioremap() and
+readl()/writel() functions. The 'sparse' semantic code checker can be used to
+verify that this is done correctly.
+
+While on most architectures, ioremap() creates a page table entry for an
+uncached virtual address pointing to the physical MMIO address, some
+architectures require special instructions for MMIO, and the ``__iomem`` pointer
+just encodes the physical address or an offsettable cookie that is interpreted
+by readl()/writel().
+
+Differences between I/O access functions
+========================================
+
+readq(), readl(), readw(), readb(), writeq(), writel(), writew(), writeb()
+
+  These are the most generic accessors, providing serialization against other
+  MMIO accesses and DMA accesses as well as fixed endianness for accessing
+  little-endian PCI devices and on-chip peripherals. Portable device drivers
+  should generally use these for any access to ``__iomem`` pointers.
+
+  Note that posted writes are not strictly ordered against a spinlock, see
+  Documentation/driver-api/io_ordering.rst.
+
+readq_relaxed(), readl_relaxed(), readw_relaxed(), readb_relaxed(),
+writeq_relaxed(), writel_relaxed(), writew_relaxed(), writeb_relaxed()
+
+  On architectures that require an expensive barrier for serializing against
+  DMA, these "relaxed" versions of the MMIO accessors only serialize against
+  each other, but contain a less expensive barrier operation. A device driver
+  might use these in a particularly performance sensitive fast path, with a
+  comment that explains why the usage in a specific location is safe without
+  the extra barriers.
+
+  See memory-barriers.txt for a more detailed discussion on the precise ordering
+  guarantees of the non-relaxed and relaxed versions.
+
+ioread64(), ioread32(), ioread16(), ioread8(),
+iowrite64(), iowrite32(), iowrite16(), iowrite8()
+
+  These are an alternative to the normal readl()/writel() functions, with almost
+  identical behavior, but they can also operate on ``__iomem`` tokens returned
+  for mapping PCI I/O space with pci_iomap() or ioport_map(). On architectures
+  that require special instructions for I/O port access, this adds a small
+  overhead for an indirect function call implemented in lib/iomap.c, while on
+  other architectures, these are simply aliases.
+
+ioread64be(), ioread32be(), ioread16be()
+iowrite64be(), iowrite32be(), iowrite16be()
+
+  These behave in the same way as the ioread32()/iowrite32() family, but with
+  reversed byte order, for accessing devices with big-endian MMIO registers.
+  Device drivers that can operate on either big-endian or little-endian
+  registers may have to implement a custom wrapper function that picks one or
+  the other depending on which device was found.
+
+  Note: On some architectures, the normal readl()/writel() functions
+  traditionally assume that devices are the same endianness as the CPU, while
+  using a hardware byte-reverse on the PCI bus when running a big-endian kernel.
+  Drivers that use readl()/writel() this way are generally not portable, but
+  tend to be limited to a particular SoC.
+
+hi_lo_readq(), lo_hi_readq(), hi_lo_readq_relaxed(), lo_hi_readq_relaxed(),
+ioread64_lo_hi(), ioread64_hi_lo(), ioread64be_lo_hi(), ioread64be_hi_lo(),
+hi_lo_writeq(), lo_hi_writeq(), hi_lo_writeq_relaxed(), lo_hi_writeq_relaxed(),
+iowrite64_lo_hi(), iowrite64_hi_lo(), iowrite64be_lo_hi(), iowrite64be_hi_lo()
+
+  Some device drivers have 64-bit registers that cannot be accessed atomically
+  on 32-bit architectures but allow two consecutive 32-bit accesses instead.
+  Since it depends on the particular device which of the two halves has to be
+  accessed first, a helper is provided for each combination of 64-bit accessors
+  with either low/high or high/low word ordering. A device driver must include
+  either <linux/io-64-nonatomic-lo-hi.h> or <linux/io-64-nonatomic-hi-lo.h> to
+  get the function definitions along with helpers that redirect the normal
+  readq()/writeq() to them on architectures that do not provide 64-bit access
+  natively.
+
+__raw_readq(), __raw_readl(), __raw_readw(), __raw_readb(),
+__raw_writeq(), __raw_writel(), __raw_writew(), __raw_writeb()
+
+  These are low-level MMIO accessors without barriers or byteorder changes and
+  architecture specific behavior. Accesses are usually atomic in the sense that
+  a four-byte __raw_readl() does not get split into individual byte loads, but
+  multiple consecutive accesses can be combined on the bus. In portable code, it
+  is only safe to use these to access memory behind a device bus but not MMIO
+  registers, as there are no ordering guarantees with regard to other MMIO
+  accesses or even spinlocks. The byte order is generally the same as for normal
+  memory, so unlike the other functions, these can be used to copy data between
+  kernel memory and device memory.
+
+inl(), inw(), inb(), outl(), outw(), outb()
+
+  PCI I/O port resources traditionally require separate helpers as they are
+  implemented using special instructions on the x86 architecture. On most other
+  architectures, these are mapped to readl()/writel() style accessors
+  internally, usually pointing to a fixed area in virtual memory. Instead of an
+  ``__iomem`` pointer, the address is a 32-bit integer token to identify a port
+  number. PCI requires I/O port access to be non-posted, meaning that an outb()
+  must complete before the following code executes, while a normal writeb() may
+  still be in progress. On architectures that correctly implement this, I/O port
+  access is therefore ordered against spinlocks. Many non-x86 PCI host bridge
+  implementations and CPU architectures however fail to implement non-posted I/O
+  space on PCI, so they can end up being posted on such hardware.
+
+  In some architectures, the I/O port number space has a 1:1 mapping to
+  ``__iomem`` pointers, but this is not recommended and device drivers should
+  not rely on that for portability. Similarly, an I/O port number as described
+  in a PCI base address register may not correspond to the port number as seen
+  by a device driver. Portable drivers need to read the port number for the
+  resource provided by the kernel.
+
+  There are no direct 64-bit I/O port accessors, but pci_iomap() in combination
+  with ioread64/iowrite64 can be used instead.
+
+inl_p(), inw_p(), inb_p(), outl_p(), outw_p(), outb_p()
+
+  On ISA devices that require specific timing, the _p versions of the I/O
+  accessors add a small delay. On architectures that do not have ISA buses,
+  these are aliases to the normal inb/outb helpers.
+
+readsq, readsl, readsw, readsb
+writesq, writesl, writesw, writesb
+ioread64_rep, ioread32_rep, ioread16_rep, ioread8_rep
+iowrite64_rep, iowrite32_rep, iowrite16_rep, iowrite8_rep
+insl, insw, insb, outsl, outsw, outsb
+
+  These are helpers that access the same address multiple times, usually to copy
+  data between kernel memory byte stream and a FIFO buffer. Unlike the normal
+  MMIO accessors, these do not perform a byteswap on big-endian kernels, so the
+  first byte in the FIFO register corresponds to the first byte in the memory
+  buffer regardless of the architecture.
+
+Device memory mapping modes
+===========================
+
+Some architectures support multiple modes for mapping device memory.
+ioremap_*() variants provide a common abstraction around these
+architecture-specific modes, with a shared set of semantics.
+
+ioremap() is the most common mapping type, and is applicable to typical device
+memory (e.g. I/O registers). Other modes can offer weaker or stronger
+guarantees, if supported by the architecture. From most to least common, they
+are as follows:
+
+ioremap()
+---------
+
+The default mode, suitable for most memory-mapped devices, e.g. control
+registers. Memory mapped using ioremap() has the following characteristics:
+
+* Uncached - CPU-side caches are bypassed, and all reads and writes are handled
+  directly by the device
+* No speculative operations - the CPU may not issue a read or write to this
+  memory, unless the instruction that does so has been reached in committed
+  program flow.
+* No reordering - The CPU may not reorder accesses to this memory mapping with
+  respect to each other. On some architectures, this relies on barriers in
+  readl_relaxed()/writel_relaxed().
+* No repetition - The CPU may not issue multiple reads or writes for a single
+  program instruction.
+* No write-combining - Each I/O operation results in one discrete read or write
+  being issued to the device, and multiple writes are not combined into larger
+  writes. This may or may not be enforced when using __raw I/O accessors or
+  pointer dereferences.
+* Non-executable - The CPU is not allowed to speculate instruction execution
+  from this memory (it probably goes without saying, but you're also not
+  allowed to jump into device memory).
+
+On many platforms and buses (e.g. PCI), writes issued through ioremap()
+mappings are posted, which means that the CPU does not wait for the write to
+actually reach the target device before retiring the write instruction.
+
+On many platforms, I/O accesses must be aligned with respect to the access
+size; failure to do so will result in an exception or unpredictable results.
+
+ioremap_wc()
+------------
+
+Maps I/O memory as normal memory with write combining. Unlike ioremap(),
+
+* The CPU may speculatively issue reads from the device that the program
+  didn't actually execute, and may choose to basically read whatever it wants.
+* The CPU may reorder operations as long as the result is consistent from the
+  program's point of view.
+* The CPU may write to the same location multiple times, even when the program
+  issued a single write.
+* The CPU may combine several writes into a single larger write.
+
+This mode is typically used for video framebuffers, where it can increase
+performance of writes. It can also be used for other blocks of memory in
+devices (e.g. buffers or shared memory), but care must be taken as accesses are
+not guaranteed to be ordered with respect to normal ioremap() MMIO register
+accesses without explicit barriers.
+
+On a PCI bus, it is usually safe to use ioremap_wc() on MMIO areas marked as
+``IORESOURCE_PREFETCH``, but it may not be used on those without the flag.
+For on-chip devices, there is no corresponding flag, but a driver can use
+ioremap_wc() on a device that is known to be safe.
+
+ioremap_wt()
+------------
+
+Maps I/O memory as normal memory with write-through caching. Like ioremap_wc(),
+but also,
+
+* The CPU may cache writes issued to and reads from the device, and serve reads
+  from that cache.
+
+This mode is sometimes used for video framebuffers, where drivers still expect
+writes to reach the device in a timely manner (and not be stuck in the CPU
+cache), but reads may be served from the cache for efficiency. However, it is
+rarely useful these days, as framebuffer drivers usually perform writes only,
+for which ioremap_wc() is more efficient (as it doesn't needlessly trash the
+cache). Most drivers should not use this.
+
+ioremap_np()
+------------
+
+Like ioremap(), but explicitly requests non-posted write semantics. On some
+architectures and buses, ioremap() mappings have posted write semantics, which
+means that writes can appear to "complete" from the point of view of the
+CPU before the written data actually arrives at the target device. Writes are
+still ordered with respect to other writes and reads from the same device, but
+due to the posted write semantics, this is not the case with respect to other
+devices. ioremap_np() explicitly requests non-posted semantics, which means
+that the write instruction will not appear to complete until the device has
+received (and to some platform-specific extent acknowledged) the written data.
+
+This mapping mode primarily exists to cater for platforms with bus fabrics that
+require this particular mapping mode to work correctly. These platforms set the
+``IORESOURCE_MEM_NONPOSTED`` flag for a resource that requires ioremap_np()
+semantics and portable drivers should use an abstraction that automatically
+selects it where appropriate (see the `Higher-level ioremap abstractions`_
+section below).
+
+The bare ioremap_np() is only available on some architectures; on others, it
+always returns NULL. Drivers should not normally use it, unless they are
+platform-specific or they derive benefit from non-posted writes where
+supported, and can fall back to ioremap() otherwise. The normal approach to
+ensure posted write completion is to do a dummy read after a write as
+explained in `Accessing the device`_, which works with ioremap() on all
+platforms.
+
+ioremap_np() should never be used for PCI drivers. PCI memory space writes are
+always posted, even on architectures that otherwise implement ioremap_np().
+Using ioremap_np() for PCI BARs will at best result in posted write semantics,
+and at worst result in complete breakage.
+
+Note that non-posted write semantics are orthogonal to CPU-side ordering
+guarantees. A CPU may still choose to issue other reads or writes before a
+non-posted write instruction retires. See the previous section on MMIO access
+functions for details on the CPU side of things.
+
+ioremap_uc()
+------------
+
+ioremap_uc() behaves like ioremap() except that on the x86 architecture without
+'PAT' mode, it marks memory as uncached even when the MTRR has designated
+it as cacheable, see Documentation/x86/pat.rst.
+
+Portable drivers should avoid the use of ioremap_uc().
+
+ioremap_cache()
+---------------
+
+ioremap_cache() effectively maps I/O memory as normal RAM. CPU write-back
+caches can be used, and the CPU is free to treat the device as if it were a
+block of RAM. This should never be used for device memory which has side
+effects of any kind, or which does not return the data previously written on
+read.
+
+It should also not be used for actual RAM, as the returned pointer is an
+``__iomem`` token. memremap() can be used for mapping normal RAM that is outside
+of the linear kernel memory area to a regular pointer.
+
+Portable drivers should avoid the use of ioremap_cache().
+
+Architecture example
+--------------------
+
+Here is how the above modes map to memory attribute settings on the ARM64
+architecture:
+
++------------------------+--------------------------------------------+
+| API                    | Memory region type and cacheability        |
++------------------------+--------------------------------------------+
+| ioremap_np()           | Device-nGnRnE                              |
++------------------------+--------------------------------------------+
+| ioremap()              | Device-nGnRE                               |
++------------------------+--------------------------------------------+
+| ioremap_uc()           | (not implemented)                          |
++------------------------+--------------------------------------------+
+| ioremap_wc()           | Normal-Non Cacheable                       |
++------------------------+--------------------------------------------+
+| ioremap_wt()           | (not implemented; fallback to ioremap)     |
++------------------------+--------------------------------------------+
+| ioremap_cache()        | Normal-Write-Back Cacheable                |
++------------------------+--------------------------------------------+
+
+Higher-level ioremap abstractions
+=================================
+
+Instead of using the above raw ioremap() modes, drivers are encouraged to use
+higher-level APIs. These APIs may implement platform-specific logic to
+automatically choose an appropriate ioremap mode on any given bus, allowing for
+a platform-agnostic driver to work on those platforms without any special
+cases. At the time of this writing, the following ioremap() wrappers have such
+logic:
+
+devm_ioremap_resource()
+
+  Can automatically select ioremap_np() over ioremap() according to platform
+  requirements, if the ``IORESOURCE_MEM_NONPOSTED`` flag is set on the struct
+  resource. Uses devres to automatically unmap the resource when the driver
+  probe() function fails or a device in unbound from its driver.
+
+  Documented in Documentation/driver-api/driver-model/devres.rst.
+
+of_address_to_resource()
+
+  Automatically sets the ``IORESOURCE_MEM_NONPOSTED`` flag for platforms that
+  require non-posted writes for certain buses (see the nonposted-mmio and
+  posted-mmio device tree properties).
+
+of_iomap()
+
+  Maps the resource described in a ``reg`` property in the device tree, doing
+  all required translations. Automatically selects ioremap_np() according to
+  platform requirements, as above.
+
+pci_ioremap_bar(), pci_ioremap_wc_bar()
+
+  Maps the resource described in a PCI base address without having to extract
+  the physical address first.
+
+pci_iomap(), pci_iomap_wc()
+
+  Like pci_ioremap_bar()/pci_ioremap_bar(), but also works on I/O space when
+  used together with ioread32()/iowrite32() and similar accessors
+
+pcim_iomap()
+
+  Like pci_iomap(), but uses devres to automatically unmap the resource when
+  the driver probe() function fails or a device in unbound from its driver
+
+  Documented in Documentation/driver-api/driver-model/devres.rst.
+
+Not using these wrappers may make drivers unusable on certain platforms with
+stricter rules for mapping I/O memory.
+
 Public Functions Provided
 =========================
 
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 5f8c6c303ff2..e0814d214048 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -310,6 +310,7 @@ IOMAP
   devm_ioremap()
   devm_ioremap_uc()
   devm_ioremap_wc()
+  devm_ioremap_np()
   devm_ioremap_resource() : checks resource, requests memory region, ioremaps
   devm_ioremap_resource_wc()
   devm_platform_ioremap_resource() : calls devm_ioremap_resource() for platform device
diff --git a/MAINTAINERS b/MAINTAINERS
index dec3739031fe..1162b0917630 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,20 @@ F:	arch/arm/mach-alpine/
 F:	arch/arm64/boot/dts/amazon/
 F:	drivers/*/*alpine*
 
+ARM/APPLE MACHINE SUPPORT
+M:	Hector Martin <marcan@marcan.st>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+W:	https://asahilinux.org
+B:	https://github.com/AsahiLinux/linux/issues
+C:	irc://chat.freenode.net/asahi-dev
+T:	git https://github.com/AsahiLinux/linux.git
+F:	Documentation/devicetree/bindings/arm/apple.yaml
+F:	Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
+F:	arch/arm64/boot/dts/apple/
+F:	drivers/irqchip/irq-apple-aic.c
+F:	include/dt-bindings/interrupt-controller/apple-aic.h
+
 ARM/ARTPEC MACHINE SUPPORT
 M:	Jesper Nilsson <jesper.nilsson@axis.com>
 M:	Lars Persson <lars.persson@axis.com>
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index ce50dd129eec..b6118186c774 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -26,6 +26,13 @@ config ARCH_ALPINE
 	  This enables support for the Annapurna Labs Alpine
 	  Soc family.
 
+config ARCH_APPLE
+	bool "Apple Silicon SoC family"
+	select APPLE_AIC
+	help
+	  This enables support for Apple's in-house ARM SoC family, starting
+	  with the Apple M1.
+
 config ARCH_BCM2835
 	bool "Broadcom BCM2835 family"
 	select TIMER_OF
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index f1173cd93594..639e01a4d855 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -6,6 +6,7 @@ subdir-y += amazon
 subdir-y += amd
 subdir-y += amlogic
 subdir-y += apm
+subdir-y += apple
 subdir-y += arm
 subdir-y += bitmain
 subdir-y += broadcom
diff --git a/arch/arm64/boot/dts/apple/Makefile b/arch/arm64/boot/dts/apple/Makefile
new file mode 100644
index 000000000000..cbbd701ebf05
--- /dev/null
+++ b/arch/arm64/boot/dts/apple/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_ARCH_APPLE) += t8103-j274.dtb
diff --git a/arch/arm64/boot/dts/apple/t8103-j274.dts b/arch/arm64/boot/dts/apple/t8103-j274.dts
new file mode 100644
index 000000000000..e0f6775b9878
--- /dev/null
+++ b/arch/arm64/boot/dts/apple/t8103-j274.dts
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Apple Mac mini (M1, 2020)
+ *
+ * target-type: J274
+ *
+ * Copyright The Asahi Linux Contributors
+ */
+
+/dts-v1/;
+
+#include "t8103.dtsi"
+
+/ {
+	compatible = "apple,j274", "apple,t8103", "apple,arm-platform";
+	model = "Apple Mac mini (M1, 2020)";
+
+	aliases {
+		serial0 = &serial0;
+	};
+
+	chosen {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		stdout-path = "serial0";
+
+		framebuffer0: framebuffer@0 {
+			compatible = "apple,simple-framebuffer", "simple-framebuffer";
+			reg = <0 0 0 0>; /* To be filled by loader */
+			/* Format properties will be added by loader */
+			status = "disabled";
+		};
+	};
+
+	memory@800000000 {
+		device_type = "memory";
+		reg = <0x8 0 0x2 0>; /* To be filled by loader */
+	};
+};
+
+&serial0 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi
new file mode 100644
index 000000000000..a1e22a2ea2e5
--- /dev/null
+++ b/arch/arm64/boot/dts/apple/t8103.dtsi
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0+ OR MIT
+/*
+ * Apple T8103 "M1" SoC
+ *
+ * Other names: H13G, "Tonga"
+ *
+ * Copyright The Asahi Linux Contributors
+ */
+
+#include <dt-bindings/interrupt-controller/apple-aic.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	compatible = "apple,t8103", "apple,arm-platform";
+
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			compatible = "apple,icestorm";
+			device_type = "cpu";
+			reg = <0x0 0x0>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu1: cpu@1 {
+			compatible = "apple,icestorm";
+			device_type = "cpu";
+			reg = <0x0 0x1>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu2: cpu@2 {
+			compatible = "apple,icestorm";
+			device_type = "cpu";
+			reg = <0x0 0x2>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu3: cpu@3 {
+			compatible = "apple,icestorm";
+			device_type = "cpu";
+			reg = <0x0 0x3>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu4: cpu@10100 {
+			compatible = "apple,firestorm";
+			device_type = "cpu";
+			reg = <0x0 0x10100>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu5: cpu@10101 {
+			compatible = "apple,firestorm";
+			device_type = "cpu";
+			reg = <0x0 0x10101>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu6: cpu@10102 {
+			compatible = "apple,firestorm";
+			device_type = "cpu";
+			reg = <0x0 0x10102>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+
+		cpu7: cpu@10103 {
+			compatible = "apple,firestorm";
+			device_type = "cpu";
+			reg = <0x0 0x10103>;
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0>; /* To be filled by loader */
+		};
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupt-parent = <&aic>;
+		interrupt-names = "phys", "virt", "hyp-phys", "hyp-virt";
+		interrupts = <AIC_FIQ AIC_TMR_GUEST_PHYS IRQ_TYPE_LEVEL_HIGH>,
+			     <AIC_FIQ AIC_TMR_GUEST_VIRT IRQ_TYPE_LEVEL_HIGH>,
+			     <AIC_FIQ AIC_TMR_HV_PHYS IRQ_TYPE_LEVEL_HIGH>,
+			     <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	clk24: clock-24m {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <24000000>;
+		clock-output-names = "clk24";
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <2>;
+
+		ranges;
+		nonposted-mmio;
+
+		serial0: serial@235200000 {
+			compatible = "apple,s5l-uart";
+			reg = <0x2 0x35200000 0x0 0x1000>;
+			reg-io-width = <4>;
+			interrupt-parent = <&aic>;
+			interrupts = <AIC_IRQ 605 IRQ_TYPE_LEVEL_HIGH>;
+			/*
+			 * TODO: figure out the clocking properly, there may
+			 * be a third selectable clock.
+			 */
+			clocks = <&clk24>, <&clk24>;
+			clock-names = "uart", "clk_uart_baud0";
+			status = "disabled";
+		};
+
+		aic: interrupt-controller@23b100000 {
+			compatible = "apple,t8103-aic", "apple,aic";
+			#interrupt-cells = <3>;
+			interrupt-controller;
+			reg = <0x2 0x3b100000 0x0 0x8000>;
+		};
+	};
+};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 99e8a8fb5b4e..08c6f769df9a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -32,6 +32,7 @@ CONFIG_ARCH_AGILEX=y
 CONFIG_ARCH_N5X=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_ARCH_ALPINE=y
+CONFIG_ARCH_APPLE=y
 CONFIG_ARCH_BCM2835=y
 CONFIG_ARCH_BCM4908=y
 CONFIG_ARCH_BCM_IPROC=y
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ef5b040dee44..6231e1f0abe7 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -59,6 +59,7 @@
 #define ARM_CPU_IMP_NVIDIA		0x4E
 #define ARM_CPU_IMP_FUJITSU		0x46
 #define ARM_CPU_IMP_HISI		0x48
+#define ARM_CPU_IMP_APPLE		0x61
 
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
@@ -99,6 +100,9 @@
 
 #define HISI_CPU_PART_TSV110		0xD01
 
+#define APPLE_CPU_PART_M1_ICESTORM	0x022
+#define APPLE_CPU_PART_M1_FIRESTORM	0x023
+
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -127,6 +131,8 @@
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
+#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
 
 /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
 #define MIDR_FUJITSU_ERRATUM_010001		MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 5ea8656a2030..7fd836bea7eb 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -169,16 +169,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 
 #define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
-
-/*
- * PCI configuration space mapping function.
- *
- * The PCI specification disallows posted write configuration transactions.
- * Add an arch specific pci_remap_cfgspace() definition that is implemented
- * through nGnRnE device memory attribute as recommended by the ARM v8
- * Architecture reference manual Issue A.k B2.8.2 "Device memory".
- */
-#define pci_remap_cfgspace(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+#define ioremap_np(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
 
 /*
  * io{read,write}{16,32,64}be() macros
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b31ac5ccc8ab..012a0b8c0a27 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1041,6 +1041,66 @@
 #define TRFCR_ELx_ExTRE			BIT(1)
 #define TRFCR_ELx_E0TRE			BIT(0)
 
+
+/* GIC Hypervisor interface registers */
+/* ICH_MISR_EL2 bit definitions */
+#define ICH_MISR_EOI		(1 << 0)
+#define ICH_MISR_U		(1 << 1)
+
+/* ICH_LR*_EL2 bit definitions */
+#define ICH_LR_VIRTUAL_ID_MASK	((1ULL << 32) - 1)
+
+#define ICH_LR_EOI		(1ULL << 41)
+#define ICH_LR_GROUP		(1ULL << 60)
+#define ICH_LR_HW		(1ULL << 61)
+#define ICH_LR_STATE		(3ULL << 62)
+#define ICH_LR_PENDING_BIT	(1ULL << 62)
+#define ICH_LR_ACTIVE_BIT	(1ULL << 63)
+#define ICH_LR_PHYS_ID_SHIFT	32
+#define ICH_LR_PHYS_ID_MASK	(0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
+#define ICH_LR_PRIORITY_SHIFT	48
+#define ICH_LR_PRIORITY_MASK	(0xffULL << ICH_LR_PRIORITY_SHIFT)
+
+/* ICH_HCR_EL2 bit definitions */
+#define ICH_HCR_EN		(1 << 0)
+#define ICH_HCR_UIE		(1 << 1)
+#define ICH_HCR_NPIE		(1 << 3)
+#define ICH_HCR_TC		(1 << 10)
+#define ICH_HCR_TALL0		(1 << 11)
+#define ICH_HCR_TALL1		(1 << 12)
+#define ICH_HCR_EOIcount_SHIFT	27
+#define ICH_HCR_EOIcount_MASK	(0x1f << ICH_HCR_EOIcount_SHIFT)
+
+/* ICH_VMCR_EL2 bit definitions */
+#define ICH_VMCR_ACK_CTL_SHIFT	2
+#define ICH_VMCR_ACK_CTL_MASK	(1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT	3
+#define ICH_VMCR_FIQ_EN_MASK	(1 << ICH_VMCR_FIQ_EN_SHIFT)
+#define ICH_VMCR_CBPR_SHIFT	4
+#define ICH_VMCR_CBPR_MASK	(1 << ICH_VMCR_CBPR_SHIFT)
+#define ICH_VMCR_EOIM_SHIFT	9
+#define ICH_VMCR_EOIM_MASK	(1 << ICH_VMCR_EOIM_SHIFT)
+#define ICH_VMCR_BPR1_SHIFT	18
+#define ICH_VMCR_BPR1_MASK	(7 << ICH_VMCR_BPR1_SHIFT)
+#define ICH_VMCR_BPR0_SHIFT	21
+#define ICH_VMCR_BPR0_MASK	(7 << ICH_VMCR_BPR0_SHIFT)
+#define ICH_VMCR_PMR_SHIFT	24
+#define ICH_VMCR_PMR_MASK	(0xffUL << ICH_VMCR_PMR_SHIFT)
+#define ICH_VMCR_ENG0_SHIFT	0
+#define ICH_VMCR_ENG0_MASK	(1 << ICH_VMCR_ENG0_SHIFT)
+#define ICH_VMCR_ENG1_SHIFT	1
+#define ICH_VMCR_ENG1_MASK	(1 << ICH_VMCR_ENG1_SHIFT)
+
+/* ICH_VTR_EL2 bit definitions */
+#define ICH_VTR_PRI_BITS_SHIFT	29
+#define ICH_VTR_PRI_BITS_MASK	(7 << ICH_VTR_PRI_BITS_SHIFT)
+#define ICH_VTR_ID_BITS_SHIFT	23
+#define ICH_VTR_ID_BITS_MASK	(7 << ICH_VTR_ID_BITS_SHIFT)
+#define ICH_VTR_SEIS_SHIFT	22
+#define ICH_VTR_SEIS_MASK	(1 << ICH_VTR_SEIS_SHIFT)
+#define ICH_VTR_A3V_SHIFT	21
+#define ICH_VTR_A3V_MASK	(1 << ICH_VTR_A3V_SHIFT)
+
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 9bb27e5c22f1..9fbfc9574432 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -409,6 +409,10 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
 #define ioremap_uc(X,Y)			ioremap((X),(Y))
 #define ioremap_wc(X,Y)			ioremap((X),(Y))
 #define ioremap_wt(X,Y)			ioremap((X),(Y))
+static inline void __iomem *ioremap_np(unsigned long offset, unsigned long size)
+{
+	return NULL;
+}
 
 static inline void iounmap(volatile void __iomem *addr)
 {
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 1b885964fb34..4fb1f4da27ec 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -64,6 +64,14 @@ static u32 arch_timer_rate __ro_after_init;
 u32 arch_timer_rate1 __ro_after_init;
 static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
 
+static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {
+	[ARCH_TIMER_PHYS_SECURE_PPI]	= "sec-phys",
+	[ARCH_TIMER_PHYS_NONSECURE_PPI]	= "phys",
+	[ARCH_TIMER_VIRT_PPI]		= "virt",
+	[ARCH_TIMER_HYP_PPI]		= "hyp-phys",
+	[ARCH_TIMER_HYP_VIRT_PPI]	= "hyp-virt",
+};
+
 static struct clock_event_device __percpu *arch_timer_evt;
 
 static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;
@@ -1281,8 +1289,9 @@ static void __init arch_timer_populate_kvm_info(void)
 
 static int __init arch_timer_of_init(struct device_node *np)
 {
-	int i, ret;
+	int i, irq, ret;
 	u32 rate;
+	bool has_names;
 
 	if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {
 		pr_warn("multiple nodes in dt, skipping\n");
@@ -1290,8 +1299,17 @@ static int __init arch_timer_of_init(struct device_node *np)
 	}
 
 	arch_timers_present |= ARCH_TIMER_TYPE_CP15;
-	for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++)
-		arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
+
+	has_names = of_property_read_bool(np, "interrupt-names");
+
+	for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) {
+		if (has_names)
+			irq = of_irq_get_byname(np, arch_timer_ppi_names[i]);
+		else
+			irq = of_irq_get(np, i);
+		if (irq > 0)
+			arch_timer_ppi[i] = irq;
+	}
 
 	arch_timer_populate_kvm_info();
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index c8f57e3e058d..b90e825df7e1 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -593,4 +593,12 @@ config IRQ_IDT3243X
 	select GENERIC_IRQ_CHIP
 	select IRQ_DOMAIN
 
+config APPLE_AIC
+	bool "Apple Interrupt Controller (AIC)"
+	depends on ARM64
+	default ARCH_APPLE
+	help
+	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
+	  such as the M1.
+
 endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 18573602a939..f88cbf36a9d2 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -115,3 +115,4 @@ obj-$(CONFIG_SL28CPLD_INTC)		+= irq-sl28cpld.o
 obj-$(CONFIG_MACH_REALTEK_RTL)		+= irq-realtek-rtl.o
 obj-$(CONFIG_WPCM450_AIC)		+= irq-wpcm450-aic.o
 obj-$(CONFIG_IRQ_IDT3243X)		+= irq-idt3243x.o
+obj-$(CONFIG_APPLE_AIC)			+= irq-apple-aic.o
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
new file mode 100644
index 000000000000..c179e27062fd
--- /dev/null
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -0,0 +1,852 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright The Asahi Linux Contributors
+ *
+ * Based on irq-lpc32xx:
+ *   Copyright 2015-2016 Vladimir Zapolskiy <vz@mleia.com>
+ * Based on irq-bcm2836:
+ *   Copyright 2015 Broadcom
+ */
+
+/*
+ * AIC is a fairly simple interrupt controller with the following features:
+ *
+ * - 896 level-triggered hardware IRQs
+ *   - Single mask bit per IRQ
+ *   - Per-IRQ affinity setting
+ *   - Automatic masking on event delivery (auto-ack)
+ *   - Software triggering (ORed with hw line)
+ * - 2 per-CPU IPIs (meant as "self" and "other", but they are
+ *   interchangeable if not symmetric)
+ * - Automatic prioritization (single event/ack register per CPU, lower IRQs =
+ *   higher priority)
+ * - Automatic masking on ack
+ * - Default "this CPU" register view and explicit per-CPU views
+ *
+ * In addition, this driver also handles FIQs, as these are routed to the same
+ * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and
+ * performance counters (TODO).
+ *
+ * Implementation notes:
+ *
+ * - This driver creates two IRQ domains, one for HW IRQs and internal FIQs,
+ *   and one for IPIs.
+ * - Since Linux needs more than 2 IPIs, we implement a software IRQ controller
+ *   and funnel all IPIs into one per-CPU IPI (the second "self" IPI is unused).
+ * - FIQ hwirq numbers are assigned after true hwirqs, and are per-cpu.
+ * - DT bindings use 3-cell form (like GIC):
+ *   - <0 nr flags> - hwirq #nr
+ *   - <1 nr flags> - FIQ #nr
+ *     - nr=0  Physical HV timer
+ *     - nr=1  Virtual HV timer
+ *     - nr=2  Physical guest timer
+ *     - nr=3  Virtual guest timer
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/cpuhotplug.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/limits.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <asm/exception.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+#include <dt-bindings/interrupt-controller/apple-aic.h>
+
+/*
+ * AIC registers (MMIO)
+ */
+
+#define AIC_INFO		0x0004
+#define AIC_INFO_NR_HW		GENMASK(15, 0)
+
+#define AIC_CONFIG		0x0010
+
+#define AIC_WHOAMI		0x2000
+#define AIC_EVENT		0x2004
+#define AIC_EVENT_TYPE		GENMASK(31, 16)
+#define AIC_EVENT_NUM		GENMASK(15, 0)
+
+#define AIC_EVENT_TYPE_HW	1
+#define AIC_EVENT_TYPE_IPI	4
+#define AIC_EVENT_IPI_OTHER	1
+#define AIC_EVENT_IPI_SELF	2
+
+#define AIC_IPI_SEND		0x2008
+#define AIC_IPI_ACK		0x200c
+#define AIC_IPI_MASK_SET	0x2024
+#define AIC_IPI_MASK_CLR	0x2028
+
+#define AIC_IPI_SEND_CPU(cpu)	BIT(cpu)
+
+#define AIC_IPI_OTHER		BIT(0)
+#define AIC_IPI_SELF		BIT(31)
+
+#define AIC_TARGET_CPU		0x3000
+#define AIC_SW_SET		0x4000
+#define AIC_SW_CLR		0x4080
+#define AIC_MASK_SET		0x4100
+#define AIC_MASK_CLR		0x4180
+
+#define AIC_CPU_IPI_SET(cpu)	(0x5008 + ((cpu) << 7))
+#define AIC_CPU_IPI_CLR(cpu)	(0x500c + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))
+#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))
+
+#define MASK_REG(x)		(4 * ((x) >> 5))
+#define MASK_BIT(x)		BIT((x) & GENMASK(4, 0))
+
+/*
+ * IMP-DEF sysregs that control FIQ sources
+ * Note: sysreg-based IPIs are not supported yet.
+ */
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1		sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_IMODE			GENMASK(10, 8)
+#define PMCR0_IMODE_OFF			0
+#define PMCR0_IMODE_PMI			1
+#define PMCR0_IMODE_AIC			2
+#define PMCR0_IMODE_HALT		3
+#define PMCR0_IMODE_FIQ			4
+#define PMCR0_IACT			BIT(11)
+
+/* IPI request registers */
+#define SYS_IMP_APL_IPI_RR_LOCAL_EL1	sys_reg(3, 5, 15, 0, 0)
+#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1	sys_reg(3, 5, 15, 0, 1)
+#define IPI_RR_CPU			GENMASK(7, 0)
+/* Cluster only used for the GLOBAL register */
+#define IPI_RR_CLUSTER			GENMASK(23, 16)
+#define IPI_RR_TYPE			GENMASK(29, 28)
+#define IPI_RR_IMMEDIATE		0
+#define IPI_RR_RETRACT			1
+#define IPI_RR_DEFERRED			2
+#define IPI_RR_NOWAKE			3
+
+/* IPI status register */
+#define SYS_IMP_APL_IPI_SR_EL1		sys_reg(3, 5, 15, 1, 1)
+#define IPI_SR_PENDING			BIT(0)
+
+/* Guest timer FIQ enable register */
+#define SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2	sys_reg(3, 5, 15, 1, 3)
+#define VM_TMR_FIQ_ENABLE_V		BIT(0)
+#define VM_TMR_FIQ_ENABLE_P		BIT(1)
+
+/* Deferred IPI countdown register */
+#define SYS_IMP_APL_IPI_CR_EL1		sys_reg(3, 5, 15, 3, 1)
+
+/* Uncore PMC control register */
+#define SYS_IMP_APL_UPMCR0_EL1		sys_reg(3, 7, 15, 0, 4)
+#define UPMCR0_IMODE			GENMASK(18, 16)
+#define UPMCR0_IMODE_OFF		0
+#define UPMCR0_IMODE_AIC		2
+#define UPMCR0_IMODE_HALT		3
+#define UPMCR0_IMODE_FIQ		4
+
+/* Uncore PMC status register */
+#define SYS_IMP_APL_UPMSR_EL1		sys_reg(3, 7, 15, 6, 4)
+#define UPMSR_IACT			BIT(0)
+
+#define AIC_NR_FIQ		4
+#define AIC_NR_SWIPI		32
+
+/*
+ * FIQ hwirq index definitions: FIQ sources use the DT binding defines
+ * directly, except that timers are special. At the irqchip level, the
+ * two timer types are represented by their access method: _EL0 registers
+ * or _EL02 registers. In the DT binding, the timers are represented
+ * by their purpose (HV or guest). This mapping is for when the kernel is
+ * running at EL2 (with VHE). When the kernel is running at EL1, the
+ * mapping differs and aic_irq_domain_translate() performs the remapping.
+ */
+
+#define AIC_TMR_EL0_PHYS	AIC_TMR_HV_PHYS
+#define AIC_TMR_EL0_VIRT	AIC_TMR_HV_VIRT
+#define AIC_TMR_EL02_PHYS	AIC_TMR_GUEST_PHYS
+#define AIC_TMR_EL02_VIRT	AIC_TMR_GUEST_VIRT
+
+struct aic_irq_chip {
+	void __iomem *base;
+	struct irq_domain *hw_domain;
+	struct irq_domain *ipi_domain;
+	int nr_hw;
+	int ipi_hwirq;
+};
+
+static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
+
+static DEFINE_PER_CPU(atomic_t, aic_vipi_flag);
+static DEFINE_PER_CPU(atomic_t, aic_vipi_enable);
+
+static struct aic_irq_chip *aic_irqc;
+
+static void aic_handle_ipi(struct pt_regs *regs);
+
+static u32 aic_ic_read(struct aic_irq_chip *ic, u32 reg)
+{
+	return readl_relaxed(ic->base + reg);
+}
+
+static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)
+{
+	writel_relaxed(val, ic->base + reg);
+}
+
+/*
+ * IRQ irqchip
+ */
+
+static void aic_irq_mask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)),
+		     MASK_BIT(irqd_to_hwirq(d)));
+}
+
+static void aic_irq_unmask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),
+		     MASK_BIT(irqd_to_hwirq(d)));
+}
+
+static void aic_irq_eoi(struct irq_data *d)
+{
+	/*
+	 * Reading the interrupt reason automatically acknowledges and masks
+	 * the IRQ, so we just unmask it here if needed.
+	 */
+	if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))
+		aic_irq_unmask(d);
+}
+
+static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
+{
+	struct aic_irq_chip *ic = aic_irqc;
+	u32 event, type, irq;
+
+	do {
+		/*
+		 * We cannot use a relaxed read here, as reads from DMA buffers
+		 * need to be ordered after the IRQ fires.
+		 */
+		event = readl(ic->base + AIC_EVENT);
+		type = FIELD_GET(AIC_EVENT_TYPE, event);
+		irq = FIELD_GET(AIC_EVENT_NUM, event);
+
+		if (type == AIC_EVENT_TYPE_HW)
+			handle_domain_irq(aic_irqc->hw_domain, irq, regs);
+		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
+			aic_handle_ipi(regs);
+		else if (event != 0)
+			pr_err_ratelimited("Unknown IRQ event %d, %d\n", type, irq);
+	} while (event);
+
+	/*
+	 * vGIC maintenance interrupts end up here too, so we need to check
+	 * for them separately. This should never trigger if KVM is working
+	 * properly, because it will have already taken care of clearing it
+	 * on guest exit before this handler runs.
+	 */
+	if (is_kernel_in_hyp_mode() && (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&
+		read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
+		pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
+		sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+	}
+}
+
+static int aic_irq_set_affinity(struct irq_data *d,
+				const struct cpumask *mask_val, bool force)
+{
+	irq_hw_number_t hwirq = irqd_to_hwirq(d);
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	int cpu;
+
+	if (force)
+		cpu = cpumask_first(mask_val);
+	else
+		cpu = cpumask_any_and(mask_val, cpu_online_mask);
+
+	aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));
+	irq_data_update_effective_affinity(d, cpumask_of(cpu));
+
+	return IRQ_SET_MASK_OK;
+}
+
+static int aic_irq_set_type(struct irq_data *d, unsigned int type)
+{
+	/*
+	 * Some IRQs (e.g. MSIs) implicitly have edge semantics, and we don't
+	 * have a way to find out the type of any given IRQ, so just allow both.
+	 */
+	return (type == IRQ_TYPE_LEVEL_HIGH || type == IRQ_TYPE_EDGE_RISING) ? 0 : -EINVAL;
+}
+
+static struct irq_chip aic_chip = {
+	.name = "AIC",
+	.irq_mask = aic_irq_mask,
+	.irq_unmask = aic_irq_unmask,
+	.irq_eoi = aic_irq_eoi,
+	.irq_set_affinity = aic_irq_set_affinity,
+	.irq_set_type = aic_irq_set_type,
+};
+
+/*
+ * FIQ irqchip
+ */
+
+static unsigned long aic_fiq_get_idx(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+
+	return irqd_to_hwirq(d) - ic->nr_hw;
+}
+
+static void aic_fiq_set_mask(struct irq_data *d)
+{
+	/* Only the guest timers have real mask bits, unfortunately. */
+	switch (aic_fiq_get_idx(d)) {
+	case AIC_TMR_EL02_PHYS:
+		sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENABLE_P, 0);
+		isb();
+		break;
+	case AIC_TMR_EL02_VIRT:
+		sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENABLE_V, 0);
+		isb();
+		break;
+	default:
+		break;
+	}
+}
+
+static void aic_fiq_clear_mask(struct irq_data *d)
+{
+	switch (aic_fiq_get_idx(d)) {
+	case AIC_TMR_EL02_PHYS:
+		sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, 0, VM_TMR_FIQ_ENABLE_P);
+		isb();
+		break;
+	case AIC_TMR_EL02_VIRT:
+		sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, 0, VM_TMR_FIQ_ENABLE_V);
+		isb();
+		break;
+	default:
+		break;
+	}
+}
+
+static void aic_fiq_mask(struct irq_data *d)
+{
+	aic_fiq_set_mask(d);
+	__this_cpu_and(aic_fiq_unmasked, ~BIT(aic_fiq_get_idx(d)));
+}
+
+static void aic_fiq_unmask(struct irq_data *d)
+{
+	aic_fiq_clear_mask(d);
+	__this_cpu_or(aic_fiq_unmasked, BIT(aic_fiq_get_idx(d)));
+}
+
+static void aic_fiq_eoi(struct irq_data *d)
+{
+	/* We mask to ack (where we can), so we need to unmask at EOI. */
+	if (__this_cpu_read(aic_fiq_unmasked) & BIT(aic_fiq_get_idx(d)))
+		aic_fiq_clear_mask(d);
+}
+
+#define TIMER_FIRING(x)                                                        \
+	(((x) & (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_MASK |            \
+		 ARCH_TIMER_CTRL_IT_STAT)) ==                                  \
+	 (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
+
+static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
+{
+	/*
+	 * It would be really nice if we had a system register that lets us get
+	 * the FIQ source state without having to peek down into sources...
+	 * but such a register does not seem to exist.
+	 *
+	 * So, we have these potential sources to test for:
+	 *  - Fast IPIs (not yet used)
+	 *  - The 4 timers (CNTP, CNTV for each of HV and guest)
+	 *  - Per-core PMCs (not yet supported)
+	 *  - Per-cluster uncore PMCs (not yet supported)
+	 *
+	 * Since not dealing with any of these results in a FIQ storm,
+	 * we check for everything here, even things we don't support yet.
+	 */
+
+	if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {
+		pr_err_ratelimited("Fast IPI fired. Acking.\n");
+		write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+	}
+
+	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_EL0_PHYS, regs);
+
+	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
+		handle_domain_irq(aic_irqc->hw_domain,
+				  aic_irqc->nr_hw + AIC_TMR_EL0_VIRT, regs);
+
+	if (is_kernel_in_hyp_mode()) {
+		uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);
+
+		if ((enabled & VM_TMR_FIQ_ENABLE_P) &&
+		    TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
+			handle_domain_irq(aic_irqc->hw_domain,
+					  aic_irqc->nr_hw + AIC_TMR_EL02_PHYS, regs);
+
+		if ((enabled & VM_TMR_FIQ_ENABLE_V) &&
+		    TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
+			handle_domain_irq(aic_irqc->hw_domain,
+					  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT, regs);
+	}
+
+	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
+			(FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
+		/*
+		 * Not supported yet, let's figure out how to handle this when
+		 * we implement these proprietary performance counters. For now,
+		 * just mask it and move on.
+		 */
+		pr_err_ratelimited("PMC FIQ fired. Masking.\n");
+		sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
+				   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+	}
+
+	if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&
+			(read_sysreg_s(SYS_IMP_APL_UPMSR_EL1) & UPMSR_IACT)) {
+		/* Same story with uncore PMCs */
+		pr_err_ratelimited("Uncore PMC FIQ fired. Masking.\n");
+		sysreg_clear_set_s(SYS_IMP_APL_UPMCR0_EL1, UPMCR0_IMODE,
+				   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
+	}
+}
+
+static int aic_fiq_set_type(struct irq_data *d, unsigned int type)
+{
+	return (type == IRQ_TYPE_LEVEL_HIGH) ? 0 : -EINVAL;
+}
+
+static struct irq_chip fiq_chip = {
+	.name = "AIC-FIQ",
+	.irq_mask = aic_fiq_mask,
+	.irq_unmask = aic_fiq_unmask,
+	.irq_ack = aic_fiq_set_mask,
+	.irq_eoi = aic_fiq_eoi,
+	.irq_set_type = aic_fiq_set_type,
+};
+
+/*
+ * Main IRQ domain
+ */
+
+static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
+			      irq_hw_number_t hw)
+{
+	struct aic_irq_chip *ic = id->host_data;
+
+	if (hw < ic->nr_hw) {
+		irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,
+				    handle_fasteoi_irq, NULL, NULL);
+		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
+	} else {
+		irq_set_percpu_devid(irq);
+		irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,
+				    handle_percpu_devid_irq, NULL, NULL);
+	}
+
+	return 0;
+}
+
+static int aic_irq_domain_translate(struct irq_domain *id,
+				    struct irq_fwspec *fwspec,
+				    unsigned long *hwirq,
+				    unsigned int *type)
+{
+	struct aic_irq_chip *ic = id->host_data;
+
+	if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode))
+		return -EINVAL;
+
+	switch (fwspec->param[0]) {
+	case AIC_IRQ:
+		if (fwspec->param[1] >= ic->nr_hw)
+			return -EINVAL;
+		*hwirq = fwspec->param[1];
+		break;
+	case AIC_FIQ:
+		if (fwspec->param[1] >= AIC_NR_FIQ)
+			return -EINVAL;
+		*hwirq = ic->nr_hw + fwspec->param[1];
+
+		/*
+		 * In EL1 the non-redirected registers are the guest's,
+		 * not EL2's, so remap the hwirqs to match.
+		 */
+		if (!is_kernel_in_hyp_mode()) {
+			switch (fwspec->param[1]) {
+			case AIC_TMR_GUEST_PHYS:
+				*hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS;
+				break;
+			case AIC_TMR_GUEST_VIRT:
+				*hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT;
+				break;
+			case AIC_TMR_HV_PHYS:
+			case AIC_TMR_HV_VIRT:
+				return -ENOENT;
+			default:
+				break;
+			}
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+
+	return 0;
+}
+
+static int aic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	unsigned int type = IRQ_TYPE_NONE;
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	int i, ret;
+
+	ret = aic_irq_domain_translate(domain, fwspec, &hwirq, &type);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = aic_irq_domain_map(domain, virq + i, hwirq + i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void aic_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+
+		irq_set_handler(virq + i, NULL);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
+static const struct irq_domain_ops aic_irq_domain_ops = {
+	.translate	= aic_irq_domain_translate,
+	.alloc		= aic_irq_domain_alloc,
+	.free		= aic_irq_domain_free,
+};
+
+/*
+ * IPI irqchip
+ */
+
+static void aic_ipi_mask(struct irq_data *d)
+{
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+
+	/* No specific ordering requirements needed here. */
+	atomic_andnot(irq_bit, this_cpu_ptr(&aic_vipi_enable));
+}
+
+static void aic_ipi_unmask(struct irq_data *d)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+
+	atomic_or(irq_bit, this_cpu_ptr(&aic_vipi_enable));
+
+	/*
+	 * The atomic_or() above must complete before the atomic_read()
+	 * below to avoid racing aic_ipi_send_mask().
+	 */
+	smp_mb__after_atomic();
+
+	/*
+	 * If a pending vIPI was unmasked, raise a HW IPI to ourselves.
+	 * No barriers needed here since this is a self-IPI.
+	 */
+	if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)
+		aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
+}
+
+static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
+{
+	struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
+	u32 irq_bit = BIT(irqd_to_hwirq(d));
+	u32 send = 0;
+	int cpu;
+	unsigned long pending;
+
+	for_each_cpu(cpu, mask) {
+		/*
+		 * This sequence is the mirror of the one in aic_ipi_unmask();
+		 * see the comment there. Additionally, release semantics
+		 * ensure that the vIPI flag set is ordered after any shared
+		 * memory accesses that precede it. This therefore also pairs
+		 * with the atomic_fetch_andnot in aic_handle_ipi().
+		 */
+		pending = atomic_fetch_or_release(irq_bit, per_cpu_ptr(&aic_vipi_flag, cpu));
+
+		/*
+		 * The atomic_fetch_or_release() above must complete before the
+		 * atomic_read() below to avoid racing aic_ipi_unmask().
+		 */
+		smp_mb__after_atomic();
+
+		if (!(pending & irq_bit) &&
+		    (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))
+			send |= AIC_IPI_SEND_CPU(cpu);
+	}
+
+	/*
+	 * The flag writes must complete before the physical IPI is issued
+	 * to another CPU. This is implied by the control dependency on
+	 * the result of atomic_read_acquire() above, which is itself
+	 * already ordered after the vIPI flag write.
+	 */
+	if (send)
+		aic_ic_write(ic, AIC_IPI_SEND, send);
+}
+
+static struct irq_chip ipi_chip = {
+	.name = "AIC-IPI",
+	.irq_mask = aic_ipi_mask,
+	.irq_unmask = aic_ipi_unmask,
+	.ipi_send_mask = aic_ipi_send_mask,
+};
+
+/*
+ * IPI IRQ domain
+ */
+
+static void aic_handle_ipi(struct pt_regs *regs)
+{
+	int i;
+	unsigned long enabled, firing;
+
+	/*
+	 * Ack the IPI. We need to order this after the AIC event read, but
+	 * that is enforced by normal MMIO ordering guarantees.
+	 */
+	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
+
+	/*
+	 * The mask read does not need to be ordered. Only we can change
+	 * our own mask anyway, so no races are possible here, as long as
+	 * we are properly in the interrupt handler (which is covered by
+	 * the barrier that is part of the top-level AIC handler's readl()).
+	 */
+	enabled = atomic_read(this_cpu_ptr(&aic_vipi_enable));
+
+	/*
+	 * Clear the IPIs we are about to handle. This pairs with the
+	 * atomic_fetch_or_release() in aic_ipi_send_mask(), and needs to be
+	 * ordered after the aic_ic_write() above (to avoid dropping vIPIs) and
+	 * before IPI handling code (to avoid races handling vIPIs before they
+	 * are signaled). The former is taken care of by the release semantics
+	 * of the write portion, while the latter is taken care of by the
+	 * acquire semantics of the read portion.
+	 */
+	firing = atomic_fetch_andnot(enabled, this_cpu_ptr(&aic_vipi_flag)) & enabled;
+
+	for_each_set_bit(i, &firing, AIC_NR_SWIPI)
+		handle_domain_irq(aic_irqc->ipi_domain, i, regs);
+
+	/*
+	 * No ordering needed here; at worst this just changes the timing of
+	 * when the next IPI will be delivered.
+	 */
+	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+}
+
+static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs, void *args)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_set_percpu_devid(virq + i);
+		irq_domain_set_info(d, virq + i, i, &ipi_chip, d->host_data,
+				    handle_percpu_devid_irq, NULL, NULL);
+	}
+
+	return 0;
+}
+
+static void aic_ipi_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
+{
+	/* Not freeing IPIs */
+}
+
+static const struct irq_domain_ops aic_ipi_domain_ops = {
+	.alloc = aic_ipi_alloc,
+	.free = aic_ipi_free,
+};
+
+static int aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node)
+{
+	struct irq_domain *ipi_domain;
+	int base_ipi;
+
+	ipi_domain = irq_domain_create_linear(irqc->hw_domain->fwnode, AIC_NR_SWIPI,
+					      &aic_ipi_domain_ops, irqc);
+	if (WARN_ON(!ipi_domain))
+		return -ENODEV;
+
+	ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
+	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
+
+	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, AIC_NR_SWIPI,
+					   NUMA_NO_NODE, NULL, false, NULL);
+
+	if (WARN_ON(!base_ipi)) {
+		irq_domain_remove(ipi_domain);
+		return -ENODEV;
+	}
+
+	set_smp_ipi_range(base_ipi, AIC_NR_SWIPI);
+
+	irqc->ipi_domain = ipi_domain;
+
+	return 0;
+}
+
+static int aic_init_cpu(unsigned int cpu)
+{
+	/* Mask all hard-wired per-CPU IRQ/FIQ sources */
+
+	/* Pending Fast IPI FIQs */
+	write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);
+
+	/* Timer FIQs */
+	sysreg_clear_set(cntp_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
+	sysreg_clear_set(cntv_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);
+
+	/* EL2-only (VHE mode) IRQ sources */
+	if (is_kernel_in_hyp_mode()) {
+		/* Guest timers */
+		sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2,
+				   VM_TMR_FIQ_ENABLE_V | VM_TMR_FIQ_ENABLE_P, 0);
+
+		/* vGIC maintenance IRQ */
+		sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);
+	}
+
+	/* PMC FIQ */
+	sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
+			   FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
+
+	/* Uncore PMC FIQ */
+	sysreg_clear_set_s(SYS_IMP_APL_UPMCR0_EL1, UPMCR0_IMODE,
+			   FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));
+
+	/* Commit all of the above */
+	isb();
+
+	/*
+	 * Make sure the kernel's idea of logical CPU order is the same as AIC's
+	 * If we ever end up with a mismatch here, we will have to introduce
+	 * a mapping table similar to what other irqchip drivers do.
+	 */
+	WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());
+
+	/*
+	 * Always keep IPIs unmasked at the hardware level (except auto-masking
+	 * by AIC during processing). We manage masks at the vIPI level.
+	 */
+	aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);
+	aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);
+	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
+
+	/* Initialize the local mask state */
+	__this_cpu_write(aic_fiq_unmasked, 0);
+
+	return 0;
+}
+
+static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
+{
+	int i;
+	void __iomem *regs;
+	u32 info;
+	struct aic_irq_chip *irqc;
+
+	regs = of_iomap(node, 0);
+	if (WARN_ON(!regs))
+		return -EIO;
+
+	irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);
+	if (!irqc)
+		return -ENOMEM;
+
+	aic_irqc = irqc;
+	irqc->base = regs;
+
+	info = aic_ic_read(irqc, AIC_INFO);
+	irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);
+
+	irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+						   irqc->nr_hw + AIC_NR_FIQ,
+						   &aic_irq_domain_ops, irqc);
+	if (WARN_ON(!irqc->hw_domain)) {
+		iounmap(irqc->base);
+		kfree(irqc);
+		return -ENODEV;
+	}
+
+	irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);
+
+	if (aic_init_smp(irqc, node)) {
+		irq_domain_remove(irqc->hw_domain);
+		iounmap(irqc->base);
+		kfree(irqc);
+		return -ENODEV;
+	}
+
+	set_handle_irq(aic_handle_irq);
+	set_handle_fiq(aic_handle_fiq);
+
+	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+		aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);
+	for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)
+		aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);
+	for (i = 0; i < irqc->nr_hw; i++)
+		aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);
+
+	if (!is_kernel_in_hyp_mode())
+		pr_info("Kernel running in EL1, mapping interrupts");
+
+	cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,
+			  "irqchip/apple-aic/ipi:starting",
+			  aic_init_cpu, NULL);
+
+	pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
+		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
+
+	return 0;
+}
+
+IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 73ddf2540f3f..a2373fa4a571 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -26,6 +26,7 @@ static struct of_bus *of_match_bus(struct device_node *np);
 static int __of_address_to_resource(struct device_node *dev,
 		const __be32 *addrp, u64 size, unsigned int flags,
 		const char *name, struct resource *r);
+static bool of_mmio_is_nonposted(struct device_node *np);
 
 /* Debug utility */
 #ifdef DEBUG
@@ -847,6 +848,9 @@ static int __of_address_to_resource(struct device_node *dev,
 		return -EINVAL;
 	memset(r, 0, sizeof(struct resource));
 
+	if (of_mmio_is_nonposted(dev))
+		flags |= IORESOURCE_MEM_NONPOSTED;
+
 	r->start = taddr;
 	r->end = taddr + size - 1;
 	r->flags = flags;
@@ -896,7 +900,10 @@ void __iomem *of_iomap(struct device_node *np, int index)
 	if (of_address_to_resource(np, index, &res))
 		return NULL;
 
-	return ioremap(res.start, resource_size(&res));
+	if (res.flags & IORESOURCE_MEM_NONPOSTED)
+		return ioremap_np(res.start, resource_size(&res));
+	else
+		return ioremap(res.start, resource_size(&res));
 }
 EXPORT_SYMBOL(of_iomap);
 
@@ -928,7 +935,11 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index,
 	if (!request_mem_region(res.start, resource_size(&res), name))
 		return IOMEM_ERR_PTR(-EBUSY);
 
-	mem = ioremap(res.start, resource_size(&res));
+	if (res.flags & IORESOURCE_MEM_NONPOSTED)
+		mem = ioremap_np(res.start, resource_size(&res));
+	else
+		mem = ioremap(res.start, resource_size(&res));
+
 	if (!mem) {
 		release_mem_region(res.start, resource_size(&res));
 		return IOMEM_ERR_PTR(-ENOMEM);
@@ -1094,3 +1105,31 @@ bool of_dma_is_coherent(struct device_node *np)
 	return false;
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+/**
+ * of_mmio_is_nonposted - Check if device uses non-posted MMIO
+ * @np:	device node
+ *
+ * Returns true if the "nonposted-mmio" property was found for
+ * the device's bus.
+ *
+ * This is currently only enabled on builds that support Apple ARM devices, as
+ * an optimization.
+ */
+static bool of_mmio_is_nonposted(struct device_node *np)
+{
+	struct device_node *parent;
+	bool nonposted;
+
+	if (!IS_ENABLED(CONFIG_ARCH_APPLE))
+		return false;
+
+	parent = of_get_parent(np);
+	if (!parent)
+		return false;
+
+	nonposted = of_property_read_bool(parent, "nonposted-mmio");
+
+	of_node_put(parent);
+	return nonposted;
+}
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index c6af40ce03be..76d456c516a1 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -942,7 +942,9 @@ static inline void *phys_to_virt(unsigned long address)
  *
  * ioremap_wc() and ioremap_wt() can provide more relaxed caching attributes
  * for specific drivers if the architecture choses to implement them.  If they
- * are not implemented we fall back to plain ioremap.
+ * are not implemented we fall back to plain ioremap. Conversely, ioremap_np()
+ * can provide stricter non-posted write semantics if the architecture
+ * implements them.
  */
 #ifndef CONFIG_MMU
 #ifndef ioremap
@@ -995,6 +997,23 @@ static inline void __iomem *ioremap_uc(phys_addr_t offset, size_t size)
 }
 #endif
 
+/*
+ * ioremap_np needs an explicit architecture implementation, as it
+ * requests stronger semantics than regular ioremap(). Portable drivers
+ * should instead use one of the higher-level abstractions, like
+ * devm_ioremap_resource(), to choose the correct variant for any given
+ * device and bus. Portable drivers with a good reason to want non-posted
+ * write semantics should always provide an ioremap() fallback in case
+ * ioremap_np() is not available.
+ */
+#ifndef ioremap_np
+#define ioremap_np ioremap_np
+static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_HAS_IOPORT_MAP
 #ifndef CONFIG_GENERIC_IOMAP
 #ifndef ioport_map
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 649224664969..9b3eb6d86200 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -101,6 +101,15 @@ extern void ioport_unmap(void __iomem *);
 #define ioremap_wt ioremap
 #endif
 
+#ifndef ARCH_HAS_IOREMAP_NP
+/* See the comment in asm-generic/io.h about ioremap_np(). */
+#define ioremap_np ioremap_np
+static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_PCI
 /* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 1d68d5613dae..73c7139c866f 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -32,6 +32,7 @@ enum arch_timer_ppi_nr {
 	ARCH_TIMER_PHYS_NONSECURE_PPI,
 	ARCH_TIMER_VIRT_PPI,
 	ARCH_TIMER_HYP_PPI,
+	ARCH_TIMER_HYP_VIRT_PPI,
 	ARCH_TIMER_MAX_TIMER_PPI
 };
 
diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h
new file mode 100644
index 000000000000..604f2bb30ac0
--- /dev/null
+++ b/include/dt-bindings/interrupt-controller/apple-aic.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR MIT */
+#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_APPLE_AIC_H
+#define _DT_BINDINGS_INTERRUPT_CONTROLLER_APPLE_AIC_H
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#define AIC_IRQ	0
+#define AIC_FIQ	1
+
+#define AIC_TMR_HV_PHYS		0
+#define AIC_TMR_HV_VIRT		1
+#define AIC_TMR_GUEST_PHYS	2
+#define AIC_TMR_GUEST_VIRT	3
+
+#endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 3d4442397bf9..db82ce5304f9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -100,6 +100,7 @@ enum cpuhp_state {
 	CPUHP_AP_CPU_PM_STARTING,
 	CPUHP_AP_IRQ_GIC_STARTING,
 	CPUHP_AP_IRQ_HIP04_STARTING,
+	CPUHP_AP_IRQ_APPLE_AIC_STARTING,
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,
diff --git a/include/linux/io.h b/include/linux/io.h
index 8394c56babc2..61ff7d6278b6 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -68,6 +68,8 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset,
 				   resource_size_t size);
 void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
 				   resource_size_t size);
+void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset,
+				   resource_size_t size);
 void devm_iounmap(struct device *dev, void __iomem *addr);
 int check_signature(const volatile void __iomem *io_addr,
 			const unsigned char *signature, int length);
@@ -80,20 +82,20 @@ void devm_memunmap(struct device *dev, void *addr);
 #ifdef CONFIG_PCI
 /*
  * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and
- * Posting") mandate non-posted configuration transactions. There is
- * no ioremap API in the kernel that can guarantee non-posted write
- * semantics across arches so provide a default implementation for
- * mapping PCI config space that defaults to ioremap(); arches
- * should override it if they have memory mapping implementations that
- * guarantee non-posted writes semantics to make the memory mapping
- * compliant with the PCI specification.
+ * Posting") mandate non-posted configuration transactions. This default
+ * implementation attempts to use the ioremap_np() API to provide this
+ * on arches that support it, and falls back to ioremap() on those that
+ * don't. Overriding this function is deprecated; arches that properly
+ * support non-posted accesses should implement ioremap_np() instead, which
+ * this default implementation can then use to return mappings compliant with
+ * the PCI specification.
  */
 #ifndef pci_remap_cfgspace
 #define pci_remap_cfgspace pci_remap_cfgspace
 static inline void __iomem *pci_remap_cfgspace(phys_addr_t offset,
 					       size_t size)
 {
-	return ioremap(offset, size);
+	return ioremap_np(offset, size) ?: ioremap(offset, size);
 }
 #endif
 #endif
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 55de385c839c..1de6c2e40c32 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -108,6 +108,7 @@ struct resource {
 #define IORESOURCE_MEM_32BIT		(3<<3)
 #define IORESOURCE_MEM_SHADOWABLE	(1<<5)	/* dup: IORESOURCE_SHADOWABLE */
 #define IORESOURCE_MEM_EXPANSIONROM	(1<<6)
+#define IORESOURCE_MEM_NONPOSTED	(1<<7)
 
 /* PnP I/O specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IO_16BIT_ADDR	(1<<0)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index f6d092fdb93d..81cbf85f73de 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -575,67 +575,11 @@
 #define ICC_SRE_EL1_DFB			(1U << 1)
 #define ICC_SRE_EL1_SRE			(1U << 0)
 
-/*
- * Hypervisor interface registers (SRE only)
- */
-#define ICH_LR_VIRTUAL_ID_MASK		((1ULL << 32) - 1)
-
-#define ICH_LR_EOI			(1ULL << 41)
-#define ICH_LR_GROUP			(1ULL << 60)
-#define ICH_LR_HW			(1ULL << 61)
-#define ICH_LR_STATE			(3ULL << 62)
-#define ICH_LR_PENDING_BIT		(1ULL << 62)
-#define ICH_LR_ACTIVE_BIT		(1ULL << 63)
-#define ICH_LR_PHYS_ID_SHIFT		32
-#define ICH_LR_PHYS_ID_MASK		(0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
-#define ICH_LR_PRIORITY_SHIFT		48
-#define ICH_LR_PRIORITY_MASK		(0xffULL << ICH_LR_PRIORITY_SHIFT)
-
 /* These are for GICv2 emulation only */
 #define GICH_LR_VIRTUALID		(0x3ffUL << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
 #define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
 
-#define ICH_MISR_EOI			(1 << 0)
-#define ICH_MISR_U			(1 << 1)
-
-#define ICH_HCR_EN			(1 << 0)
-#define ICH_HCR_UIE			(1 << 1)
-#define ICH_HCR_NPIE			(1 << 3)
-#define ICH_HCR_TC			(1 << 10)
-#define ICH_HCR_TALL0			(1 << 11)
-#define ICH_HCR_TALL1			(1 << 12)
-#define ICH_HCR_EOIcount_SHIFT		27
-#define ICH_HCR_EOIcount_MASK		(0x1f << ICH_HCR_EOIcount_SHIFT)
-
-#define ICH_VMCR_ACK_CTL_SHIFT		2
-#define ICH_VMCR_ACK_CTL_MASK		(1 << ICH_VMCR_ACK_CTL_SHIFT)
-#define ICH_VMCR_FIQ_EN_SHIFT		3
-#define ICH_VMCR_FIQ_EN_MASK		(1 << ICH_VMCR_FIQ_EN_SHIFT)
-#define ICH_VMCR_CBPR_SHIFT		4
-#define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
-#define ICH_VMCR_EOIM_SHIFT		9
-#define ICH_VMCR_EOIM_MASK		(1 << ICH_VMCR_EOIM_SHIFT)
-#define ICH_VMCR_BPR1_SHIFT		18
-#define ICH_VMCR_BPR1_MASK		(7 << ICH_VMCR_BPR1_SHIFT)
-#define ICH_VMCR_BPR0_SHIFT		21
-#define ICH_VMCR_BPR0_MASK		(7 << ICH_VMCR_BPR0_SHIFT)
-#define ICH_VMCR_PMR_SHIFT		24
-#define ICH_VMCR_PMR_MASK		(0xffUL << ICH_VMCR_PMR_SHIFT)
-#define ICH_VMCR_ENG0_SHIFT		0
-#define ICH_VMCR_ENG0_MASK		(1 << ICH_VMCR_ENG0_SHIFT)
-#define ICH_VMCR_ENG1_SHIFT		1
-#define ICH_VMCR_ENG1_MASK		(1 << ICH_VMCR_ENG1_SHIFT)
-
-#define ICH_VTR_PRI_BITS_SHIFT		29
-#define ICH_VTR_PRI_BITS_MASK		(7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT		23
-#define ICH_VTR_ID_BITS_MASK		(7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT		22
-#define ICH_VTR_SEIS_MASK		(1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT		21
-#define ICH_VTR_A3V_MASK		(1 << ICH_VTR_A3V_SHIFT)
-
 #define ICC_IAR1_EL1_SPURIOUS		0x3ff
 
 #define ICC_SRE_EL2_SRE			(1 << 0)
diff --git a/lib/devres.c b/lib/devres.c
index 2a4ff5d64288..4679dbb1bf5f 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -10,6 +10,7 @@ enum devm_ioremap_type {
 	DEVM_IOREMAP = 0,
 	DEVM_IOREMAP_UC,
 	DEVM_IOREMAP_WC,
+	DEVM_IOREMAP_NP,
 };
 
 void devm_ioremap_release(struct device *dev, void *res)
@@ -42,6 +43,9 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset,
 	case DEVM_IOREMAP_WC:
 		addr = ioremap_wc(offset, size);
 		break;
+	case DEVM_IOREMAP_NP:
+		addr = ioremap_np(offset, size);
+		break;
 	}
 
 	if (addr) {
@@ -99,6 +103,21 @@ void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
 EXPORT_SYMBOL(devm_ioremap_wc);
 
 /**
+ * devm_ioremap_np - Managed ioremap_np()
+ * @dev: Generic device to remap IO address for
+ * @offset: Resource address to map
+ * @size: Size of map
+ *
+ * Managed ioremap_np().  Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset,
+			      resource_size_t size)
+{
+	return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NP);
+}
+EXPORT_SYMBOL(devm_ioremap_np);
+
+/**
  * devm_iounmap - Managed iounmap()
  * @dev: Generic device to unmap for
  * @addr: Address to unmap
@@ -128,6 +147,9 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res,
 		return IOMEM_ERR_PTR(-EINVAL);
 	}
 
+	if (type == DEVM_IOREMAP && res->flags & IORESOURCE_MEM_NONPOSTED)
+		type = DEVM_IOREMAP_NP;
+
 	size = resource_size(res);
 
 	if (res->name)