summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2024-07-16 00:03:44 +0300
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2024-07-16 00:03:44 +0300
commita23e1966932464e1c5226cb9ac4ce1d5fc10ba22 (patch)
treebf5f1b57faa01ca31656bfc48c7d6b6f0bc39189 /include/linux
parent7c7b1be19b228b450c2945ec379d7fc6bfef9852 (diff)
parentf3efefb6fdcce604413135bd8d4c5568e53a1f13 (diff)
downloadlinux-a23e1966932464e1c5226cb9ac4ce1d5fc10ba22.tar.xz
Merge branch 'next' into for-linus
Prepare input updates for 6.11 merge window.
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/acpi.h130
-rw-r--r--include/linux/acpi_amd_wbrf.h91
-rw-r--r--include/linux/acpi_iort.h1
-rw-r--r--include/linux/aer.h26
-rw-r--r--include/linux/amba/clcd-regs.h87
-rw-r--r--include/linux/amba/clcd.h290
-rw-r--r--include/linux/amba/pl022.h4
-rw-r--r--include/linux/amba/serial.h261
-rw-r--r--include/linux/amd-iommu.h127
-rw-r--r--include/linux/amd-pmf-io.h50
-rw-r--r--include/linux/amd-pstate.h14
-rw-r--r--include/linux/anon_inodes.h4
-rw-r--r--include/linux/apple-mailbox.h19
-rw-r--r--include/linux/arch_topology.h8
-rw-r--r--include/linux/args.h28
-rw-r--r--include/linux/arm-smccc.h71
-rw-r--r--include/linux/arm_ffa.h81
-rw-r--r--include/linux/arm_sdei.h2
-rw-r--r--include/linux/array_size.h13
-rw-r--r--include/linux/async.h3
-rw-r--r--include/linux/atmel-mci.h46
-rw-r--r--include/linux/atomic/atomic-arch-fallback.h67
-rw-r--r--include/linux/atomic/atomic-instrumented.h76
-rw-r--r--include/linux/atomic/atomic-long.h24
-rw-r--r--include/linux/audit.h3
-rw-r--r--include/linux/avf/virtchnl.h178
-rw-r--r--include/linux/backing-dev-defs.h7
-rw-r--r--include/linux/backing-dev.h2
-rw-r--r--include/linux/backing-file.h42
-rw-r--r--include/linux/badblocks.h30
-rw-r--r--include/linux/binfmts.h10
-rw-r--r--include/linux/bio.h30
-rw-r--r--include/linux/bitfield.h3
-rw-r--r--include/linux/bitmap-str.h16
-rw-r--r--include/linux/bitmap.h205
-rw-r--r--include/linux/bits.h8
-rw-r--r--include/linux/blk-integrity.h1
-rw-r--r--include/linux/blk-mq.h29
-rw-r--r--include/linux/blk-pm.h1
-rw-r--r--include/linux/blk_types.h65
-rw-r--r--include/linux/blkdev.h281
-rw-r--r--include/linux/bootconfig.h8
-rw-r--r--include/linux/bootmem_info.h2
-rw-r--r--include/linux/bpf-cgroup-defs.h5
-rw-r--r--include/linux/bpf-cgroup.h106
-rw-r--r--include/linux/bpf.h471
-rw-r--r--include/linux/bpf_local_storage.h30
-rw-r--r--include/linux/bpf_mem_alloc.h11
-rw-r--r--include/linux/bpf_mprog.h343
-rw-r--r--include/linux/bpf_types.h5
-rw-r--r--include/linux/bpf_verifier.h244
-rw-r--r--include/linux/bpfilter.h24
-rw-r--r--include/linux/brcmphy.h11
-rw-r--r--include/linux/btf.h51
-rw-r--r--include/linux/btf_ids.h24
-rw-r--r--include/linux/buffer_head.h143
-rw-r--r--include/linux/buildid.h5
-rw-r--r--include/linux/bvec.h2
-rw-r--r--include/linux/cache.h25
-rw-r--r--include/linux/cacheflush.h13
-rw-r--r--include/linux/cacheinfo.h7
-rw-r--r--include/linux/can/dev.h4
-rw-r--r--include/linux/can/rx-offload.h11
-rw-r--r--include/linux/cc_platform.h12
-rw-r--r--include/linux/cdx/cdx_bus.h122
-rw-r--r--include/linux/ceph/ceph_debug.h38
-rw-r--r--include/linux/ceph/ceph_fs.h52
-rw-r--r--include/linux/ceph/mdsmap.h72
-rw-r--r--include/linux/ceph/messenger.h40
-rw-r--r--include/linux/ceph/mon_client.h2
-rw-r--r--include/linux/ceph/osd_client.h99
-rw-r--r--include/linux/ceph/rados.h4
-rw-r--r--include/linux/cfi.h16
-rw-r--r--include/linux/cgroup-defs.h44
-rw-r--r--include/linux/cgroup.h16
-rw-r--r--include/linux/cleanup.h91
-rw-r--r--include/linux/clk-provider.h47
-rw-r--r--include/linux/clk.h39
-rw-r--r--include/linux/clk/mmp.h18
-rw-r--r--include/linux/clk/ti.h3
-rw-r--r--include/linux/clocksource.h14
-rw-r--r--include/linux/clocksource_ids.h3
-rw-r--r--include/linux/closure.h415
-rw-r--r--include/linux/cma.h6
-rw-r--r--include/linux/comedi/comedi_8254.h51
-rw-r--r--include/linux/comedi/comedi_8255.h24
-rw-r--r--include/linux/comedi/comedidev.h2
-rw-r--r--include/linux/compat.h1
-rw-r--r--include/linux/compiler-clang.h15
-rw-r--r--include/linux/compiler-gcc.h26
-rw-r--r--include/linux/compiler.h63
-rw-r--r--include/linux/compiler_attributes.h38
-rw-r--r--include/linux/compiler_types.h93
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/connector.h7
-rw-r--r--include/linux/console.h265
-rw-r--r--include/linux/console_struct.h1
-rw-r--r--include/linux/const.h8
-rw-r--r--include/linux/container.h2
-rw-r--r--include/linux/coresight.h208
-rw-r--r--include/linux/counter.h2
-rw-r--r--include/linux/cper.h23
-rw-r--r--include/linux/cpu.h54
-rw-r--r--include/linux/cpu_smt.h33
-rw-r--r--include/linux/cpufreq.h71
-rw-r--r--include/linux/cpuhotplug.h27
-rw-r--r--include/linux/cpumask.h129
-rw-r--r--include/linux/cpuset.h16
-rw-r--r--include/linux/crash_core.h156
-rw-r--r--include/linux/crash_dump.h8
-rw-r--r--include/linux/crash_reserve.h48
-rw-r--r--include/linux/crc-ccitt.h7
-rw-r--r--include/linux/cred.h121
-rw-r--r--include/linux/crypto.h30
-rw-r--r--include/linux/cxl-event.h143
-rw-r--r--include/linux/damon.h181
-rw-r--r--include/linux/dax.h31
-rw-r--r--include/linux/dcache.h182
-rw-r--r--include/linux/debugfs.h19
-rw-r--r--include/linux/decompress/mm.h2
-rw-r--r--include/linux/dev_printk.h2
-rw-r--r--include/linux/device-mapper.h1
-rw-r--r--include/linux/device.h58
-rw-r--r--include/linux/device/bus.h7
-rw-r--r--include/linux/device/class.h2
-rw-r--r--include/linux/dio.h2
-rw-r--r--include/linux/dlm_plock.h2
-rw-r--r--include/linux/dm-bufio.h7
-rw-r--r--include/linux/dm-io.h3
-rw-r--r--include/linux/dm-verity-loadpin.h2
-rw-r--r--include/linux/dma-buf.h11
-rw-r--r--include/linux/dma-direct.h19
-rw-r--r--include/linux/dma-fence.h35
-rw-r--r--include/linux/dma-map-ops.h21
-rw-r--r--include/linux/dma-mapping.h21
-rw-r--r--include/linux/dma/k3-udma-glue.h10
-rw-r--r--include/linux/dmaengine.h5
-rw-r--r--include/linux/dmar.h2
-rw-r--r--include/linux/dnotify.h4
-rw-r--r--include/linux/dpll.h182
-rw-r--r--include/linux/dsa/sja1105.h2
-rw-r--r--include/linux/dynamic_debug.h4
-rw-r--r--include/linux/dynamic_queue_limits.h45
-rw-r--r--include/linux/edac.h7
-rw-r--r--include/linux/efi.h84
-rw-r--r--include/linux/einj-cxl.h44
-rw-r--r--include/linux/elf-fdpic.h14
-rw-r--r--include/linux/energy_model.h166
-rw-r--r--include/linux/entry-common.h95
-rw-r--r--include/linux/etherdevice.h25
-rw-r--r--include/linux/ethtool.h126
-rw-r--r--include/linux/eventfd.h17
-rw-r--r--include/linux/evm.h123
-rw-r--r--include/linux/export-internal.h10
-rw-r--r--include/linux/export.h22
-rw-r--r--include/linux/exportfs.h70
-rw-r--r--include/linux/extcon.h12
-rw-r--r--include/linux/f2fs_fs.h80
-rw-r--r--include/linux/fb.h138
-rw-r--r--include/linux/fdtable.h36
-rw-r--r--include/linux/file.h14
-rw-r--r--include/linux/filelock.h137
-rw-r--r--include/linux/filter.h137
-rw-r--r--include/linux/find.h8
-rw-r--r--include/linux/firewire.h2
-rw-r--r--include/linux/firmware.h5
-rw-r--r--include/linux/firmware/cirrus/cs_dsp.h1
-rw-r--r--include/linux/firmware/imx/dsp.h6
-rw-r--r--include/linux/firmware/imx/sci.h16
-rw-r--r--include/linux/firmware/intel/stratix10-smc.h25
-rw-r--r--include/linux/firmware/intel/stratix10-svc-client.h5
-rw-r--r--include/linux/firmware/mediatek/mtk-adsp-ipc.h6
-rw-r--r--include/linux/firmware/meson/meson_sm.h2
-rw-r--r--include/linux/firmware/qcom/qcom_qseecom.h99
-rw-r--r--include/linux/firmware/qcom/qcom_scm.h121
-rw-r--r--include/linux/firmware/xlnx-zynqmp.h92
-rw-r--r--include/linux/flex_proportions.h32
-rw-r--r--include/linux/font.h3
-rw-r--r--include/linux/fortify-string.h179
-rw-r--r--include/linux/framer/framer-provider.h193
-rw-r--r--include/linux/framer/framer.h205
-rw-r--r--include/linux/framer/pef2256.h31
-rw-r--r--include/linux/freelist.h129
-rw-r--r--include/linux/frontswap.h91
-rw-r--r--include/linux/fs.h560
-rw-r--r--include/linux/fs_context.h8
-rw-r--r--include/linux/fs_enet_pd.h165
-rw-r--r--include/linux/fs_stack.h8
-rw-r--r--include/linux/fs_uart_pd.h71
-rw-r--r--include/linux/fscache-cache.h3
-rw-r--r--include/linux/fscache.h45
-rw-r--r--include/linux/fscrypt.h148
-rw-r--r--include/linux/fsnotify.h84
-rw-r--r--include/linux/fsnotify_backend.h21
-rw-r--r--include/linux/ftrace.h9
-rw-r--r--include/linux/fw_table.h61
-rw-r--r--include/linux/fwnode.h20
-rw-r--r--include/linux/generic-radix-tree.h87
-rw-r--r--include/linux/genl_magic_func.h27
-rw-r--r--include/linux/genl_magic_struct.h8
-rw-r--r--include/linux/gfp.h37
-rw-r--r--include/linux/gfp_types.h109
-rw-r--r--include/linux/gpio/consumer.h16
-rw-r--r--include/linux/gpio/driver.h154
-rw-r--r--include/linux/gpio/gpio-nomadik.h294
-rw-r--r--include/linux/gpio/property.h1
-rw-r--r--include/linux/greybus.h46
-rw-r--r--include/linux/greybus/greybus_protocols.h8
-rw-r--r--include/linux/greybus/svc.h3
-rw-r--r--include/linux/habanalabs/cpucp_if.h1423
-rw-r--r--include/linux/habanalabs/hl_boot_if.h792
-rw-r--r--include/linux/hid-roccat.h2
-rw-r--r--include/linux/hid-sensor-ids.h4
-rw-r--r--include/linux/hid.h39
-rw-r--r--include/linux/hid_bpf.h13
-rw-r--r--include/linux/highmem.h152
-rw-r--r--include/linux/hisi_acc_qm.h79
-rw-r--r--include/linux/hrtimer.h167
-rw-r--r--include/linux/hrtimer_defs.h104
-rw-r--r--include/linux/hrtimer_types.h50
-rw-r--r--include/linux/huge_mm.h211
-rw-r--r--include/linux/hugetlb.h107
-rw-r--r--include/linux/hugetlb_cgroup.h11
-rw-r--r--include/linux/hw_breakpoint.h3
-rw-r--r--include/linux/hw_random.h1
-rw-r--r--include/linux/hwmon.h18
-rw-r--r--include/linux/hyperv.h29
-rw-r--r--include/linux/i2c-atr.h116
-rw-r--r--include/linux/i2c.h22
-rw-r--r--include/linux/i3c/device.h4
-rw-r--r--include/linux/i3c/master.h26
-rw-r--r--include/linux/icmpv6.h10
-rw-r--r--include/linux/idr.h6
-rw-r--r--include/linux/ieee80211.h547
-rw-r--r--include/linux/if_arp.h4
-rw-r--r--include/linux/if_team.h6
-rw-r--r--include/linux/if_tun.h16
-rw-r--r--include/linux/if_vlan.h10
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/iio/adc/ad_sigma_delta.h4
-rw-r--r--include/linux/iio/adc/adi-axi-adc.h64
-rw-r--r--include/linux/iio/backend.h72
-rw-r--r--include/linux/iio/buffer-dma.h7
-rw-r--r--include/linux/iio/buffer-dmaengine.h3
-rw-r--r--include/linux/iio/common/inv_sensors_timestamp.h95
-rw-r--r--include/linux/iio/common/st_sensors.h4
-rw-r--r--include/linux/iio/consumer.h37
-rw-r--r--include/linux/iio/iio.h48
-rw-r--r--include/linux/iio/imu/adis.h3
-rw-r--r--include/linux/iio/sw_device.h3
-rw-r--r--include/linux/iio/sw_trigger.h3
-rw-r--r--include/linux/iio/types.h3
-rw-r--r--include/linux/ima.h142
-rw-r--r--include/linux/indirect_call_wrapper.h4
-rw-r--r--include/linux/inet_diag.h1
-rw-r--r--include/linux/inetdevice.h14
-rw-r--r--include/linux/init.h14
-rw-r--r--include/linux/init_task.h7
-rw-r--r--include/linux/input.h7
-rw-r--r--include/linux/instruction_pointer.h5
-rw-r--r--include/linux/int_log.h56
-rw-r--r--include/linux/integrity.h27
-rw-r--r--include/linux/intel-ish-client-if.h3
-rw-r--r--include/linux/intel_rapl.h6
-rw-r--r--include/linux/intel_tcc.h2
-rw-r--r--include/linux/intel_tpmi.h22
-rw-r--r--include/linux/interconnect-provider.h13
-rw-r--r--include/linux/interrupt.h15
-rw-r--r--include/linux/io-pgtable.h38
-rw-r--r--include/linux/io.h7
-rw-r--r--include/linux/io_uring.h84
-rw-r--r--include/linux/io_uring/cmd.h77
-rw-r--r--include/linux/io_uring_types.h292
-rw-r--r--include/linux/iomap.h26
-rw-r--r--include/linux/iommu.h636
-rw-r--r--include/linux/iommufd.h9
-rw-r--r--include/linux/ioport.h15
-rw-r--r--include/linux/ioprio.h25
-rw-r--r--include/linux/ioremap.h30
-rw-r--r--include/linux/iosys-map.h50
-rw-r--r--include/linux/iov_iter.h274
-rw-r--r--include/linux/iova_bitmap.h26
-rw-r--r--include/linux/ipc.h2
-rw-r--r--include/linux/ipv6.h81
-rw-r--r--include/linux/irq.h28
-rw-r--r--include/linux/irq_work.h3
-rw-r--r--include/linux/irqdomain.h19
-rw-r--r--include/linux/irqdomain_defs.h2
-rw-r--r--include/linux/irqflags.h16
-rw-r--r--include/linux/irqflags_types.h22
-rw-r--r--include/linux/irqhandler.h2
-rw-r--r--include/linux/ism.h1
-rw-r--r--include/linux/iversion.h2
-rw-r--r--include/linux/jbd2.h47
-rw-r--r--include/linux/jiffies.h206
-rw-r--r--include/linux/kasan.h178
-rw-r--r--include/linux/kernel.h89
-rw-r--r--include/linux/kernfs.h15
-rw-r--r--include/linux/kexec.h72
-rw-r--r--include/linux/key-type.h1
-rw-r--r--include/linux/key.h1
-rw-r--r--include/linux/kfence.h13
-rw-r--r--include/linux/kmsan_types.h2
-rw-r--r--include/linux/kobject.h10
-rw-r--r--include/linux/kprobes.h28
-rw-r--r--include/linux/ksm.h30
-rw-r--r--include/linux/kstrtox.h5
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/ktime.h8
-rw-r--r--include/linux/kvm_host.h322
-rw-r--r--include/linux/kvm_types.h9
-rw-r--r--include/linux/led-class-flash.h24
-rw-r--r--include/linux/led-class-multicolor.h29
-rw-r--r--include/linux/leds-expresswire.h38
-rw-r--r--include/linux/leds.h44
-rw-r--r--include/linux/libata.h82
-rw-r--r--include/linux/limits.h2
-rw-r--r--include/linux/linkmode.h23
-rw-r--r--include/linux/list.h134
-rw-r--r--include/linux/list_lru.h76
-rw-r--r--include/linux/llist.h46
-rw-r--r--include/linux/lockd/lockd.h12
-rw-r--r--include/linux/lockd/xdr.h2
-rw-r--r--include/linux/lockdep.h64
-rw-r--r--include/linux/lockdep_types.h59
-rw-r--r--include/linux/logic_pio.h3
-rw-r--r--include/linux/lsm_hook_defs.h72
-rw-r--r--include/linux/lsm_hooks.h37
-rw-r--r--include/linux/lwq.h124
-rw-r--r--include/linux/maple.h1
-rw-r--r--include/linux/maple_tree.h389
-rw-r--r--include/linux/marvell_phy.h2
-rw-r--r--include/linux/math.h19
-rw-r--r--include/linux/mc146818rtc.h3
-rw-r--r--include/linux/mcb.h1
-rw-r--r--include/linux/mdio.h92
-rw-r--r--include/linux/mei_cl_bus.h12
-rw-r--r--include/linux/memblock.h26
-rw-r--r--include/linux/memcontrol.h188
-rw-r--r--include/linux/memory-tiers.h45
-rw-r--r--include/linux/memory.h18
-rw-r--r--include/linux/memory_hotplug.h27
-rw-r--r--include/linux/mempolicy.h58
-rw-r--r--include/linux/mempool.h14
-rw-r--r--include/linux/memremap.h15
-rw-r--r--include/linux/mfd/88pm860x.h6
-rw-r--r--include/linux/mfd/abx500/ab8500.h10
-rw-r--r--include/linux/mfd/core.h2
-rw-r--r--include/linux/mfd/cs42l43-regs.h1184
-rw-r--r--include/linux/mfd/cs42l43.h103
-rw-r--r--include/linux/mfd/da9055/pdata.h13
-rw-r--r--include/linux/mfd/dbx500-prcmu.h21
-rw-r--r--include/linux/mfd/hi655x-pmic.h1
-rw-r--r--include/linux/mfd/idtRC38xxx_reg.h273
-rw-r--r--include/linux/mfd/lp8788.h9
-rw-r--r--include/linux/mfd/lpc_ich.h7
-rw-r--r--include/linux/mfd/max77686-private.h4
-rw-r--r--include/linux/mfd/max77693-private.h2
-rw-r--r--include/linux/mfd/max77843-private.h2
-rw-r--r--include/linux/mfd/max8997.h1
-rw-r--r--include/linux/mfd/max8998.h6
-rw-r--r--include/linux/mfd/mt6358/registers.h23
-rw-r--r--include/linux/mfd/rz-mtu3.h66
-rw-r--r--include/linux/mfd/si476x-platform.h2
-rw-r--r--include/linux/mfd/stm32-timers.h11
-rw-r--r--include/linux/mfd/sun4i-gpadc.h4
-rw-r--r--include/linux/mfd/syscon.h25
-rw-r--r--include/linux/mfd/tps65086.h23
-rw-r--r--include/linux/mfd/tps65910.h2
-rw-r--r--include/linux/mfd/twl.h1
-rw-r--r--include/linux/mhi.h29
-rw-r--r--include/linux/mhi_ep.h36
-rw-r--r--include/linux/micrel_phy.h11
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mii_timestamper.h4
-rw-r--r--include/linux/min_heap.h44
-rw-r--r--include/linux/minmax.h174
-rw-r--r--include/linux/misc_cgroup.h28
-rw-r--r--include/linux/mlx4/device.h20
-rw-r--r--include/linux/mlx4/driver.h42
-rw-r--r--include/linux/mlx5/device.h76
-rw-r--r--include/linux/mlx5/driver.h119
-rw-r--r--include/linux/mlx5/eswitch.h11
-rw-r--r--include/linux/mlx5/fs.h6
-rw-r--r--include/linux/mlx5/macsec.h32
-rw-r--r--include/linux/mlx5/mlx5_ifc.h310
-rw-r--r--include/linux/mlx5/mlx5_ifc_vdpa.h11
-rw-r--r--include/linux/mlx5/port.h3
-rw-r--r--include/linux/mlx5/qp.h5
-rw-r--r--include/linux/mlx5/vport.h1
-rw-r--r--include/linux/mm.h756
-rw-r--r--include/linux/mm_inline.h64
-rw-r--r--include/linux/mm_types.h295
-rw-r--r--include/linux/mm_types_task.h9
-rw-r--r--include/linux/mman.h13
-rw-r--r--include/linux/mmap_lock.h18
-rw-r--r--include/linux/mmc/card.h7
-rw-r--r--include/linux/mmc/core.h1
-rw-r--r--include/linux/mmc/host.h11
-rw-r--r--include/linux/mmc/mmc.h10
-rw-r--r--include/linux/mmdebug.h2
-rw-r--r--include/linux/mmu_context.h2
-rw-r--r--include/linux/mmu_notifier.h113
-rw-r--r--include/linux/mmzone.h139
-rw-r--r--include/linux/mnt_idmapping.h6
-rw-r--r--include/linux/mod_devicetable.h24
-rw-r--r--include/linux/module.h6
-rw-r--r--include/linux/module_symbol.h6
-rw-r--r--include/linux/moduleloader.h13
-rw-r--r--include/linux/moduleparam.h14
-rw-r--r--include/linux/mount.h9
-rw-r--r--include/linux/moxtet.h2
-rw-r--r--include/linux/msi.h30
-rw-r--r--include/linux/mtd/cfi.h2
-rw-r--r--include/linux/mtd/flashchip.h1
-rw-r--r--include/linux/mtd/jedec.h3
-rw-r--r--include/linux/mtd/lpc32xx_mlc.h2
-rw-r--r--include/linux/mtd/lpc32xx_slc.h2
-rw-r--r--include/linux/mtd/mtd.h4
-rw-r--r--include/linux/mtd/onfi.h1
-rw-r--r--include/linux/mtd/qinfo.h2
-rw-r--r--include/linux/mtd/rawnand.h18
-rw-r--r--include/linux/mtd/spinand.h3
-rw-r--r--include/linux/mtd/ubi.h2
-rw-r--r--include/linux/mutex.h63
-rw-r--r--include/linux/mutex_types.h71
-rw-r--r--include/linux/namei.h27
-rw-r--r--include/linux/net.h7
-rw-r--r--include/linux/net/intel/i40e_client.h2
-rw-r--r--include/linux/net_mm.h17
-rw-r--r--include/linux/net_tstamp.h30
-rw-r--r--include/linux/netdevice.h530
-rw-r--r--include/linux/netfilter.h22
-rw-r--r--include/linux/netfilter/ipset/ip_set.h4
-rw-r--r--include/linux/netfilter/nf_conntrack_h323.h4
-rw-r--r--include/linux/netfilter/nf_conntrack_proto_gre.h1
-rw-r--r--include/linux/netfilter/nf_conntrack_sctp.h1
-rw-r--r--include/linux/netfilter_bridge.h6
-rw-r--r--include/linux/netfilter_ipv6.h8
-rw-r--r--include/linux/netfs.h181
-rw-r--r--include/linux/netlink.h11
-rw-r--r--include/linux/nfs4.h284
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/nfs_fs_sb.h4
-rw-r--r--include/linux/nfs_page.h4
-rw-r--r--include/linux/nfs_xdr.h9
-rw-r--r--include/linux/nmi.h21
-rw-r--r--include/linux/node.h26
-rw-r--r--include/linux/nodemask.h2
-rw-r--r--include/linux/nodemask_types.h10
-rw-r--r--include/linux/notifier.h11
-rw-r--r--include/linux/ns_common.h2
-rw-r--r--include/linux/nsproxy.h8
-rw-r--r--include/linux/nubus.h2
-rw-r--r--include/linux/numa.h29
-rw-r--r--include/linux/nvme-auth.h7
-rw-r--r--include/linux/nvme-keyring.h28
-rw-r--r--include/linux/nvme-rdma.h6
-rw-r--r--include/linux/nvme-tcp.h6
-rw-r--r--include/linux/nvme.h26
-rw-r--r--include/linux/nvmem-consumer.h16
-rw-r--r--include/linux/nvmem-provider.h74
-rw-r--r--include/linux/objpool.h181
-rw-r--r--include/linux/objtool.h13
-rw-r--r--include/linux/of.h129
-rw-r--r--include/linux/of_device.h11
-rw-r--r--include/linux/of_graph.h4
-rw-r--r--include/linux/of_iommu.h13
-rw-r--r--include/linux/of_platform.h10
-rw-r--r--include/linux/oid_registry.h20
-rw-r--r--include/linux/osq_lock.h5
-rw-r--r--include/linux/overflow.h167
-rw-r--r--include/linux/padata.h14
-rw-r--r--include/linux/page-flags.h339
-rw-r--r--include/linux/page_counter.h2
-rw-r--r--include/linux/page_ext.h9
-rw-r--r--include/linux/page_idle.h5
-rw-r--r--include/linux/page_owner.h14
-rw-r--r--include/linux/page_table_check.h71
-rw-r--r--include/linux/pageblock-flags.h4
-rw-r--r--include/linux/pagemap.h209
-rw-r--r--include/linux/pagevec.h22
-rw-r--r--include/linux/parport.h2
-rw-r--r--include/linux/pci-ecam.h2
-rw-r--r--include/linux/pci-epc.h52
-rw-r--r--include/linux/pci-epf.h8
-rw-r--r--include/linux/pci.h148
-rw-r--r--include/linux/pci_ids.h120
-rw-r--r--include/linux/pcs/pcs-xpcs.h8
-rw-r--r--include/linux/pds/pds_adminq.h375
-rw-r--r--include/linux/pds/pds_common.h9
-rw-r--r--include/linux/pds/pds_core_if.h1
-rw-r--r--include/linux/peci.h5
-rw-r--r--include/linux/percpu.h13
-rw-r--r--include/linux/percpu_counter.h71
-rw-r--r--include/linux/perf/arm_pmu.h29
-rw-r--r--include/linux/perf/arm_pmuv3.h43
-rw-r--r--include/linux/perf/riscv_pmu.h12
-rw-r--r--include/linux/perf_event.h78
-rw-r--r--include/linux/pgtable.h324
-rw-r--r--include/linux/phy.h194
-rw-r--r--include/linux/phy/phy.h35
-rw-r--r--include/linux/phy/tegra/xusb.h1
-rw-r--r--include/linux/phylib_stubs.h68
-rw-r--r--include/linux/phylink.h217
-rw-r--r--include/linux/pid.h150
-rw-r--r--include/linux/pid_namespace.h39
-rw-r--r--include/linux/pid_types.h16
-rw-r--r--include/linux/pidfs.h8
-rw-r--r--include/linux/pinctrl/consumer.h61
-rw-r--r--include/linux/pinctrl/machine.h8
-rw-r--r--include/linux/pinctrl/pinconf-generic.h10
-rw-r--r--include/linux/pinctrl/pinconf.h16
-rw-r--r--include/linux/pinctrl/pinctrl.h24
-rw-r--r--include/linux/pinctrl/pinmux.h22
-rw-r--r--include/linux/pipe_fs_i.h26
-rw-r--r--include/linux/pktcdvd.h4
-rw-r--r--include/linux/platform_data/bd6107.h2
-rw-r--r--include/linux/platform_data/brcmfmac.h2
-rw-r--r--include/linux/platform_data/cros_ec_commands.h144
-rw-r--r--include/linux/platform_data/cros_ec_proto.h2
-rw-r--r--include/linux/platform_data/gpio-omap.h3
-rw-r--r--include/linux/platform_data/gpio_backlight.h2
-rw-r--r--include/linux/platform_data/gsc_hwmon.h2
-rw-r--r--include/linux/platform_data/hirschmann-hellcreek.h2
-rw-r--r--include/linux/platform_data/i2c-mux-reg.h2
-rw-r--r--include/linux/platform_data/lv5207lp.h2
-rw-r--r--include/linux/platform_data/mdio-bcm-unimac.h3
-rw-r--r--include/linux/platform_data/microchip-ksz.h24
-rw-r--r--include/linux/platform_data/net-cw1200.h4
-rw-r--r--include/linux/platform_data/omap-twl4030.h3
-rw-r--r--include/linux/platform_data/pca953x.h13
-rw-r--r--include/linux/platform_data/rtc-ds2404.h20
-rw-r--r--include/linux/platform_data/shmob_drm.h57
-rw-r--r--include/linux/platform_data/si5351.h2
-rw-r--r--include/linux/platform_data/video-mx3fb.h50
-rw-r--r--include/linux/platform_data/x86/asus-wmi.h26
-rw-r--r--include/linux/platform_data/x86/clk-lpss.h2
-rw-r--r--include/linux/platform_data/x86/pmc_atom.h25
-rw-r--r--include/linux/platform_data/x86/pwm-lpss.h4
-rw-r--r--include/linux/platform_data/x86/simatic-ipc-base.h5
-rw-r--r--include/linux/platform_data/x86/simatic-ipc.h6
-rw-r--r--include/linux/platform_device.h28
-rw-r--r--include/linux/plist.h12
-rw-r--r--include/linux/plist_types.h17
-rw-r--r--include/linux/pm.h85
-rw-r--r--include/linux/pm_clock.h4
-rw-r--r--include/linux/pm_domain.h83
-rw-r--r--include/linux/pm_opp.h140
-rw-r--r--include/linux/pm_runtime.h32
-rw-r--r--include/linux/pm_wakeup.h10
-rw-r--r--include/linux/pnp.h10
-rw-r--r--include/linux/poison.h5
-rw-r--r--include/linux/poll.h4
-rw-r--r--include/linux/posix-clock.h35
-rw-r--r--include/linux/posix-timers.h69
-rw-r--r--include/linux/posix-timers_types.h80
-rw-r--r--include/linux/power/bq27xxx_battery.h2
-rw-r--r--include/linux/power/power_on_reason.h19
-rw-r--r--include/linux/power_supply.h5
-rw-r--r--include/linux/prandom.h1
-rw-r--r--include/linux/preempt.h21
-rw-r--r--include/linux/printk.h2
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/proc_ns.h2
-rw-r--r--include/linux/profile.h5
-rw-r--r--include/linux/property.h58
-rw-r--r--include/linux/pseudo_fs.h2
-rw-r--r--include/linux/psp-platform-access.h4
-rw-r--r--include/linux/psp-sev.h321
-rw-r--r--include/linux/ptdump.h10
-rw-r--r--include/linux/pti.h2
-rw-r--r--include/linux/ptp_clock_kernel.h3
-rw-r--r--include/linux/ptp_kvm.h4
-rw-r--r--include/linux/ptp_mock.h38
-rw-r--r--include/linux/ptrace.h4
-rw-r--r--include/linux/pwm.h155
-rw-r--r--include/linux/qed/qed_fcoe_if.h3
-rw-r--r--include/linux/quota.h4
-rw-r--r--include/linux/quotaops.h17
-rw-r--r--include/linux/raid/pq.h6
-rw-r--r--include/linux/randomize_kstack.h2
-rw-r--r--include/linux/range.h8
-rw-r--r--include/linux/ras.h18
-rw-r--r--include/linux/rbtree_augmented.h26
-rw-r--r--include/linux/rcu_notifier.h32
-rw-r--r--include/linux/rcu_sync.h1
-rw-r--r--include/linux/rculist.h2
-rw-r--r--include/linux/rculist_nulls.h4
-rw-r--r--include/linux/rcupdate.h46
-rw-r--r--include/linux/rcupdate_trace.h1
-rw-r--r--include/linux/rcupdate_wait.h15
-rw-r--r--include/linux/rcutiny.h4
-rw-r--r--include/linux/rcutree.h18
-rw-r--r--include/linux/reboot.h15
-rw-r--r--include/linux/refcount.h22
-rw-r--r--include/linux/refcount_types.h19
-rw-r--r--include/linux/regmap.h14
-rw-r--r--include/linux/regulator/consumer.h51
-rw-r--r--include/linux/regulator/db8500-prcmu.h6
-rw-r--r--include/linux/regulator/driver.h18
-rw-r--r--include/linux/regulator/machine.h18
-rw-r--r--include/linux/regulator/max8973-regulator.h6
-rw-r--r--include/linux/regulator/mt6358-regulator.h3
-rw-r--r--include/linux/remoteproc.h4
-rw-r--r--include/linux/resctrl.h52
-rw-r--r--include/linux/reset-controller.h4
-rw-r--r--include/linux/restart_block.h2
-rw-r--r--include/linux/resume_user_mode.h3
-rw-r--r--include/linux/rethook.h23
-rw-r--r--include/linux/rhashtable-types.h2
-rw-r--r--include/linux/ring_buffer.h22
-rw-r--r--include/linux/rmap.h412
-rw-r--r--include/linux/rpmsg.h15
-rw-r--r--include/linux/rseq.h131
-rw-r--r--include/linux/rslib.h1
-rw-r--r--include/linux/rtc.h20
-rw-r--r--include/linux/rtnetlink.h44
-rw-r--r--include/linux/rtsx_pci.h8
-rw-r--r--include/linux/rw_hint.h24
-rw-r--r--include/linux/rwbase_rt.h9
-rw-r--r--include/linux/rwsem.h54
-rw-r--r--include/linux/sched.h473
-rw-r--r--include/linux/sched/coredump.h19
-rw-r--r--include/linux/sched/deadline.h4
-rw-r--r--include/linux/sched/isolation.h4
-rw-r--r--include/linux/sched/mm.h119
-rw-r--r--include/linux/sched/numa_balancing.h16
-rw-r--r--include/linux/sched/rt.h4
-rw-r--r--include/linux/sched/sd_flags.h11
-rw-r--r--include/linux/sched/signal.h53
-rw-r--r--include/linux/sched/smt.h2
-rw-r--r--include/linux/sched/task.h42
-rw-r--r--include/linux/sched/task_stack.h1
-rw-r--r--include/linux/sched/topology.h28
-rw-r--r--include/linux/sched/types.h2
-rw-r--r--include/linux/sched/vhost_task.h7
-rw-r--r--include/linux/scmi_protocol.h66
-rw-r--r--include/linux/screen_info.h126
-rw-r--r--include/linux/seccomp.h24
-rw-r--r--include/linux/seccomp_types.h35
-rw-r--r--include/linux/secretmem.h17
-rw-r--r--include/linux/security.h200
-rw-r--r--include/linux/sed-opal-key.h26
-rw-r--r--include/linux/sed-opal.h5
-rw-r--r--include/linux/selection.h56
-rw-r--r--include/linux/sem.h10
-rw-r--r--include/linux/sem_types.h13
-rw-r--r--include/linux/seq_buf.h40
-rw-r--r--include/linux/seq_file.h22
-rw-r--r--include/linux/seqlock.h133
-rw-r--r--include/linux/seqlock_types.h93
-rw-r--r--include/linux/serdev.h29
-rw-r--r--include/linux/serial_8250.h6
-rw-r--r--include/linux/serial_core.h155
-rw-r--r--include/linux/shm.h4
-rw-r--r--include/linux/shmem_fs.h60
-rw-r--r--include/linux/shrinker.h87
-rw-r--r--include/linux/signal.h1
-rw-r--r--include/linux/signal_types.h2
-rw-r--r--include/linux/sizes.h9
-rw-r--r--include/linux/skbuff.h255
-rw-r--r--include/linux/skmsg.h14
-rw-r--r--include/linux/slab.h150
-rw-r--r--include/linux/slab_def.h124
-rw-r--r--include/linux/slimbus.h2
-rw-r--r--include/linux/slub_def.h204
-rw-r--r--include/linux/smp.h19
-rw-r--r--include/linux/smscphy.h34
-rw-r--r--include/linux/soc/andes/irq.h18
-rw-r--r--include/linux/soc/apple/rtkit.h18
-rw-r--r--include/linux/soc/mediatek/infracfg.h41
-rw-r--r--include/linux/soc/mediatek/mtk-mmsys.h8
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h76
-rw-r--r--include/linux/soc/qcom/apr.h2
-rw-r--r--include/linux/soc/qcom/geni-se.h10
-rw-r--r--include/linux/soc/qcom/llcc-qcom.h2
-rw-r--r--include/linux/soc/qcom/qcom-pbs.h30
-rw-r--r--include/linux/soc/qcom/qcom_aoss.h4
-rw-r--r--include/linux/soc/qcom/smd-rpm.h20
-rw-r--r--include/linux/soc/qcom/smem.h1
-rw-r--r--include/linux/soc/samsung/exynos-pmu.h11
-rw-r--r--include/linux/sock_diag.h10
-rw-r--r--include/linux/socket.h8
-rw-r--r--include/linux/sockptr.h48
-rw-r--r--include/linux/soundwire/sdw.h36
-rw-r--r--include/linux/soundwire/sdw_amd.h83
-rw-r--r--include/linux/soundwire/sdw_intel.h14
-rw-r--r--include/linux/soundwire/sdw_type.h6
-rw-r--r--include/linux/spi/pxa2xx_spi.h7
-rw-r--r--include/linux/spi/sh_msiof.h4
-rw-r--r--include/linux/spi/spi-mem.h2
-rw-r--r--include/linux/spi/spi.h305
-rw-r--r--include/linux/spi/spi_bitbang.h2
-rw-r--r--include/linux/spi/spi_gpio.h4
-rw-r--r--include/linux/spinlock.h72
-rw-r--r--include/linux/splice.h51
-rw-r--r--include/linux/spmi.h5
-rw-r--r--include/linux/sprintf.h27
-rw-r--r--include/linux/srcu.h2
-rw-r--r--include/linux/srcutiny.h4
-rw-r--r--include/linux/stackdepot.h118
-rw-r--r--include/linux/stackleak.h6
-rw-r--r--include/linux/start_kernel.h2
-rw-r--r--include/linux/stmmac.h54
-rw-r--r--include/linux/string.h138
-rw-r--r--include/linux/string_choices.h12
-rw-r--r--include/linux/string_helpers.h14
-rw-r--r--include/linux/stringify.h2
-rw-r--r--include/linux/sunrpc/bc_xprt.h3
-rw-r--r--include/linux/sunrpc/cache.h12
-rw-r--r--include/linux/sunrpc/clnt.h5
-rw-r--r--include/linux/sunrpc/sched.h16
-rw-r--r--include/linux/sunrpc/stats.h23
-rw-r--r--include/linux/sunrpc/svc.h129
-rw-r--r--include/linux/sunrpc/svc_rdma.h115
-rw-r--r--include/linux/sunrpc/svc_xprt.h40
-rw-r--r--include/linux/sunrpc/svcauth.h60
-rw-r--r--include/linux/sunrpc/svcsock.h9
-rw-r--r--include/linux/sunrpc/xdr.h6
-rw-r--r--include/linux/sunrpc/xprt.h17
-rw-r--r--include/linux/superhyway.h107
-rw-r--r--include/linux/surface_aggregator/controller.h2
-rw-r--r--include/linux/surface_aggregator/device.h1
-rw-r--r--include/linux/surface_aggregator/serial_hub.h4
-rw-r--r--include/linux/suspend.h74
-rw-r--r--include/linux/swait.h2
-rw-r--r--include/linux/swap.h53
-rw-r--r--include/linux/swapfile.h5
-rw-r--r--include/linux/swapops.h93
-rw-r--r--include/linux/swiotlb.h143
-rw-r--r--include/linux/switchtec.h1
-rw-r--r--include/linux/sync_core.h16
-rw-r--r--include/linux/syscall_user_dispatch.h9
-rw-r--r--include/linux/syscall_user_dispatch_types.h22
-rw-r--r--include/linux/syscalls.h47
-rw-r--r--include/linux/sysctl.h42
-rw-r--r--include/linux/sysfb.h9
-rw-r--r--include/linux/sysfs.h144
-rw-r--r--include/linux/sysrq.h18
-rw-r--r--include/linux/tc.h2
-rw-r--r--include/linux/tcp.h315
-rw-r--r--include/linux/tee_drv.h18
-rw-r--r--include/linux/thermal.h156
-rw-r--r--include/linux/thunderbolt.h8
-rw-r--r--include/linux/ti_wilink_st.h2
-rw-r--r--include/linux/tick.h38
-rw-r--r--include/linux/time_namespace.h3
-rw-r--r--include/linux/timecounter.h11
-rw-r--r--include/linux/timekeeping.h60
-rw-r--r--include/linux/timer.h47
-rw-r--r--include/linux/timer_types.h23
-rw-r--r--include/linux/timerqueue.h13
-rw-r--r--include/linux/timerqueue_types.h17
-rw-r--r--include/linux/tnum.h4
-rw-r--r--include/linux/topology.h2
-rw-r--r--include/linux/torture.h16
-rw-r--r--include/linux/trace.h4
-rw-r--r--include/linux/trace_events.h36
-rw-r--r--include/linux/trace_seq.h17
-rw-r--r--include/linux/tracefs.h69
-rw-r--r--include/linux/tracepoint.h6
-rw-r--r--include/linux/tsm.h69
-rw-r--r--include/linux/tty.h52
-rw-r--r--include/linux/tty_buffer.h20
-rw-r--r--include/linux/tty_driver.h14
-rw-r--r--include/linux/tty_flip.h70
-rw-r--r--include/linux/tty_ldisc.h67
-rw-r--r--include/linux/tty_port.h15
-rw-r--r--include/linux/types.h3
-rw-r--r--include/linux/u64_stats_sync.h9
-rw-r--r--include/linux/ucs2_string.h1
-rw-r--r--include/linux/udp.h106
-rw-r--r--include/linux/uidgid.h24
-rw-r--r--include/linux/uidgid_types.h15
-rw-r--r--include/linux/uio.h61
-rw-r--r--include/linux/uio_driver.h13
-rw-r--r--include/linux/units.h10
-rw-r--r--include/linux/usb.h59
-rw-r--r--include/linux/usb/audio-v2.h4
-rw-r--r--include/linux/usb/ch9.h5
-rw-r--r--include/linux/usb/chipidea.h2
-rw-r--r--include/linux/usb/composite.h31
-rw-r--r--include/linux/usb/gadget.h11
-rw-r--r--include/linux/usb/hcd.h26
-rw-r--r--include/linux/usb/ljca.h145
-rw-r--r--include/linux/usb/of.h7
-rw-r--r--include/linux/usb/pd.h2
-rw-r--r--include/linux/usb/pd_vdo.h14
-rw-r--r--include/linux/usb/quirks.h3
-rw-r--r--include/linux/usb/r8152.h2
-rw-r--r--include/linux/usb/renesas_usbhs.h10
-rw-r--r--include/linux/usb/tcpci.h19
-rw-r--r--include/linux/usb/tcpm.h19
-rw-r--r--include/linux/usb/typec.h44
-rw-r--r--include/linux/usb/typec_altmode.h32
-rw-r--r--include/linux/usb/typec_dp.h35
-rw-r--r--include/linux/usb/typec_tbt.h10
-rw-r--r--include/linux/user_namespace.h8
-rw-r--r--include/linux/userfaultfd_k.h106
-rw-r--r--include/linux/vdpa.h51
-rw-r--r--include/linux/verification.h1
-rw-r--r--include/linux/vfio.h74
-rw-r--r--include/linux/vfio_pci_core.h28
-rw-r--r--include/linux/vgaarb.h27
-rw-r--r--include/linux/virtio.h39
-rw-r--r--include/linux/virtio_config.h4
-rw-r--r--include/linux/virtio_console.h38
-rw-r--r--include/linux/virtio_net.h28
-rw-r--r--include/linux/virtio_pci_admin.h23
-rw-r--r--include/linux/virtio_pci_modern.h44
-rw-r--r--include/linux/virtio_vsock.h11
-rw-r--r--include/linux/vlynq.h149
-rw-r--r--include/linux/vm_event_item.h4
-rw-r--r--include/linux/vmalloc.h6
-rw-r--r--include/linux/vmcore_info.h81
-rw-r--r--include/linux/vmstat.h60
-rw-r--r--include/linux/vt_kern.h12
-rw-r--r--include/linux/w1-gpio.h22
-rw-r--r--include/linux/wait.h13
-rw-r--r--include/linux/watch_queue.h2
-rw-r--r--include/linux/win_minmax.h4
-rw-r--r--include/linux/wmi.h24
-rw-r--r--include/linux/wordpart.h42
-rw-r--r--include/linux/workqueue.h316
-rw-r--r--include/linux/workqueue_types.h25
-rw-r--r--include/linux/writeback.h20
-rw-r--r--include/linux/wwan.h2
-rw-r--r--include/linux/xarray.h18
-rw-r--r--include/linux/xattr.h10
-rw-r--r--include/linux/zswap.h70
831 files changed, 30329 insertions, 12001 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 641dc4843987..34829f2c517a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -15,6 +15,7 @@
#include <linux/mod_devicetable.h>
#include <linux/property.h>
#include <linux/uuid.h>
+#include <linux/node.h>
struct irq_domain;
struct irq_domain_ops;
@@ -30,6 +31,7 @@ struct irq_domain_ops;
#include <linux/dynamic_debug.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/fw_table.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
@@ -37,6 +39,16 @@ struct irq_domain_ops;
#include <acpi/acpi_io.h>
#include <asm/acpi.h>
+#ifdef CONFIG_ACPI_TABLE_LIB
+#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
+#define __init_or_acpilib
+#define __initdata_or_acpilib
+#else
+#define EXPORT_SYMBOL_ACPI_LIB(x)
+#define __init_or_acpilib __init
+#define __initdata_or_acpilib __initdata
+#endif
+
static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
{
return adev ? adev->handle : NULL;
@@ -119,21 +131,8 @@ enum acpi_address_range_id {
/* Table Handlers */
-union acpi_subtable_headers {
- struct acpi_subtable_header common;
- struct acpi_hmat_structure hmat;
- struct acpi_prmt_module_header prmt;
- struct acpi_cedt_header cedt;
-};
-
typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
-typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
- const unsigned long end);
-
-typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header,
- void *arg, const unsigned long end);
-
/* Debugger support */
struct acpi_debugger_ops {
@@ -207,14 +206,6 @@ static inline int acpi_debugger_notify_command_complete(void)
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
-struct acpi_subtable_proc {
- int id;
- acpi_tbl_entry_handler handler;
- acpi_tbl_entry_handler_arg handler_arg;
- void *arg;
- int count;
-};
-
void __iomem *__acpi_map_table(unsigned long phys, unsigned long size);
void __acpi_unmap_table(void __iomem *map, unsigned long size);
int early_acpi_boot_init(void);
@@ -229,16 +220,6 @@ void acpi_reserve_initial_tables (void);
void acpi_table_init_complete (void);
int acpi_table_init (void);
-#ifdef CONFIG_ACPI_TABLE_LIB
-#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI)
-#define __init_or_acpilib
-#define __initdata_or_acpilib
-#else
-#define EXPORT_SYMBOL_ACPI_LIB(x)
-#define __init_or_acpilib __init
-#define __initdata_or_acpilib __initdata
-#endif
-
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
int __init_or_acpilib acpi_table_parse_entries(char *id,
unsigned long table_size, int entry_id,
@@ -256,10 +237,15 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
int acpi_parse_mcfg (struct acpi_table_header *header);
void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
+static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->flags & ACPI_MADT_ENABLED;
+}
+
/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
-#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH)
+#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
#else
static inline void
@@ -439,6 +425,23 @@ extern int acpi_blacklisted(void);
extern void acpi_osi_setup(char *str);
extern bool acpi_osi_is_win8(void);
+#ifdef CONFIG_ACPI_THERMAL_LIB
+int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp);
+int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp);
+int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp);
+int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
+#endif
+
+#ifdef CONFIG_ACPI_HMAT
+int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord);
+#else
+static inline int acpi_get_genport_coordinates(u32 uid,
+ struct access_coordinate *coord)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
#ifdef CONFIG_ACPI_NUMA
int acpi_map_pxm_to_node(int pxm);
int acpi_get_node(acpi_handle handle);
@@ -477,8 +480,6 @@ static inline int acpi_get_node(acpi_handle handle)
return 0;
}
#endif
-extern int acpi_paddr_to_node(u64 start_addr, u64 size);
-
extern int pnpacpi_disabled;
#define PXM_INVAL (-1)
@@ -773,6 +774,10 @@ const char *acpi_get_subsystem_id(acpi_handle handle);
#define ACPI_HANDLE(dev) (NULL)
#define ACPI_HANDLE_FWNODE(fwnode) (NULL)
+/* Get rid of the -Wunused-variable for adev */
+#define acpi_dev_uid_match(adev, uid2) (adev && false)
+#define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false)
+
#include <acpi/acpi_numa.h>
struct fwnode_handle;
@@ -789,12 +794,6 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv)
struct acpi_device;
-static inline bool
-acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2)
-{
- return false;
-}
-
static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer)
{
return -ENODEV;
@@ -1100,7 +1099,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
u32 val_a, u32 val_b);
-#ifdef CONFIG_X86
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
struct acpi_s2idle_dev_ops {
struct list_head list_node;
void (*prepare)(void);
@@ -1109,15 +1108,14 @@ struct acpi_s2idle_dev_ops {
};
int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
-#endif /* CONFIG_X86 */
-#ifndef CONFIG_IA64
-void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
-#else
-static inline void arch_reserve_mem_area(acpi_physical_address addr,
- size_t size)
+int acpi_get_lps0_constraint(struct acpi_device *adev);
+#else /* CONFIG_SUSPEND && CONFIG_X86 */
+static inline int acpi_get_lps0_constraint(struct device *dev)
{
+ return ACPI_STATE_UNKNOWN;
}
-#endif /* CONFIG_X86 */
+#endif /* CONFIG_SUSPEND && CONFIG_X86 */
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
#else
#define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
#endif
@@ -1172,6 +1170,7 @@ static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
#endif
#ifdef CONFIG_ACPI
+char *acpi_handle_path(acpi_handle handle);
__printf(3, 4)
void acpi_handle_printk(const char *level, acpi_handle handle,
const char *fmt, ...);
@@ -1476,6 +1475,15 @@ static inline int lpit_read_residency_count_address(u64 *address)
}
#endif
+#ifdef CONFIG_ACPI_PROCESSOR_IDLE
+#ifndef arch_get_idle_state_flags
+static inline unsigned int arch_get_idle_state_flags(u32 arch_flags)
+{
+ return 0;
+}
+#endif
+#endif /* CONFIG_ACPI_PROCESSOR_IDLE */
+
#ifdef CONFIG_ACPI_PPTT
int acpi_pptt_cpu_is_thread(unsigned int cpu);
int find_acpi_cpu_topology(unsigned int cpu, int level);
@@ -1535,4 +1543,30 @@ static inline void acpi_device_notify(struct device *dev) { }
static inline void acpi_device_notify_remove(struct device *dev) { }
#endif
+static inline void acpi_use_parent_companion(struct device *dev)
+{
+ ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent));
+}
+
+#ifdef CONFIG_ACPI_HMAT
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+ enum access_coordinate_class access);
+#else
+static inline int hmat_update_target_coordinates(int nid,
+ struct access_coordinate *coord,
+ enum access_coordinate_class access)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+bool acpi_node_backed_by_real_pxm(int nid);
+#else
+static inline bool acpi_node_backed_by_real_pxm(int nid)
+{
+ return false;
+}
+#endif
+
#endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/acpi_amd_wbrf.h b/include/linux/acpi_amd_wbrf.h
new file mode 100644
index 000000000000..898f31d536d4
--- /dev/null
+++ b/include/linux/acpi_amd_wbrf.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Wifi Band Exclusion Interface (AMD ACPI Implementation)
+ * Copyright (C) 2023 Advanced Micro Devices
+ */
+
+#ifndef _ACPI_AMD_WBRF_H
+#define _ACPI_AMD_WBRF_H
+
+#include <linux/device.h>
+#include <linux/notifier.h>
+
+/* The maximum number of frequency band ranges */
+#define MAX_NUM_OF_WBRF_RANGES 11
+
+/* Record actions */
+#define WBRF_RECORD_ADD 0x0
+#define WBRF_RECORD_REMOVE 0x1
+
+/**
+ * struct freq_band_range - Wifi frequency band range definition
+ * @start: start frequency point (in Hz)
+ * @end: end frequency point (in Hz)
+ */
+struct freq_band_range {
+ u64 start;
+ u64 end;
+};
+
+/**
+ * struct wbrf_ranges_in_out - wbrf ranges info
+ * @num_of_ranges: total number of band ranges in this struct
+ * @band_list: array of Wifi band ranges
+ */
+struct wbrf_ranges_in_out {
+ u64 num_of_ranges;
+ struct freq_band_range band_list[MAX_NUM_OF_WBRF_RANGES];
+};
+
+/**
+ * enum wbrf_notifier_actions - wbrf notifier actions index
+ * @WBRF_CHANGED: there was some frequency band updates. The consumers
+ * should retrieve the latest active frequency bands.
+ */
+enum wbrf_notifier_actions {
+ WBRF_CHANGED,
+};
+
+#if IS_ENABLED(CONFIG_AMD_WBRF)
+bool acpi_amd_wbrf_supported_producer(struct device *dev);
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in);
+bool acpi_amd_wbrf_supported_consumer(struct device *dev);
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out);
+int amd_wbrf_register_notifier(struct notifier_block *nb);
+int amd_wbrf_unregister_notifier(struct notifier_block *nb);
+#else
+static inline
+bool acpi_amd_wbrf_supported_consumer(struct device *dev)
+{
+ return false;
+}
+
+static inline
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+ return -ENODEV;
+}
+
+static inline
+bool acpi_amd_wbrf_supported_producer(struct device *dev)
+{
+ return false;
+}
+static inline
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out)
+{
+ return -ENODEV;
+}
+static inline
+int amd_wbrf_register_notifier(struct notifier_block *nb)
+{
+ return -ENODEV;
+}
+static inline
+int amd_wbrf_unregister_notifier(struct notifier_block *nb)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_AMD_WBRF */
+
+#endif /* _ACPI_AMD_WBRF_H */
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index ee7cb6aaff71..1cb65592c95d 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -21,6 +21,7 @@
*/
#define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */
#define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */
+#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */
int iort_register_domain_token(int trans_id, phys_addr_t base,
struct fwnode_handle *fw_node);
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 3a3ab05e13fd..4b97f38f3fcf 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -18,11 +18,8 @@
struct pci_dev;
-struct aer_header_log_regs {
- unsigned int dw0;
- unsigned int dw1;
- unsigned int dw2;
- unsigned int dw3;
+struct pcie_tlp_log {
+ u32 dw[4];
};
struct aer_capability_regs {
@@ -33,34 +30,27 @@ struct aer_capability_regs {
u32 cor_status;
u32 cor_mask;
u32 cap_control;
- struct aer_header_log_regs header_log;
+ struct pcie_tlp_log header_log;
u32 root_command;
u32 root_status;
u16 cor_err_source;
u16 uncor_err_source;
};
+int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log);
+
#if defined(CONFIG_PCIEAER)
-/* PCIe port driver needs this function to enable AER */
-int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-int pci_disable_pcie_error_reporting(struct pci_dev *dev);
int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
+int pcie_aer_is_native(struct pci_dev *dev);
#else
-static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
-{
- return -EINVAL;
-}
-static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-{
- return -EINVAL;
-}
static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
return -EINVAL;
}
+static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
#endif
-void cper_print_aer(struct pci_dev *dev, int aer_severity,
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h
deleted file mode 100644
index 421b0fa90d6a..000000000000
--- a/include/linux/amba/clcd-regs.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * David A Rusling
- *
- * Copyright (C) 2001 ARM Limited
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#ifndef AMBA_CLCD_REGS_H
-#define AMBA_CLCD_REGS_H
-
-/*
- * CLCD Controller Internal Register addresses
- */
-#define CLCD_TIM0 0x00000000
-#define CLCD_TIM1 0x00000004
-#define CLCD_TIM2 0x00000008
-#define CLCD_TIM3 0x0000000c
-#define CLCD_UBAS 0x00000010
-#define CLCD_LBAS 0x00000014
-
-#define CLCD_PL110_IENB 0x00000018
-#define CLCD_PL110_CNTL 0x0000001c
-#define CLCD_PL110_STAT 0x00000020
-#define CLCD_PL110_INTR 0x00000024
-#define CLCD_PL110_UCUR 0x00000028
-#define CLCD_PL110_LCUR 0x0000002C
-
-#define CLCD_PL111_CNTL 0x00000018
-#define CLCD_PL111_IENB 0x0000001c
-#define CLCD_PL111_RIS 0x00000020
-#define CLCD_PL111_MIS 0x00000024
-#define CLCD_PL111_ICR 0x00000028
-#define CLCD_PL111_UCUR 0x0000002c
-#define CLCD_PL111_LCUR 0x00000030
-
-#define CLCD_PALL 0x00000200
-#define CLCD_PALETTE 0x00000200
-
-#define TIM2_PCD_LO_MASK GENMASK(4, 0)
-#define TIM2_PCD_LO_BITS 5
-#define TIM2_CLKSEL (1 << 5)
-#define TIM2_ACB_MASK GENMASK(10, 6)
-#define TIM2_IVS (1 << 11)
-#define TIM2_IHS (1 << 12)
-#define TIM2_IPC (1 << 13)
-#define TIM2_IOE (1 << 14)
-#define TIM2_BCD (1 << 26)
-#define TIM2_PCD_HI_MASK GENMASK(31, 27)
-#define TIM2_PCD_HI_BITS 5
-#define TIM2_PCD_HI_SHIFT 27
-
-#define CNTL_LCDEN (1 << 0)
-#define CNTL_LCDBPP1 (0 << 1)
-#define CNTL_LCDBPP2 (1 << 1)
-#define CNTL_LCDBPP4 (2 << 1)
-#define CNTL_LCDBPP8 (3 << 1)
-#define CNTL_LCDBPP16 (4 << 1)
-#define CNTL_LCDBPP16_565 (6 << 1)
-#define CNTL_LCDBPP16_444 (7 << 1)
-#define CNTL_LCDBPP24 (5 << 1)
-#define CNTL_LCDBW (1 << 4)
-#define CNTL_LCDTFT (1 << 5)
-#define CNTL_LCDMONO8 (1 << 6)
-#define CNTL_LCDDUAL (1 << 7)
-#define CNTL_BGR (1 << 8)
-#define CNTL_BEBO (1 << 9)
-#define CNTL_BEPO (1 << 10)
-#define CNTL_LCDPWR (1 << 11)
-#define CNTL_LCDVCOMP(x) ((x) << 12)
-#define CNTL_LDMAFIFOTIME (1 << 15)
-#define CNTL_WATERMARK (1 << 16)
-
-/* ST Microelectronics variant bits */
-#define CNTL_ST_1XBPP_444 0x0
-#define CNTL_ST_1XBPP_5551 (1 << 17)
-#define CNTL_ST_1XBPP_565 (1 << 18)
-#define CNTL_ST_CDWID_12 0x0
-#define CNTL_ST_CDWID_16 (1 << 19)
-#define CNTL_ST_CDWID_18 (1 << 20)
-#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20))
-#define CNTL_ST_CEAEN (1 << 21)
-#define CNTL_ST_LCDBPP24_PACKED (6 << 1)
-
-#endif /* AMBA_CLCD_REGS_H */
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
deleted file mode 100644
index b6e0cbeaf533..000000000000
--- a/include/linux/amba/clcd.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * linux/include/asm-arm/hardware/amba_clcd.h -- Integrator LCD panel.
- *
- * David A Rusling
- *
- * Copyright (C) 2001 ARM Limited
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details.
- */
-#include <linux/fb.h>
-#include <linux/amba/clcd-regs.h>
-
-enum {
- /* individual formats */
- CLCD_CAP_RGB444 = (1 << 0),
- CLCD_CAP_RGB5551 = (1 << 1),
- CLCD_CAP_RGB565 = (1 << 2),
- CLCD_CAP_RGB888 = (1 << 3),
- CLCD_CAP_BGR444 = (1 << 4),
- CLCD_CAP_BGR5551 = (1 << 5),
- CLCD_CAP_BGR565 = (1 << 6),
- CLCD_CAP_BGR888 = (1 << 7),
-
- /* connection layouts */
- CLCD_CAP_444 = CLCD_CAP_RGB444 | CLCD_CAP_BGR444,
- CLCD_CAP_5551 = CLCD_CAP_RGB5551 | CLCD_CAP_BGR5551,
- CLCD_CAP_565 = CLCD_CAP_RGB565 | CLCD_CAP_BGR565,
- CLCD_CAP_888 = CLCD_CAP_RGB888 | CLCD_CAP_BGR888,
-
- /* red/blue ordering */
- CLCD_CAP_RGB = CLCD_CAP_RGB444 | CLCD_CAP_RGB5551 |
- CLCD_CAP_RGB565 | CLCD_CAP_RGB888,
- CLCD_CAP_BGR = CLCD_CAP_BGR444 | CLCD_CAP_BGR5551 |
- CLCD_CAP_BGR565 | CLCD_CAP_BGR888,
-
- CLCD_CAP_ALL = CLCD_CAP_BGR | CLCD_CAP_RGB,
-};
-
-struct backlight_device;
-
-struct clcd_panel {
- struct fb_videomode mode;
- signed short width; /* width in mm */
- signed short height; /* height in mm */
- u32 tim2;
- u32 tim3;
- u32 cntl;
- u32 caps;
- unsigned int bpp:8,
- fixedtimings:1,
- grayscale:1;
- unsigned int connector;
- struct backlight_device *backlight;
- /*
- * If the B/R lines are switched between the CLCD
- * and the panel we need to know this and not try to
- * compensate with the BGR bit in the control register.
- */
- bool bgr_connection;
-};
-
-struct clcd_regs {
- u32 tim0;
- u32 tim1;
- u32 tim2;
- u32 tim3;
- u32 cntl;
- unsigned long pixclock;
-};
-
-struct clcd_fb;
-
-/*
- * the board-type specific routines
- */
-struct clcd_board {
- const char *name;
-
- /*
- * Optional. Hardware capability flags.
- */
- u32 caps;
-
- /*
- * Optional. Check whether the var structure is acceptable
- * for this display.
- */
- int (*check)(struct clcd_fb *fb, struct fb_var_screeninfo *var);
-
- /*
- * Compulsory. Decode fb->fb.var into regs->*. In the case of
- * fixed timing, set regs->* to the register values required.
- */
- void (*decode)(struct clcd_fb *fb, struct clcd_regs *regs);
-
- /*
- * Optional. Disable any extra display hardware.
- */
- void (*disable)(struct clcd_fb *);
-
- /*
- * Optional. Enable any extra display hardware.
- */
- void (*enable)(struct clcd_fb *);
-
- /*
- * Setup platform specific parts of CLCD driver
- */
- int (*setup)(struct clcd_fb *);
-
- /*
- * mmap the framebuffer memory
- */
- int (*mmap)(struct clcd_fb *, struct vm_area_struct *);
-
- /*
- * Remove platform specific parts of CLCD driver
- */
- void (*remove)(struct clcd_fb *);
-};
-
-struct amba_device;
-struct clk;
-
-/* this data structure describes each frame buffer device we find */
-struct clcd_fb {
- struct fb_info fb;
- struct amba_device *dev;
- struct clk *clk;
- struct clcd_panel *panel;
- struct clcd_board *board;
- void *board_data;
- void __iomem *regs;
- u16 off_ienb;
- u16 off_cntl;
- u32 clcd_cntl;
- u32 cmap[16];
- bool clk_enabled;
-};
-
-static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs)
-{
- struct fb_var_screeninfo *var = &fb->fb.var;
- u32 val, cpl;
-
- /*
- * Program the CLCD controller registers and start the CLCD
- */
- val = ((var->xres / 16) - 1) << 2;
- val |= (var->hsync_len - 1) << 8;
- val |= (var->right_margin - 1) << 16;
- val |= (var->left_margin - 1) << 24;
- regs->tim0 = val;
-
- val = var->yres;
- if (fb->panel->cntl & CNTL_LCDDUAL)
- val /= 2;
- val -= 1;
- val |= (var->vsync_len - 1) << 10;
- val |= var->lower_margin << 16;
- val |= var->upper_margin << 24;
- regs->tim1 = val;
-
- val = fb->panel->tim2;
- val |= var->sync & FB_SYNC_HOR_HIGH_ACT ? 0 : TIM2_IHS;
- val |= var->sync & FB_SYNC_VERT_HIGH_ACT ? 0 : TIM2_IVS;
-
- cpl = var->xres_virtual;
- if (fb->panel->cntl & CNTL_LCDTFT) /* TFT */
- /* / 1 */;
- else if (!var->grayscale) /* STN color */
- cpl = cpl * 8 / 3;
- else if (fb->panel->cntl & CNTL_LCDMONO8) /* STN monochrome, 8bit */
- cpl /= 8;
- else /* STN monochrome, 4bit */
- cpl /= 4;
-
- regs->tim2 = val | ((cpl - 1) << 16);
-
- regs->tim3 = fb->panel->tim3;
-
- val = fb->panel->cntl;
- if (var->grayscale)
- val |= CNTL_LCDBW;
-
- if (fb->panel->caps && fb->board->caps && var->bits_per_pixel >= 16) {
- /*
- * if board and panel supply capabilities, we can support
- * changing BGR/RGB depending on supplied parameters. Here
- * we switch to what the framebuffer is providing if need
- * be, so if the framebuffer is BGR but the display connection
- * is RGB (first case) we switch it around. Vice versa mutatis
- * mutandis if the framebuffer is RGB but the display connection
- * is BGR, we flip it around.
- */
- if (var->red.offset == 0)
- val &= ~CNTL_BGR;
- else
- val |= CNTL_BGR;
- if (fb->panel->bgr_connection)
- val ^= CNTL_BGR;
- }
-
- switch (var->bits_per_pixel) {
- case 1:
- val |= CNTL_LCDBPP1;
- break;
- case 2:
- val |= CNTL_LCDBPP2;
- break;
- case 4:
- val |= CNTL_LCDBPP4;
- break;
- case 8:
- val |= CNTL_LCDBPP8;
- break;
- case 16:
- /*
- * PL110 cannot choose between 5551 and 565 modes in its
- * control register. It is possible to use 565 with
- * custom external wiring.
- */
- if (amba_part(fb->dev) == 0x110 ||
- var->green.length == 5)
- val |= CNTL_LCDBPP16;
- else if (var->green.length == 6)
- val |= CNTL_LCDBPP16_565;
- else
- val |= CNTL_LCDBPP16_444;
- break;
- case 32:
- val |= CNTL_LCDBPP24;
- break;
- }
-
- regs->cntl = val;
- regs->pixclock = var->pixclock;
-}
-
-static inline int clcdfb_check(struct clcd_fb *fb, struct fb_var_screeninfo *var)
-{
- var->xres_virtual = var->xres = (var->xres + 15) & ~15;
- var->yres_virtual = var->yres = (var->yres + 1) & ~1;
-
-#define CHECK(e,l,h) (var->e < l || var->e > h)
- if (CHECK(right_margin, (5+1), 256) || /* back porch */
- CHECK(left_margin, (5+1), 256) || /* front porch */
- CHECK(hsync_len, (5+1), 256) ||
- var->xres > 4096 ||
- var->lower_margin > 255 || /* back porch */
- var->upper_margin > 255 || /* front porch */
- var->vsync_len > 32 ||
- var->yres > 1024)
- return -EINVAL;
-#undef CHECK
-
- /* single panel mode: PCD = max(PCD, 1) */
- /* dual panel mode: PCD = max(PCD, 5) */
-
- /*
- * You can't change the grayscale setting, and
- * we can only do non-interlaced video.
- */
- if (var->grayscale != fb->fb.var.grayscale ||
- (var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED)
- return -EINVAL;
-
-#define CHECK(e) (var->e != fb->fb.var.e)
- if (fb->panel->fixedtimings &&
- (CHECK(xres) ||
- CHECK(yres) ||
- CHECK(bits_per_pixel) ||
- CHECK(pixclock) ||
- CHECK(left_margin) ||
- CHECK(right_margin) ||
- CHECK(upper_margin) ||
- CHECK(lower_margin) ||
- CHECK(hsync_len) ||
- CHECK(vsync_len) ||
- CHECK(sync)))
- return -EINVAL;
-#undef CHECK
-
- var->nonstd = 0;
- var->accel_flags = 0;
-
- return 0;
-}
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index 9bf58aac0df2..d7b07d0311e1 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -16,6 +16,7 @@
#ifndef _SSP_PL022_H
#define _SSP_PL022_H
+#include <linux/dmaengine.h>
#include <linux/types.h>
/**
@@ -224,6 +225,7 @@ struct dma_chan;
* struct pl022_ssp_master - device.platform_data for SPI controller devices.
* @bus_id: identifier for this bus
* @enable_dma: if true enables DMA driven transfers.
+ * @dma_filter: callback filter for dma_request_channel.
* @dma_rx_param: parameter to locate an RX DMA channel.
* @dma_tx_param: parameter to locate a TX DMA channel.
* @autosuspend_delay: delay in ms following transfer completion before the
@@ -235,7 +237,7 @@ struct dma_chan;
struct pl022_ssp_controller {
u16 bus_id;
u8 enable_dma:1;
- bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+ dma_filter_fn dma_filter;
void *dma_rx_param;
void *dma_tx_param;
int autosuspend_delay;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index a1307b58cc2c..9120de05ead0 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -10,6 +10,11 @@
#ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H
#define ASM_ARM_HARDWARE_SERIAL_AMBA_H
+#ifndef __ASSEMBLY__
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#endif
+
#include <linux/types.h>
/* -------------------------------------------------------------------------------
@@ -70,141 +75,145 @@
#define ZX_UART011_ICR 0x4c
#define ZX_UART011_DMACR 0x50
-#define UART011_DR_OE (1 << 11)
-#define UART011_DR_BE (1 << 10)
-#define UART011_DR_PE (1 << 9)
-#define UART011_DR_FE (1 << 8)
-
-#define UART01x_RSR_OE 0x08
-#define UART01x_RSR_BE 0x04
-#define UART01x_RSR_PE 0x02
-#define UART01x_RSR_FE 0x01
-
-#define UART011_FR_RI 0x100
-#define UART011_FR_TXFE 0x080
-#define UART011_FR_RXFF 0x040
-#define UART01x_FR_TXFF 0x020
-#define UART01x_FR_RXFE 0x010
-#define UART01x_FR_BUSY 0x008
-#define UART01x_FR_DCD 0x004
-#define UART01x_FR_DSR 0x002
-#define UART01x_FR_CTS 0x001
+#define UART011_DR_OE BIT(11)
+#define UART011_DR_BE BIT(10)
+#define UART011_DR_PE BIT(9)
+#define UART011_DR_FE BIT(8)
+
+#define UART01x_RSR_OE BIT(3)
+#define UART01x_RSR_BE BIT(2)
+#define UART01x_RSR_PE BIT(1)
+#define UART01x_RSR_FE BIT(0)
+
+#define UART011_FR_RI BIT(8)
+#define UART011_FR_TXFE BIT(7)
+#define UART011_FR_RXFF BIT(6)
+#define UART01x_FR_TXFF (1 << 5) /* used in ASM */
+#define UART01x_FR_RXFE BIT(4)
+#define UART01x_FR_BUSY (1 << 3) /* used in ASM */
+#define UART01x_FR_DCD BIT(2)
+#define UART01x_FR_DSR BIT(1)
+#define UART01x_FR_CTS BIT(0)
#define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY)
/*
* Some bits of Flag Register on ZTE device have different position from
* standard ones.
*/
-#define ZX_UART01x_FR_BUSY 0x100
-#define ZX_UART01x_FR_DSR 0x008
-#define ZX_UART01x_FR_CTS 0x002
-#define ZX_UART011_FR_RI 0x001
-
-#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */
-#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */
-#define UART011_CR_OUT2 0x2000 /* OUT2 */
-#define UART011_CR_OUT1 0x1000 /* OUT1 */
-#define UART011_CR_RTS 0x0800 /* RTS */
-#define UART011_CR_DTR 0x0400 /* DTR */
-#define UART011_CR_RXE 0x0200 /* receive enable */
-#define UART011_CR_TXE 0x0100 /* transmit enable */
-#define UART011_CR_LBE 0x0080 /* loopback enable */
-#define UART010_CR_RTIE 0x0040
-#define UART010_CR_TIE 0x0020
-#define UART010_CR_RIE 0x0010
-#define UART010_CR_MSIE 0x0008
-#define ST_UART011_CR_OVSFACT 0x0008 /* Oversampling factor */
-#define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */
-#define UART01x_CR_SIREN 0x0002 /* SIR enable */
-#define UART01x_CR_UARTEN 0x0001 /* UART enable */
-
-#define UART011_LCRH_SPS 0x80
+#define ZX_UART01x_FR_BUSY BIT(8)
+#define ZX_UART01x_FR_DSR BIT(3)
+#define ZX_UART01x_FR_CTS BIT(1)
+#define ZX_UART011_FR_RI BIT(0)
+
+#define UART011_CR_CTSEN BIT(15) /* CTS hardware flow control */
+#define UART011_CR_RTSEN BIT(14) /* RTS hardware flow control */
+#define UART011_CR_OUT2 BIT(13) /* OUT2 */
+#define UART011_CR_OUT1 BIT(12) /* OUT1 */
+#define UART011_CR_RTS BIT(11) /* RTS */
+#define UART011_CR_DTR BIT(10) /* DTR */
+#define UART011_CR_RXE BIT(9) /* receive enable */
+#define UART011_CR_TXE BIT(8) /* transmit enable */
+#define UART011_CR_LBE BIT(7) /* loopback enable */
+#define UART010_CR_RTIE BIT(6)
+#define UART010_CR_TIE BIT(5)
+#define UART010_CR_RIE BIT(4)
+#define UART010_CR_MSIE BIT(3)
+#define ST_UART011_CR_OVSFACT BIT(3) /* Oversampling factor */
+#define UART01x_CR_IIRLP BIT(2) /* SIR low power mode */
+#define UART01x_CR_SIREN BIT(1) /* SIR enable */
+#define UART01x_CR_UARTEN BIT(0) /* UART enable */
+
+#define UART011_LCRH_SPS BIT(7)
#define UART01x_LCRH_WLEN_8 0x60
#define UART01x_LCRH_WLEN_7 0x40
#define UART01x_LCRH_WLEN_6 0x20
#define UART01x_LCRH_WLEN_5 0x00
-#define UART01x_LCRH_FEN 0x10
-#define UART01x_LCRH_STP2 0x08
-#define UART01x_LCRH_EPS 0x04
-#define UART01x_LCRH_PEN 0x02
-#define UART01x_LCRH_BRK 0x01
-
-#define ST_UART011_DMAWM_RX_1 (0 << 3)
-#define ST_UART011_DMAWM_RX_2 (1 << 3)
-#define ST_UART011_DMAWM_RX_4 (2 << 3)
-#define ST_UART011_DMAWM_RX_8 (3 << 3)
-#define ST_UART011_DMAWM_RX_16 (4 << 3)
-#define ST_UART011_DMAWM_RX_32 (5 << 3)
-#define ST_UART011_DMAWM_RX_48 (6 << 3)
-#define ST_UART011_DMAWM_TX_1 0
-#define ST_UART011_DMAWM_TX_2 1
-#define ST_UART011_DMAWM_TX_4 2
-#define ST_UART011_DMAWM_TX_8 3
-#define ST_UART011_DMAWM_TX_16 4
-#define ST_UART011_DMAWM_TX_32 5
-#define ST_UART011_DMAWM_TX_48 6
-
-#define UART010_IIR_RTIS 0x08
-#define UART010_IIR_TIS 0x04
-#define UART010_IIR_RIS 0x02
-#define UART010_IIR_MIS 0x01
-
-#define UART011_IFLS_RX1_8 (0 << 3)
-#define UART011_IFLS_RX2_8 (1 << 3)
-#define UART011_IFLS_RX4_8 (2 << 3)
-#define UART011_IFLS_RX6_8 (3 << 3)
-#define UART011_IFLS_RX7_8 (4 << 3)
-#define UART011_IFLS_TX1_8 (0 << 0)
-#define UART011_IFLS_TX2_8 (1 << 0)
-#define UART011_IFLS_TX4_8 (2 << 0)
-#define UART011_IFLS_TX6_8 (3 << 0)
-#define UART011_IFLS_TX7_8 (4 << 0)
+#define UART01x_LCRH_FEN BIT(4)
+#define UART01x_LCRH_STP2 BIT(3)
+#define UART01x_LCRH_EPS BIT(2)
+#define UART01x_LCRH_PEN BIT(1)
+#define UART01x_LCRH_BRK BIT(0)
+
+#define ST_UART011_DMAWM_RX GENMASK(5, 3)
+#define ST_UART011_DMAWM_RX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 0)
+#define ST_UART011_DMAWM_RX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 1)
+#define ST_UART011_DMAWM_RX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 2)
+#define ST_UART011_DMAWM_RX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 3)
+#define ST_UART011_DMAWM_RX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 4)
+#define ST_UART011_DMAWM_RX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 5)
+#define ST_UART011_DMAWM_RX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_RX, 6)
+#define ST_UART011_DMAWM_TX GENMASK(2, 0)
+#define ST_UART011_DMAWM_TX_1 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 0)
+#define ST_UART011_DMAWM_TX_2 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 1)
+#define ST_UART011_DMAWM_TX_4 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 2)
+#define ST_UART011_DMAWM_TX_8 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 3)
+#define ST_UART011_DMAWM_TX_16 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 4)
+#define ST_UART011_DMAWM_TX_32 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 5)
+#define ST_UART011_DMAWM_TX_48 FIELD_PREP_CONST(ST_UART011_DMAWM_TX, 6)
+
+#define UART010_IIR_RTIS BIT(3)
+#define UART010_IIR_TIS BIT(2)
+#define UART010_IIR_RIS BIT(1)
+#define UART010_IIR_MIS BIT(0)
+
+#define UART011_IFLS_RXIFLSEL GENMASK(5, 3)
+#define UART011_IFLS_RX1_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 0)
+#define UART011_IFLS_RX2_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 1)
+#define UART011_IFLS_RX4_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 2)
+#define UART011_IFLS_RX6_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 3)
+#define UART011_IFLS_RX7_8 FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 4)
+#define UART011_IFLS_TXIFLSEL GENMASK(2, 0)
+#define UART011_IFLS_TX1_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 0)
+#define UART011_IFLS_TX2_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 1)
+#define UART011_IFLS_TX4_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 2)
+#define UART011_IFLS_TX6_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 3)
+#define UART011_IFLS_TX7_8 FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 4)
/* special values for ST vendor with deeper fifo */
-#define UART011_IFLS_RX_HALF (5 << 3)
-#define UART011_IFLS_TX_HALF (5 << 0)
-
-#define UART011_OEIM (1 << 10) /* overrun error interrupt mask */
-#define UART011_BEIM (1 << 9) /* break error interrupt mask */
-#define UART011_PEIM (1 << 8) /* parity error interrupt mask */
-#define UART011_FEIM (1 << 7) /* framing error interrupt mask */
-#define UART011_RTIM (1 << 6) /* receive timeout interrupt mask */
-#define UART011_TXIM (1 << 5) /* transmit interrupt mask */
-#define UART011_RXIM (1 << 4) /* receive interrupt mask */
-#define UART011_DSRMIM (1 << 3) /* DSR interrupt mask */
-#define UART011_DCDMIM (1 << 2) /* DCD interrupt mask */
-#define UART011_CTSMIM (1 << 1) /* CTS interrupt mask */
-#define UART011_RIMIM (1 << 0) /* RI interrupt mask */
-
-#define UART011_OEIS (1 << 10) /* overrun error interrupt status */
-#define UART011_BEIS (1 << 9) /* break error interrupt status */
-#define UART011_PEIS (1 << 8) /* parity error interrupt status */
-#define UART011_FEIS (1 << 7) /* framing error interrupt status */
-#define UART011_RTIS (1 << 6) /* receive timeout interrupt status */
-#define UART011_TXIS (1 << 5) /* transmit interrupt status */
-#define UART011_RXIS (1 << 4) /* receive interrupt status */
-#define UART011_DSRMIS (1 << 3) /* DSR interrupt status */
-#define UART011_DCDMIS (1 << 2) /* DCD interrupt status */
-#define UART011_CTSMIS (1 << 1) /* CTS interrupt status */
-#define UART011_RIMIS (1 << 0) /* RI interrupt status */
-
-#define UART011_OEIC (1 << 10) /* overrun error interrupt clear */
-#define UART011_BEIC (1 << 9) /* break error interrupt clear */
-#define UART011_PEIC (1 << 8) /* parity error interrupt clear */
-#define UART011_FEIC (1 << 7) /* framing error interrupt clear */
-#define UART011_RTIC (1 << 6) /* receive timeout interrupt clear */
-#define UART011_TXIC (1 << 5) /* transmit interrupt clear */
-#define UART011_RXIC (1 << 4) /* receive interrupt clear */
-#define UART011_DSRMIC (1 << 3) /* DSR interrupt clear */
-#define UART011_DCDMIC (1 << 2) /* DCD interrupt clear */
-#define UART011_CTSMIC (1 << 1) /* CTS interrupt clear */
-#define UART011_RIMIC (1 << 0) /* RI interrupt clear */
-
-#define UART011_DMAONERR (1 << 2) /* disable dma on error */
-#define UART011_TXDMAE (1 << 1) /* enable transmit dma */
-#define UART011_RXDMAE (1 << 0) /* enable receive dma */
-
-#define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE)
-#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
+#define UART011_IFLS_RX_HALF FIELD_PREP_CONST(UART011_IFLS_RXIFLSEL, 5)
+#define UART011_IFLS_TX_HALF FIELD_PREP_CONST(UART011_IFLS_TXIFLSEL, 5)
+
+#define UART011_OEIM BIT(10) /* overrun error interrupt mask */
+#define UART011_BEIM BIT(9) /* break error interrupt mask */
+#define UART011_PEIM BIT(8) /* parity error interrupt mask */
+#define UART011_FEIM BIT(7) /* framing error interrupt mask */
+#define UART011_RTIM BIT(6) /* receive timeout interrupt mask */
+#define UART011_TXIM BIT(5) /* transmit interrupt mask */
+#define UART011_RXIM BIT(4) /* receive interrupt mask */
+#define UART011_DSRMIM BIT(3) /* DSR interrupt mask */
+#define UART011_DCDMIM BIT(2) /* DCD interrupt mask */
+#define UART011_CTSMIM BIT(1) /* CTS interrupt mask */
+#define UART011_RIMIM BIT(0) /* RI interrupt mask */
+
+#define UART011_OEIS BIT(10) /* overrun error interrupt status */
+#define UART011_BEIS BIT(9) /* break error interrupt status */
+#define UART011_PEIS BIT(8) /* parity error interrupt status */
+#define UART011_FEIS BIT(7) /* framing error interrupt status */
+#define UART011_RTIS BIT(6) /* receive timeout interrupt status */
+#define UART011_TXIS BIT(5) /* transmit interrupt status */
+#define UART011_RXIS BIT(4) /* receive interrupt status */
+#define UART011_DSRMIS BIT(3) /* DSR interrupt status */
+#define UART011_DCDMIS BIT(2) /* DCD interrupt status */
+#define UART011_CTSMIS BIT(1) /* CTS interrupt status */
+#define UART011_RIMIS BIT(0) /* RI interrupt status */
+
+#define UART011_OEIC BIT(10) /* overrun error interrupt clear */
+#define UART011_BEIC BIT(9) /* break error interrupt clear */
+#define UART011_PEIC BIT(8) /* parity error interrupt clear */
+#define UART011_FEIC BIT(7) /* framing error interrupt clear */
+#define UART011_RTIC BIT(6) /* receive timeout interrupt clear */
+#define UART011_TXIC BIT(5) /* transmit interrupt clear */
+#define UART011_RXIC BIT(4) /* receive interrupt clear */
+#define UART011_DSRMIC BIT(3) /* DSR interrupt clear */
+#define UART011_DCDMIC BIT(2) /* DCD interrupt clear */
+#define UART011_CTSMIC BIT(1) /* CTS interrupt clear */
+#define UART011_RIMIC BIT(0) /* RI interrupt clear */
+
+#define UART011_DMAONERR BIT(2) /* disable dma on error */
+#define UART011_TXDMAE BIT(1) /* enable transmit dma */
+#define UART011_RXDMAE BIT(0) /* enable receive dma */
+
+#define UART01x_RSR_ANY (UART01x_RSR_OE | UART01x_RSR_BE | UART01x_RSR_PE | UART01x_RSR_FE)
+#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD | UART01x_FR_DSR | UART01x_FR_CTS)
#ifndef __ASSEMBLY__
struct amba_device; /* in uncompress this is included but amba/bus.h is not */
@@ -220,8 +229,8 @@ struct amba_pl011_data {
bool dma_rx_poll_enable;
unsigned int dma_rx_poll_rate;
unsigned int dma_rx_poll_timeout;
- void (*init) (void);
- void (*exit) (void);
+ void (*init)(void);
+ void (*exit)(void);
};
#endif
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 953e6f12fa1c..2b90c48a6a87 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -32,128 +32,7 @@ struct task_struct;
struct pci_dev;
extern int amd_iommu_detect(void);
-extern int amd_iommu_init_hardware(void);
-
-/**
- * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
- * @pdev: The PCI device to initialize
- * @pasids: Number of PASIDs to support for this device
- *
- * This function does all setup for the device pdev so that it can be
- * used with IOMMUv2.
- * Returns 0 on success or negative value on error.
- */
-extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
-
-/**
- * amd_iommu_free_device() - Free all IOMMUv2 related device resources
- * and disable IOMMUv2 usage for this device
- * @pdev: The PCI device to disable IOMMUv2 usage for'
- */
-extern void amd_iommu_free_device(struct pci_dev *pdev);
-
-/**
- * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
- * @pdev: The PCI device to bind the task to
- * @pasid: The PASID on the device the task should be bound to
- * @task: the task to bind
- *
- * The function returns 0 on success or a negative value on error.
- */
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task);
-
-/**
- * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
- * a device
- * @pdev: The device of the PASID
- * @pasid: The PASID to unbind
- *
- * When this function returns the device is no longer using the PASID
- * and the PASID is no longer bound to its task.
- */
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid);
-
-/**
- * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
- * PRI requests
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- *
- * The IOMMUv2 driver invokes this call-back when it is unable to
- * successfully handle a PRI request. The device driver can then decide
- * which PRI response the device should see. Possible return values for
- * the call-back are:
- *
- * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
- * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
- * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device,
- * the device is required to disable
- * PRI when it receives this response
- *
- * The function returns 0 on success or negative value on error.
- */
-#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0
-#define AMD_IOMMU_INV_PRI_RSP_INVALID 1
-#define AMD_IOMMU_INV_PRI_RSP_FAIL 2
-
-typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
- u32 pasid,
- unsigned long address,
- u16);
-
-extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb);
-
-#define PPR_FAULT_EXEC (1 << 1)
-#define PPR_FAULT_READ (1 << 2)
-#define PPR_FAULT_WRITE (1 << 5)
-#define PPR_FAULT_USER (1 << 6)
-#define PPR_FAULT_RSVD (1 << 7)
-#define PPR_FAULT_GN (1 << 8)
-
-/**
- * amd_iommu_device_info() - Get information about IOMMUv2 support of a
- * PCI device
- * @pdev: PCI device to query information from
- * @info: A pointer to an amd_iommu_device_info structure which will contain
- * the information about the PCI device
- *
- * Returns 0 on success, negative value on error
- */
-
-#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
-#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution
- on memory pages */
-#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request
- super-user privileges */
-
-struct amd_iommu_device_info {
- int max_pasids;
- u32 flags;
-};
-
-extern int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info);
-
-/**
- * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
- * a pasid context. This call-back is
- * invoked when the IOMMUv2 driver needs to
- * invalidate a PASID context, for example
- * because the task that is bound to that
- * context is about to exit.
- *
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- */
-
-typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid);
-extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb);
#else /* CONFIG_AMD_IOMMU */
static inline int amd_iommu_detect(void) { return -ENODEV; }
@@ -206,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn,
u64 *value);
struct amd_iommu *get_amd_iommu(unsigned int idx);
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-int amd_iommu_snp_enable(void);
+#ifdef CONFIG_KVM_AMD_SEV
+int amd_iommu_snp_disable(void);
+#else
+static inline int amd_iommu_snp_disable(void) { return 0; }
#endif
#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/include/linux/amd-pmf-io.h b/include/linux/amd-pmf-io.h
new file mode 100644
index 000000000000..b4f818205216
--- /dev/null
+++ b/include/linux/amd-pmf-io.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Platform Management Framework Interface
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Authors: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+ * Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#ifndef AMD_PMF_IO_H
+#define AMD_PMF_IO_H
+
+#include <linux/types.h>
+
+/**
+ * enum sfh_message_type - Query the SFH message type
+ * @MT_HPD: Message ID to know the Human presence info from MP2 FW
+ * @MT_ALS: Message ID to know the Ambient light info from MP2 FW
+ */
+enum sfh_message_type {
+ MT_HPD,
+ MT_ALS,
+};
+
+/**
+ * enum sfh_hpd_info - Query the Human presence information
+ * @SFH_NOT_DETECTED: Check the HPD connection information from MP2 FW
+ * @SFH_USER_PRESENT: Check if the user is present from HPD sensor
+ * @SFH_USER_AWAY: Check if the user is away from HPD sensor
+ */
+enum sfh_hpd_info {
+ SFH_NOT_DETECTED,
+ SFH_USER_PRESENT,
+ SFH_USER_AWAY,
+};
+
+/**
+ * struct amd_sfh_info - get HPD sensor info from MP2 FW
+ * @ambient_light: Populates the ambient light information
+ * @user_present: Populates the user presence information
+ */
+struct amd_sfh_info {
+ u32 ambient_light;
+ u8 user_present;
+};
+
+int amd_get_sfh_info(struct amd_sfh_info *sfh_info, enum sfh_message_type op);
+#endif
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 446394f84606..d21838835abd 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -39,11 +39,16 @@ struct amd_aperf_mperf {
* @cppc_req_cached: cached performance request hints
* @highest_perf: the maximum performance an individual processor may reach,
* assuming ideal conditions
+ * For platforms that do not support the preferred core feature, the
+ * highest_pef may be configured with 166 or 255, to avoid max frequency
+ * calculated wrongly. we take the fixed value as the highest_perf.
* @nominal_perf: the maximum sustained performance level of the processor,
* assuming ideal operating conditions
* @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
* savings are achieved
* @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
+ * priority.
* @max_freq: the frequency that mapped to highest_perf
* @min_freq: the frequency that mapped to lowest_perf
* @nominal_freq: the frequency that mapped to nominal_perf
@@ -52,6 +57,9 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @hw_prefcore: check whether HW supports preferred core featue.
+ * Only when hw_prefcore and early prefcore param are true,
+ * AMD P-State driver supports preferred core featue.
* @epp_policy: Last saved policy used to set energy-performance preference
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
@@ -70,6 +78,11 @@ struct amd_cpudata {
u32 nominal_perf;
u32 lowest_nonlinear_perf;
u32 lowest_perf;
+ u32 prefcore_ranking;
+ u32 min_limit_perf;
+ u32 max_limit_perf;
+ u32 min_limit_freq;
+ u32 max_limit_freq;
u32 max_freq;
u32 min_freq;
@@ -81,6 +94,7 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+ bool hw_prefcore;
/* EPP feature related attributes*/
s16 epp_policy;
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 5deaddbd7927..93a5f16d03f3 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -15,13 +15,13 @@ struct inode;
struct file *anon_inode_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags);
-struct file *anon_inode_getfile_secure(const char *name,
+struct file *anon_inode_create_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags,
const struct inode *context_inode);
int anon_inode_getfd(const char *name, const struct file_operations *fops,
void *priv, int flags);
-int anon_inode_getfd_secure(const char *name,
+int anon_inode_create_getfd(const char *name,
const struct file_operations *fops,
void *priv, int flags,
const struct inode *context_inode);
diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h
deleted file mode 100644
index 720fbb70294a..000000000000
--- a/include/linux/apple-mailbox.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
-/*
- * Apple mailbox message format
- *
- * Copyright (C) 2021 The Asahi Linux Contributors
- */
-
-#ifndef _LINUX_APPLE_MAILBOX_H_
-#define _LINUX_APPLE_MAILBOX_H_
-
-#include <linux/types.h>
-
-/* encodes a single 96bit message sent over the single channel */
-struct apple_mbox_msg {
- u64 msg0;
- u32 msg1;
-};
-
-#endif
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index a07b510e7dc5..a63d61ca55af 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -27,6 +27,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu)
void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
+DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
+
+static inline unsigned long topology_get_freq_ref(int cpu)
+{
+ return per_cpu(capacity_freq_ref, cpu);
+}
+
DECLARE_PER_CPU(unsigned long, arch_freq_scale);
static inline unsigned long topology_get_freq_scale(int cpu)
@@ -92,6 +99,7 @@ void update_siblings_masks(unsigned int cpu);
void remove_cpu_topology(unsigned int cpuid);
void reset_cpu_topology(void);
int parse_acpi_topology(void);
+void freq_inv_set_max_ratio(int cpu, u64 max_rate);
#endif
#endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/args.h b/include/linux/args.h
new file mode 100644
index 000000000000..8ff60a54eb7d
--- /dev/null
+++ b/include/linux/args.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/*
+ * How do these macros work?
+ *
+ * In __COUNT_ARGS() _0 to _12 are just placeholders from the start
+ * in order to make sure _n is positioned over the correct number
+ * from 12 to 0 (depending on X, which is a variadic argument list).
+ * They serve no purpose other than occupying a position. Since each
+ * macro parameter must have a distinct identifier, those identifiers
+ * are as good as any.
+ *
+ * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns
+ * that as _n.
+ */
+
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/* Concatenate two parameters, but allow them to be expanded beforehand. */
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index f196c19f8e55..083f85653716 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -5,6 +5,7 @@
#ifndef __LINUX_ARM_SMCCC_H
#define __LINUX_ARM_SMCCC_H
+#include <linux/args.h>
#include <linux/init.h>
#include <uapi/linux/const.h>
@@ -66,6 +67,8 @@
#define ARM_SMCCC_VERSION_1_3 0x10003
#define ARM_SMCCC_1_3_SVE_HINT 0x10000
+#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT
+
#define ARM_SMCCC_VERSION_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
@@ -413,31 +416,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#endif
-#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x
-
-#define __count_args(...) \
- ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)
-
-#define __constraint_read_0 "r" (arg0)
-#define __constraint_read_1 __constraint_read_0, "r" (arg1)
-#define __constraint_read_2 __constraint_read_1, "r" (arg2)
-#define __constraint_read_3 __constraint_read_2, "r" (arg3)
-#define __constraint_read_4 __constraint_read_3, "r" (arg4)
-#define __constraint_read_5 __constraint_read_4, "r" (arg5)
-#define __constraint_read_6 __constraint_read_5, "r" (arg6)
-#define __constraint_read_7 __constraint_read_6, "r" (arg7)
+#define __constraint_read_2 "r" (arg0)
+#define __constraint_read_3 __constraint_read_2, "r" (arg1)
+#define __constraint_read_4 __constraint_read_3, "r" (arg2)
+#define __constraint_read_5 __constraint_read_4, "r" (arg3)
+#define __constraint_read_6 __constraint_read_5, "r" (arg4)
+#define __constraint_read_7 __constraint_read_6, "r" (arg5)
+#define __constraint_read_8 __constraint_read_7, "r" (arg6)
+#define __constraint_read_9 __constraint_read_8, "r" (arg7)
-#define __declare_arg_0(a0, res) \
+#define __declare_arg_2(a0, res) \
struct arm_smccc_res *___res = res; \
register unsigned long arg0 asm("r0") = (u32)a0
-#define __declare_arg_1(a0, a1, res) \
+#define __declare_arg_3(a0, a1, res) \
typeof(a1) __a1 = a1; \
struct arm_smccc_res *___res = res; \
register unsigned long arg0 asm("r0") = (u32)a0; \
register typeof(a1) arg1 asm("r1") = __a1
-#define __declare_arg_2(a0, a1, a2, res) \
+#define __declare_arg_4(a0, a1, a2, res) \
typeof(a1) __a1 = a1; \
typeof(a2) __a2 = a2; \
struct arm_smccc_res *___res = res; \
@@ -445,7 +443,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register typeof(a1) arg1 asm("r1") = __a1; \
register typeof(a2) arg2 asm("r2") = __a2
-#define __declare_arg_3(a0, a1, a2, a3, res) \
+#define __declare_arg_5(a0, a1, a2, a3, res) \
typeof(a1) __a1 = a1; \
typeof(a2) __a2 = a2; \
typeof(a3) __a3 = a3; \
@@ -455,34 +453,26 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register typeof(a2) arg2 asm("r2") = __a2; \
register typeof(a3) arg3 asm("r3") = __a3
-#define __declare_arg_4(a0, a1, a2, a3, a4, res) \
+#define __declare_arg_6(a0, a1, a2, a3, a4, res) \
typeof(a4) __a4 = a4; \
- __declare_arg_3(a0, a1, a2, a3, res); \
+ __declare_arg_5(a0, a1, a2, a3, res); \
register typeof(a4) arg4 asm("r4") = __a4
-#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \
+#define __declare_arg_7(a0, a1, a2, a3, a4, a5, res) \
typeof(a5) __a5 = a5; \
- __declare_arg_4(a0, a1, a2, a3, a4, res); \
+ __declare_arg_6(a0, a1, a2, a3, a4, res); \
register typeof(a5) arg5 asm("r5") = __a5
-#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \
+#define __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res) \
typeof(a6) __a6 = a6; \
- __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \
+ __declare_arg_7(a0, a1, a2, a3, a4, a5, res); \
register typeof(a6) arg6 asm("r6") = __a6
-#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \
+#define __declare_arg_9(a0, a1, a2, a3, a4, a5, a6, a7, res) \
typeof(a7) __a7 = a7; \
- __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \
+ __declare_arg_8(a0, a1, a2, a3, a4, a5, a6, res); \
register typeof(a7) arg7 asm("r7") = __a7
-#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__)
-#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__)
-
-#define ___constraints(count) \
- : __constraint_read_ ## count \
- : smccc_sve_clobbers "memory"
-#define __constraints(count) ___constraints(count)
-
/*
* We have an output list that is not necessarily used, and GCC feels
* entitled to optimise the whole sequence away. "volatile" is what
@@ -494,11 +484,14 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register unsigned long r1 asm("r1"); \
register unsigned long r2 asm("r2"); \
register unsigned long r3 asm("r3"); \
- __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
+ CONCATENATE(__declare_arg_, \
+ COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \
asm volatile(SMCCC_SVE_CHECK \
inst "\n" : \
"=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
- __constraints(__count_args(__VA_ARGS__))); \
+ : CONCATENATE(__constraint_read_, \
+ COUNT_ARGS(__VA_ARGS__)) \
+ : smccc_sve_clobbers "memory"); \
if (___res) \
*___res = (typeof(*___res)){r0, r1, r2, r3}; \
} while (0)
@@ -542,8 +535,12 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
*/
#define __fail_smccc_1_1(...) \
do { \
- __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
- asm ("" : __constraints(__count_args(__VA_ARGS__))); \
+ CONCATENATE(__declare_arg_, \
+ COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \
+ asm ("" : \
+ : CONCATENATE(__constraint_read_, \
+ COUNT_ARGS(__VA_ARGS__)) \
+ : smccc_sve_clobbers "memory"); \
if (___res) \
___res->a0 = SMCCC_RET_NOT_SUPPORTED; \
} while (0)
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index cc060da51bec..c906f666ff5d 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -6,6 +6,7 @@
#ifndef _LINUX_ARM_FFA_H
#define _LINUX_ARM_FFA_H
+#include <linux/bitfield.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -20,6 +21,7 @@
#define FFA_ERROR FFA_SMC_32(0x60)
#define FFA_SUCCESS FFA_SMC_32(0x61)
+#define FFA_FN64_SUCCESS FFA_SMC_64(0x61)
#define FFA_INTERRUPT FFA_SMC_32(0x62)
#define FFA_VERSION FFA_SMC_32(0x63)
#define FFA_FEATURES FFA_SMC_32(0x64)
@@ -54,6 +56,23 @@
#define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A)
#define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B)
#define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C)
+#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D)
+#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E)
+#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F)
+#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80)
+#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81)
+#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82)
+#define FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83)
+#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83)
+#define FFA_RX_ACQUIRE FFA_SMC_32(0x84)
+#define FFA_SPM_ID_GET FFA_SMC_32(0x85)
+#define FFA_MSG_SEND2 FFA_SMC_32(0x86)
+#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87)
+#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87)
+#define FFA_MEM_PERM_GET FFA_SMC_32(0x88)
+#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88)
+#define FFA_MEM_PERM_SET FFA_SMC_32(0x89)
+#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89)
/*
* For some calls it is necessary to use SMC64 to pass or return 64-bit values.
@@ -76,6 +95,7 @@
#define FFA_RET_DENIED (-6)
#define FFA_RET_RETRY (-7)
#define FFA_RET_ABORTED (-8)
+#define FFA_RET_NO_DATA (-9)
/* FFA version encoding */
#define FFA_MAJOR_VERSION_MASK GENMASK(30, 16)
@@ -86,6 +106,7 @@
(FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \
FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor)))
#define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0)
+#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1)
/**
* FF-A specification mentions explicitly about '4K pages'. This should
@@ -188,6 +209,8 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
#define module_ffa_driver(__ffa_driver) \
module_driver(__ffa_driver, ffa_register, ffa_unregister)
+extern const struct bus_type ffa_bus_type;
+
/* FFA transport related */
struct ffa_partition_info {
u16 id;
@@ -278,8 +301,8 @@ struct ffa_mem_region {
#define FFA_MEM_NON_SHAREABLE (0)
#define FFA_MEM_OUTER_SHAREABLE (2)
#define FFA_MEM_INNER_SHAREABLE (3)
- u8 attributes;
- u8 reserved_0;
+ /* Memory region attributes, upper byte MBZ pre v1.1 */
+ u16 attributes;
/*
* Clear memory region contents after unmapping it from the sender and
* before mapping it for any receiver.
@@ -317,27 +340,41 @@ struct ffa_mem_region {
* memory region.
*/
u64 tag;
- u32 reserved_1;
+ /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */
+ u32 ep_mem_size;
/*
* The number of `ffa_mem_region_attributes` entries included in this
* transaction.
*/
u32 ep_count;
/*
- * An array of endpoint memory access descriptors.
- * Each one specifies a memory region offset, an endpoint and the
- * attributes with which this memory region should be mapped in that
- * endpoint's page table.
+ * 16-byte aligned offset from the base address of this descriptor
+ * to the first element of the endpoint memory access descriptor array
+ * Valid only from v1.1
*/
- struct ffa_mem_region_attributes ep_mem_access[];
+ u32 ep_mem_offset;
+ /* MBZ, valid only from v1.1 */
+ u32 reserved[3];
};
-#define COMPOSITE_OFFSET(x) \
- (offsetof(struct ffa_mem_region, ep_mem_access[x]))
#define CONSTITUENTS_OFFSET(x) \
(offsetof(struct ffa_composite_mem_region, constituents[x]))
-#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \
- (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y))
+
+static inline u32
+ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version)
+{
+ u32 offset = count * sizeof(struct ffa_mem_region_attributes);
+ /*
+ * Earlier to v1.1, the endpoint memory descriptor array started at
+ * offset 32(i.e. offset of ep_mem_offset in the current structure)
+ */
+ if (ffa_version <= FFA_VERSION_1_0)
+ offset += offsetof(struct ffa_mem_region, ep_mem_offset);
+ else
+ offset += sizeof(struct ffa_mem_region);
+
+ return offset;
+}
struct ffa_mem_ops_args {
bool use_txbuf;
@@ -367,10 +404,30 @@ struct ffa_mem_ops {
int (*memory_lend)(struct ffa_mem_ops_args *args);
};
+struct ffa_cpu_ops {
+ int (*run)(struct ffa_device *dev, u16 vcpu);
+};
+
+typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data);
+typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data);
+
+struct ffa_notifier_ops {
+ int (*sched_recv_cb_register)(struct ffa_device *dev,
+ ffa_sched_recv_cb cb, void *cb_data);
+ int (*sched_recv_cb_unregister)(struct ffa_device *dev);
+ int (*notify_request)(struct ffa_device *dev, bool per_vcpu,
+ ffa_notifier_cb cb, void *cb_data, int notify_id);
+ int (*notify_relinquish)(struct ffa_device *dev, int notify_id);
+ int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu,
+ u16 vcpu);
+};
+
struct ffa_ops {
const struct ffa_info_ops *info_ops;
const struct ffa_msg_ops *msg_ops;
const struct ffa_mem_ops *mem_ops;
+ const struct ffa_cpu_ops *cpu_ops;
+ const struct ffa_notifier_ops *notifier_ops;
};
#endif /* _LINUX_ARM_FFA_H */
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 14dc461b0e82..255701e1251b 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes);
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
void __init sdei_init(void);
+void sdei_handler_abort(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
static inline void sdei_init(void) { }
+static inline void sdei_handler_abort(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/include/linux/array_size.h b/include/linux/array_size.h
new file mode 100644
index 000000000000..06d7d83196ca
--- /dev/null
+++ b/include/linux/array_size.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ARRAY_SIZE_H
+#define _LINUX_ARRAY_SIZE_H
+
+#include <linux/compiler.h>
+
+/**
+ * ARRAY_SIZE - get the number of elements in array @arr
+ * @arr: array to be sized
+ */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+#endif /* _LINUX_ARRAY_SIZE_H */
diff --git a/include/linux/async.h b/include/linux/async.h
index cce4ad31e8fc..19b778d08600 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev)
return async_schedule_node(func, dev, dev_to_node(dev));
}
+bool async_schedule_dev_nocall(async_func_t func, struct device *dev);
+
/**
* async_schedule_dev_domain - A device specific version of async_schedule_domain
* @func: function to execute asynchronously
@@ -118,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie);
extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain);
extern bool current_is_async(void);
+extern void async_init(void);
#endif
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
deleted file mode 100644
index 1491af38cc6e..000000000000
--- a/include/linux/atmel-mci.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_ATMEL_MCI_H
-#define __LINUX_ATMEL_MCI_H
-
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-
-#define ATMCI_MAX_NR_SLOTS 2
-
-/**
- * struct mci_slot_pdata - board-specific per-slot configuration
- * @bus_width: Number of data lines wired up the slot
- * @detect_pin: GPIO pin wired to the card detect switch
- * @wp_pin: GPIO pin wired to the write protect sensor
- * @detect_is_active_high: The state of the detect pin when it is active
- * @non_removable: The slot is not removable, only detect once
- *
- * If a given slot is not present on the board, @bus_width should be
- * set to 0. The other fields are ignored in this case.
- *
- * Any pins that aren't available should be set to a negative value.
- *
- * Note that support for multiple slots is experimental -- some cards
- * might get upset if we don't get the clock management exactly right.
- * But in most cases, it should work just fine.
- */
-struct mci_slot_pdata {
- unsigned int bus_width;
- int detect_pin;
- int wp_pin;
- bool detect_is_active_high;
- bool non_removable;
-};
-
-/**
- * struct mci_platform_data - board-specific MMC/SDcard configuration
- * @dma_slave: DMA slave interface to use in data transfers.
- * @slot: Per-slot configuration data.
- */
-struct mci_platform_data {
- void *dma_slave;
- dma_filter_fn dma_filter;
- struct mci_slot_pdata slot[ATMCI_MAX_NR_SLOTS];
-};
-
-#endif /* __LINUX_ATMEL_MCI_H */
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 18f5744dfb5d..956bcba5dbf2 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void);
#define raw_sync_cmpxchg arch_sync_cmpxchg
+#ifdef arch_sync_try_cmpxchg
+#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg
+#else
+#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif
+
/**
* raw_atomic_read() - atomic load with relaxed ordering
* @v: pointer to atomic_t
@@ -459,8 +472,6 @@ raw_atomic_read_acquire(const atomic_t *v)
{
#if defined(arch_atomic_read_acquire)
return arch_atomic_read_acquire(v);
-#elif defined(arch_atomic_read)
- return arch_atomic_read(v);
#else
int ret;
@@ -508,8 +519,6 @@ raw_atomic_set_release(atomic_t *v, int i)
{
#if defined(arch_atomic_set_release)
arch_atomic_set_release(v, i);
-#elif defined(arch_atomic_set)
- arch_atomic_set(v, i);
#else
if (__native_word(atomic_t)) {
smp_store_release(&(v)->counter, i);
@@ -1996,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere.
*
@@ -2024,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere.
*
@@ -2052,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere.
*
@@ -2079,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere.
*
@@ -2103,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere.
*
@@ -2136,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere.
*
@@ -2169,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere.
*
@@ -2201,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere.
*
@@ -2394,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere.
*
@@ -2423,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_add_unless() elsewhere.
*
@@ -2443,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u)
* @v: pointer to atomic_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere.
*
@@ -2463,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere.
*
@@ -2490,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere.
*
@@ -2517,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere.
*
@@ -2575,8 +2598,6 @@ raw_atomic64_read_acquire(const atomic64_t *v)
{
#if defined(arch_atomic64_read_acquire)
return arch_atomic64_read_acquire(v);
-#elif defined(arch_atomic64_read)
- return arch_atomic64_read(v);
#else
s64 ret;
@@ -2624,8 +2645,6 @@ raw_atomic64_set_release(atomic64_t *v, s64 i)
{
#if defined(arch_atomic64_set_release)
arch_atomic64_set_release(v, i);
-#elif defined(arch_atomic64_set)
- arch_atomic64_set(v, i);
#else
if (__native_word(atomic64_t)) {
smp_store_release(&(v)->counter, i);
@@ -4112,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere.
*
@@ -4140,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere.
*
@@ -4168,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere.
*
@@ -4195,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere.
*
@@ -4219,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere.
*
@@ -4252,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere.
*
@@ -4285,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere.
*
@@ -4317,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere.
*
@@ -4510,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere.
*
@@ -4539,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere.
*
@@ -4559,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
* @v: pointer to atomic64_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere.
*
@@ -4579,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere.
*
@@ -4606,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere.
*
@@ -4633,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere.
*
@@ -4657,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
}
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d
+// 14850c0b0db20c62fdc78ccd1d42b98b88d76331
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index d401b406ef7c..debd487fe971 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there.
*
@@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there.
*
@@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there.
*
@@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there.
*
@@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there.
*
@@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there.
*
@@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there.
*
@@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
* @new: int value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there.
*
@@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there.
*
@@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
* @u: int value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_add_unless() there.
*
@@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
* @v: pointer to atomic_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there.
*
@@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there.
*
@@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there.
*
@@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v)
* @v: pointer to atomic_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there.
*
@@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there.
*
@@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there.
*
@@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there.
*
@@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there.
*
@@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there.
*
@@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there.
*
@@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there.
*
@@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
* @new: s64 value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there.
*
@@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there.
*
@@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* @u: s64 value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_add_unless() there.
*
@@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
* @v: pointer to atomic64_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there.
*
@@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there.
*
@@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there.
*
@@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
* @v: pointer to atomic64_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there.
*
@@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there.
*
@@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there.
*
@@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there.
*
@@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there.
*
@@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there.
*
@@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there.
*
@@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there.
*
@@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there.
*
@@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there.
*
@@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there.
*
@@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u)
* @v: pointer to atomic_long_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there.
*
@@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there.
*
@@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there.
*
@@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there.
*
@@ -4998,6 +5040,14 @@ atomic_long_dec_if_positive(atomic_long_t *v)
raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
})
+#define sync_try_cmpxchg(ptr, ...) \
+({ \
+ typeof(ptr) __ai_ptr = (ptr); \
+ kcsan_mb(); \
+ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
+ raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \
+})
+
#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 1568f875fef72097413caab8339120c065a39aa4
+// ce5b65e0f1f8a276268b667194581d24bed219d4
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index c82947170ddc..3ef844b3ab8a 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere.
*
@@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere.
*
@@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere.
*
@@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere.
*
@@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere.
*
@@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere.
*
@@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere.
*
@@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
* @new: long value to assign
*
* If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere.
*
@@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere.
*
@@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
* @u: long value to compare with
*
* If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere.
*
@@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u)
* @v: pointer to atomic_long_t
*
* If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere.
*
@@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere.
*
@@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere.
*
@@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v)
* @v: pointer to atomic_long_t
*
* If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
*
* Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere.
*
@@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v)
}
#endif /* _LINUX_ATOMIC_LONG_H */
-// 4ef23f98c73cff96d239896175fd26b10b88899e
+// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6a3a9e122bb5..0050ef288ab3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -36,6 +36,7 @@ struct mqstat;
struct audit_watch;
struct audit_tree;
struct sk_buff;
+struct kern_ipc_perm;
struct audit_krule {
u32 pflags;
@@ -117,6 +118,8 @@ enum audit_nfcfgop {
AUDIT_NFT_OP_OBJ_RESET,
AUDIT_NFT_OP_FLOWTABLE_REGISTER,
AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ AUDIT_NFT_OP_SETELEM_RESET,
+ AUDIT_NFT_OP_RULE_RESET,
AUDIT_NFT_OP_INVALID,
};
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index c15221dcb75e..8e177b67e82f 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -4,6 +4,11 @@
#ifndef _VIRTCHNL_H_
#define _VIRTCHNL_H_
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/overflow.h>
+#include <uapi/linux/if_ether.h>
+
/* Description:
* This header file describes the Virtual Function (VF) - Physical Function
* (PF) communication protocol used by the drivers for all devices starting
@@ -114,6 +119,7 @@ enum virtchnl_ops {
VIRTCHNL_OP_GET_STATS = 15,
VIRTCHNL_OP_RSVD = 16,
VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
+ VIRTCHNL_OP_CONFIG_RSS_HFUNC = 18,
/* opcode 19 is reserved */
VIRTCHNL_OP_IWARP = 20, /* advanced opcode */
VIRTCHNL_OP_RDMA = VIRTCHNL_OP_IWARP,
@@ -240,6 +246,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6)
/* used to negotiate communicating link speeds in Mbps */
#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7)
+#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10)
#define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15)
#define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16)
#define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17)
@@ -268,10 +275,11 @@ struct virtchnl_vf_resource {
u32 rss_key_size;
u32 rss_lut_size;
- struct virtchnl_vsi_resource vsi_res[1];
+ struct virtchnl_vsi_resource vsi_res[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_vf_resource);
+VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_vf_resource);
+#define virtchnl_vf_resource_LEGACY_SIZEOF 36
/* VIRTCHNL_OP_CONFIG_TX_QUEUE
* VF sends this message to set up parameters for one TX queue.
@@ -294,7 +302,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
/* VIRTCHNL_OP_CONFIG_RX_QUEUE
* VF sends this message to set up parameters for one RX queue.
* External data buffer contains one instance of virtchnl_rxq_info.
- * PF configures requested queue and returns a status code.
+ * PF configures requested queue and returns a status code. The
+ * crc_disable flag disables CRC stripping on the VF. Setting
+ * the crc_disable flag to 1 will disable CRC stripping for each
+ * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC
+ * offload must have been set prior to sending this info or the PF
+ * will ignore the request. This flag should be set the same for
+ * all of the queues for a VF.
*/
/* Rx queue config info */
@@ -306,7 +320,7 @@ struct virtchnl_rxq_info {
u16 splithdr_enabled; /* deprecated with AVF 1.0 */
u32 databuffer_size;
u32 max_pkt_size;
- u8 pad0;
+ u8 crc_disable;
u8 rxdid;
u8 pad1[2];
u64 dma_ring_addr;
@@ -340,10 +354,11 @@ struct virtchnl_vsi_queue_config_info {
u16 vsi_id;
u16 num_queue_pairs;
u32 pad;
- struct virtchnl_queue_pair_info qpair[1];
+ struct virtchnl_queue_pair_info qpair[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info);
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vsi_queue_config_info);
+#define virtchnl_vsi_queue_config_info_LEGACY_SIZEOF 72
/* VIRTCHNL_OP_REQUEST_QUEUES
* VF sends this message to request the PF to allocate additional queues to
@@ -385,10 +400,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_vector_map);
struct virtchnl_irq_map_info {
u16 num_vectors;
- struct virtchnl_vector_map vecmap[1];
+ struct virtchnl_vector_map vecmap[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info);
+VIRTCHNL_CHECK_STRUCT_LEN(2, virtchnl_irq_map_info);
+#define virtchnl_irq_map_info_LEGACY_SIZEOF 14
/* VIRTCHNL_OP_ENABLE_QUEUES
* VIRTCHNL_OP_DISABLE_QUEUES
@@ -459,10 +475,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr);
struct virtchnl_ether_addr_list {
u16 vsi_id;
u16 num_elements;
- struct virtchnl_ether_addr list[1];
+ struct virtchnl_ether_addr list[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list);
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_ether_addr_list);
+#define virtchnl_ether_addr_list_LEGACY_SIZEOF 12
/* VIRTCHNL_OP_ADD_VLAN
* VF sends this message to add one or more VLAN tag filters for receives.
@@ -481,10 +498,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list);
struct virtchnl_vlan_filter_list {
u16 vsi_id;
u16 num_elements;
- u16 vlan_id[1];
+ u16 vlan_id[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list);
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_vlan_filter_list);
+#define virtchnl_vlan_filter_list_LEGACY_SIZEOF 6
/* This enum is used for all of the VIRTCHNL_VF_OFFLOAD_VLAN_V2_CAPS related
* structures and opcodes.
@@ -711,10 +729,11 @@ struct virtchnl_vlan_filter_list_v2 {
u16 vport_id;
u16 num_elements;
u8 pad[4];
- struct virtchnl_vlan_filter filters[1];
+ struct virtchnl_vlan_filter filters[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_vlan_filter_list_v2);
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_vlan_filter_list_v2);
+#define virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF 40
/* VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2
* VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2
@@ -866,18 +885,20 @@ VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_promisc_info);
struct virtchnl_rss_key {
u16 vsi_id;
u16 key_len;
- u8 key[1]; /* RSS hash key, packed bytes */
+ u8 key[]; /* RSS hash key, packed bytes */
};
-VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_key);
+#define virtchnl_rss_key_LEGACY_SIZEOF 6
struct virtchnl_rss_lut {
u16 vsi_id;
u16 lut_entries;
- u8 lut[1]; /* RSS lookup table */
+ u8 lut[]; /* RSS lookup table */
};
-VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rss_lut);
+#define virtchnl_rss_lut_LEGACY_SIZEOF 6
/* VIRTCHNL_OP_GET_RSS_HENA_CAPS
* VIRTCHNL_OP_SET_RSS_HENA
@@ -892,6 +913,29 @@ struct virtchnl_rss_hena {
VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
+/* Type of RSS algorithm */
+enum virtchnl_rss_algorithm {
+ VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0,
+ VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1,
+ VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2,
+ VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3,
+};
+
+/* VIRTCHNL_OP_CONFIG_RSS_HFUNC
+ * VF sends this message to configure the RSS hash function. Only supported
+ * if both PF and VF drivers set the VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
+ * configuration negotiation.
+ * The hash function is initialized to VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC
+ * by the PF.
+ */
+struct virtchnl_rss_hfunc {
+ u16 vsi_id;
+ u16 rss_algorithm; /* enum virtchnl_rss_algorithm */
+ u32 reserved;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hfunc);
+
/* VIRTCHNL_OP_ENABLE_CHANNELS
* VIRTCHNL_OP_DISABLE_CHANNELS
* VF sends these messages to enable or disable channels based on
@@ -911,10 +955,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
struct virtchnl_tc_info {
u32 num_tc;
u32 pad;
- struct virtchnl_channel_info list[1];
+ struct virtchnl_channel_info list[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_tc_info);
+#define virtchnl_tc_info_LEGACY_SIZEOF 24
/* VIRTCHNL_ADD_CLOUD_FILTER
* VIRTCHNL_DEL_CLOUD_FILTER
@@ -1052,10 +1097,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_rdma_qv_info);
struct virtchnl_rdma_qvlist_info {
u32 num_vectors;
- struct virtchnl_rdma_qv_info qv_info[1];
+ struct virtchnl_rdma_qv_info qv_info[];
};
-VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_rdma_qvlist_info);
+VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_rdma_qvlist_info);
+#define virtchnl_rdma_qvlist_info_LEGACY_SIZEOF 16
/* VF reset states - these are written into the RSTAT register:
* VFGEN_RSTAT on the VF
@@ -1074,14 +1120,6 @@ enum virtchnl_vfr_states {
VIRTCHNL_VFR_VFACTIVE,
};
-/* Type of RSS algorithm */
-enum virtchnl_rss_algorithm {
- VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0,
- VIRTCHNL_RSS_ALG_R_ASYMMETRIC = 1,
- VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC = 2,
- VIRTCHNL_RSS_ALG_XOR_SYMMETRIC = 3,
-};
-
#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32
#define PROTO_HDR_SHIFT 5
#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
@@ -1367,6 +1405,31 @@ struct virtchnl_fdir_del {
VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
+#define __vss_byone(p, member, count, old) \
+ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0)))
+
+#define __vss_byelem(p, member, count, old) \
+ (struct_size(p, member, count - 1) + (old - struct_size(p, member, 0)))
+
+#define __vss_full(p, member, count, old) \
+ (struct_size(p, member, count) + (old - struct_size(p, member, 0)))
+
+#define __vss(type, func, p, member, count) \
+ struct type: func(p, member, count, type##_LEGACY_SIZEOF)
+
+#define virtchnl_struct_size(p, m, c) \
+ _Generic(*p, \
+ __vss(virtchnl_vf_resource, __vss_full, p, m, c), \
+ __vss(virtchnl_vsi_queue_config_info, __vss_full, p, m, c), \
+ __vss(virtchnl_irq_map_info, __vss_full, p, m, c), \
+ __vss(virtchnl_ether_addr_list, __vss_full, p, m, c), \
+ __vss(virtchnl_vlan_filter_list, __vss_full, p, m, c), \
+ __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \
+ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \
+ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \
+ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \
+ __vss(virtchnl_rss_lut, __vss_byone, p, m, c))
+
/**
* virtchnl_vc_validate_vf_msg
* @ver: Virtchnl version info
@@ -1401,24 +1464,23 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
valid_len = sizeof(struct virtchnl_rxq_info);
break;
case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
- valid_len = sizeof(struct virtchnl_vsi_queue_config_info);
+ valid_len = virtchnl_vsi_queue_config_info_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_vsi_queue_config_info *vqc =
(struct virtchnl_vsi_queue_config_info *)msg;
- valid_len += (vqc->num_queue_pairs *
- sizeof(struct
- virtchnl_queue_pair_info));
+ valid_len = virtchnl_struct_size(vqc, qpair,
+ vqc->num_queue_pairs);
if (vqc->num_queue_pairs == 0)
err_msg_format = true;
}
break;
case VIRTCHNL_OP_CONFIG_IRQ_MAP:
- valid_len = sizeof(struct virtchnl_irq_map_info);
+ valid_len = virtchnl_irq_map_info_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_irq_map_info *vimi =
(struct virtchnl_irq_map_info *)msg;
- valid_len += (vimi->num_vectors *
- sizeof(struct virtchnl_vector_map));
+ valid_len = virtchnl_struct_size(vimi, vecmap,
+ vimi->num_vectors);
if (vimi->num_vectors == 0)
err_msg_format = true;
}
@@ -1429,23 +1491,24 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
break;
case VIRTCHNL_OP_ADD_ETH_ADDR:
case VIRTCHNL_OP_DEL_ETH_ADDR:
- valid_len = sizeof(struct virtchnl_ether_addr_list);
+ valid_len = virtchnl_ether_addr_list_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_ether_addr_list *veal =
(struct virtchnl_ether_addr_list *)msg;
- valid_len += veal->num_elements *
- sizeof(struct virtchnl_ether_addr);
+ valid_len = virtchnl_struct_size(veal, list,
+ veal->num_elements);
if (veal->num_elements == 0)
err_msg_format = true;
}
break;
case VIRTCHNL_OP_ADD_VLAN:
case VIRTCHNL_OP_DEL_VLAN:
- valid_len = sizeof(struct virtchnl_vlan_filter_list);
+ valid_len = virtchnl_vlan_filter_list_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_vlan_filter_list *vfl =
(struct virtchnl_vlan_filter_list *)msg;
- valid_len += vfl->num_elements * sizeof(u16);
+ valid_len = virtchnl_struct_size(vfl, vlan_id,
+ vfl->num_elements);
if (vfl->num_elements == 0)
err_msg_format = true;
}
@@ -1469,31 +1532,36 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP:
break;
case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP:
- valid_len = sizeof(struct virtchnl_rdma_qvlist_info);
+ valid_len = virtchnl_rdma_qvlist_info_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_rdma_qvlist_info *qv =
(struct virtchnl_rdma_qvlist_info *)msg;
- valid_len += ((qv->num_vectors - 1) *
- sizeof(struct virtchnl_rdma_qv_info));
+ valid_len = virtchnl_struct_size(qv, qv_info,
+ qv->num_vectors);
}
break;
case VIRTCHNL_OP_CONFIG_RSS_KEY:
- valid_len = sizeof(struct virtchnl_rss_key);
+ valid_len = virtchnl_rss_key_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_rss_key *vrk =
(struct virtchnl_rss_key *)msg;
- valid_len += vrk->key_len - 1;
+ valid_len = virtchnl_struct_size(vrk, key,
+ vrk->key_len);
}
break;
case VIRTCHNL_OP_CONFIG_RSS_LUT:
- valid_len = sizeof(struct virtchnl_rss_lut);
+ valid_len = virtchnl_rss_lut_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_rss_lut *vrl =
(struct virtchnl_rss_lut *)msg;
- valid_len += vrl->lut_entries - 1;
+ valid_len = virtchnl_struct_size(vrl, lut,
+ vrl->lut_entries);
}
break;
+ case VIRTCHNL_OP_CONFIG_RSS_HFUNC:
+ valid_len = sizeof(struct virtchnl_rss_hfunc);
+ break;
case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
break;
case VIRTCHNL_OP_SET_RSS_HENA:
@@ -1506,12 +1574,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
valid_len = sizeof(struct virtchnl_vf_res_request);
break;
case VIRTCHNL_OP_ENABLE_CHANNELS:
- valid_len = sizeof(struct virtchnl_tc_info);
+ valid_len = virtchnl_tc_info_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_tc_info *vti =
(struct virtchnl_tc_info *)msg;
- valid_len += (vti->num_tc - 1) *
- sizeof(struct virtchnl_channel_info);
+ valid_len = virtchnl_struct_size(vti, list,
+ vti->num_tc);
if (vti->num_tc == 0)
err_msg_format = true;
}
@@ -1538,13 +1606,13 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
break;
case VIRTCHNL_OP_ADD_VLAN_V2:
case VIRTCHNL_OP_DEL_VLAN_V2:
- valid_len = sizeof(struct virtchnl_vlan_filter_list_v2);
+ valid_len = virtchnl_vlan_filter_list_v2_LEGACY_SIZEOF;
if (msglen >= valid_len) {
struct virtchnl_vlan_filter_list_v2 *vfl =
(struct virtchnl_vlan_filter_list_v2 *)msg;
- valid_len += (vfl->num_elements - 1) *
- sizeof(struct virtchnl_vlan_filter);
+ valid_len = virtchnl_struct_size(vfl, filters,
+ vfl->num_elements);
if (vfl->num_elements == 0) {
err_msg_format = true;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ae12696ec492..2ad261082bba 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
struct delayed_work dwork; /* work item used for writeback */
struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
- unsigned long dirty_sleep; /* last wait */
-
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
* any dirty wbs, which is depended upon by bdi_has_dirty().
*/
atomic_long_t tot_write_bandwidth;
+ /*
+ * Jiffies when last process was dirty throttled on this bdi. Used by
+ * blk-wbt.
+ */
+ unsigned long last_bdp_sleep;
struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index fbad4fcd408e..8e7af9a03b41 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
-void wb_wakeup_delayed(struct bdi_writeback *wb);
void wb_wait_for_completion(struct wb_completion *done);
@@ -46,7 +45,6 @@ extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
extern struct workqueue_struct *bdi_wq;
-extern struct workqueue_struct *bdi_async_bio_wq;
static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
new file mode 100644
index 000000000000..3f1fe1774f1b
--- /dev/null
+++ b/include/linux/backing-file.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common helpers for stackable filesystems and backing files.
+ *
+ * Copyright (C) 2023 CTERA Networks.
+ */
+
+#ifndef _LINUX_BACKING_FILE_H
+#define _LINUX_BACKING_FILE_H
+
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/fs.h>
+
+struct backing_file_ctx {
+ const struct cred *cred;
+ struct file *user_file;
+ void (*accessed)(struct file *);
+ void (*end_write)(struct file *);
+};
+
+struct file *backing_file_open(const struct path *user_path, int flags,
+ const struct path *real_path,
+ const struct cred *cred);
+ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
+ struct kiocb *iocb, int flags,
+ struct backing_file_ctx *ctx);
+ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
+ struct kiocb *iocb, int flags,
+ struct backing_file_ctx *ctx);
+ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags,
+ struct backing_file_ctx *ctx);
+ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len,
+ unsigned int flags,
+ struct backing_file_ctx *ctx);
+int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
+ struct backing_file_ctx *ctx);
+
+#endif /* _LINUX_BACKING_FILE_H */
diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h
index 2426276b9bd3..670f2dae692f 100644
--- a/include/linux/badblocks.h
+++ b/include/linux/badblocks.h
@@ -15,6 +15,7 @@
#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x) (!!((x) & BB_ACK_MASK))
+#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x))
#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
/* Bad block numbers are stored sorted in a single page.
@@ -41,6 +42,12 @@ struct badblocks {
sector_t size; /* in sectors */
};
+struct badblocks_context {
+ sector_t start;
+ sector_t len;
+ int ack;
+};
+
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
sector_t *first_bad, int *bad_sectors);
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
@@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb)
}
badblocks_exit(bb);
}
+
+static inline int badblocks_full(struct badblocks *bb)
+{
+ return (bb->count >= MAX_BADBLOCKS);
+}
+
+static inline int badblocks_empty(struct badblocks *bb)
+{
+ return (bb->count == 0);
+}
+
+static inline void set_changed(struct badblocks *bb)
+{
+ if (bb->changed != 1)
+ bb->changed = 1;
+}
+
+static inline void clear_changed(struct badblocks *bb)
+{
+ if (bb->changed != 0)
+ bb->changed = 0;
+}
+
#endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 8d51f69f9f5e..70f97f685bff 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -90,6 +90,16 @@ struct linux_binfmt {
#endif
} __randomize_layout;
+#if IS_ENABLED(CONFIG_BINFMT_MISC)
+struct binfmt_misc {
+ struct list_head entries;
+ rwlock_t entries_lock;
+ bool enabled;
+} __randomize_layout;
+
+extern struct binfmt_misc init_binfmt_misc;
+#endif
+
extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
/* Registration of default binfmt handlers */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 11984ed29cb8..875d792bffff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -253,6 +253,11 @@ static inline struct page *bio_first_page_all(struct bio *bio)
return bio_first_bvec_all(bio)->bv_page;
}
+static inline struct folio *bio_first_folio_all(struct bio *bio)
+{
+ return page_folio(bio_first_page_all(bio));
+}
+
static inline struct bio_vec *bio_last_bvec_all(struct bio *bio)
{
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
@@ -281,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
{
struct bio_vec *bvec = bio_first_bvec_all(bio) + i;
+ if (unlikely(i >= bio->bi_vcnt)) {
+ fi->folio = NULL;
+ return;
+ }
+
fi->folio = page_folio(bvec->bv_page);
fi->offset = bvec->bv_offset +
PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
@@ -298,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
fi->offset = 0;
fi->length = min(folio_size(fi->folio), fi->_seg_count);
fi->_next = folio_next(fi->folio);
- } else if (fi->_i + 1 < bio->bi_vcnt) {
- bio_first_folio(fi, bio, fi->_i + 1);
} else {
- fi->folio = NULL;
+ bio_first_folio(fi, bio, fi->_i + 1);
}
}
@@ -319,6 +327,8 @@ enum bip_flags {
BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
+ BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */
+ BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */
};
/*
@@ -488,7 +498,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
void guard_bio_eod(struct bio *bio);
-void zero_fill_bio(struct bio *bio);
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+ zero_fill_bio_iter(bio, bio->bi_iter);
+}
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
{
@@ -708,6 +723,7 @@ static inline bool bioset_initialized(struct bio_set *bs)
for_each_bio(_bio) \
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern bool bio_integrity_prep(struct bio *);
@@ -779,6 +795,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
+static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
+ ssize_t len, u32 seed)
+{
+ return -EINVAL;
+}
+
#endif /* CONFIG_BLK_DEV_INTEGRITY */
/*
diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index ebfa12f69501..63928f173223 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -66,7 +66,8 @@
_pfx "mask is not constant"); \
BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \
BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
- ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
+ ~((_mask) >> __bf_shf(_mask)) & \
+ (0 + (_val)) : 0, \
_pfx "value too large for the field"); \
BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
__bf_cast_unsigned(_reg, ~0ull), \
diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h
new file mode 100644
index 000000000000..17caeca94cab
--- /dev/null
+++ b/include/linux/bitmap-str.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITMAP_STR_H
+#define __LINUX_BITMAP_STR_H
+
+int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits);
+int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits);
+extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp,
+ int nmaskbits, loff_t off, size_t count);
+extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp,
+ int nmaskbits, loff_t off, size_t count);
+int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits);
+int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits);
+int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+ unsigned long *dst, int nbits);
+
+#endif /* __LINUX_BITMAP_STR_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 03644237e1ef..aa4096126553 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,10 +6,13 @@
#include <linux/align.h>
#include <linux/bitops.h>
+#include <linux/cleanup.h>
+#include <linux/errno.h>
#include <linux/find.h>
#include <linux/limits.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/bitmap-str.h>
struct device;
@@ -52,6 +55,7 @@ struct device;
* bitmap_full(src, nbits) Are all bits set in *src?
* bitmap_weight(src, nbits) Hamming Weight: number set bits
* bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap
+ * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap
* bitmap_set(dst, pos, nbits) Set specified bit area
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
@@ -60,6 +64,8 @@ struct device;
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
* bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
+ * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src)
+ * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src)
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
* bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
@@ -125,6 +131,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node);
unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node);
void bitmap_free(const unsigned long *bitmap);
+DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T))
+
/* Managed variants of the above. */
unsigned long *devm_bitmap_alloc(struct device *dev,
unsigned int nbits, gfp_t flags);
@@ -167,6 +175,8 @@ bool __bitmap_subset(const unsigned long *bitmap1,
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
@@ -200,14 +210,6 @@ bitmap_find_next_zero_area(unsigned long *map,
align_mask, 0);
}
-int bitmap_parse(const char *buf, unsigned int buflen,
- unsigned long *dst, int nbits);
-int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
- unsigned long *dst, int nbits);
-int bitmap_parselist(const char *buf, unsigned long *maskp,
- int nmaskbits);
-int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
- unsigned long *dst, int nbits);
void bitmap_remap(unsigned long *dst, const unsigned long *src,
const unsigned long *old, const unsigned long *new, unsigned int nbits);
int bitmap_bitremap(int oldbit,
@@ -216,23 +218,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
const unsigned long *relmap, unsigned int bits);
void bitmap_fold(unsigned long *dst, const unsigned long *orig,
unsigned int sz, unsigned int nbits);
-int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
-void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
-int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
-
-#ifdef __BIG_ENDIAN
-void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
-#else
-#define bitmap_copy_le bitmap_copy
-#endif
-int bitmap_print_to_pagebuf(bool list, char *buf,
- const unsigned long *maskp, int nmaskbits);
-
-extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp,
- int nmaskbits, loff_t off, size_t count);
-
-extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp,
- int nmaskbits, loff_t off, size_t count);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
@@ -448,6 +433,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1,
return __bitmap_weight_and(src1, src2, nbits);
}
+static __always_inline
+unsigned long bitmap_weight_andnot(const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits));
+ return __bitmap_weight_andnot(src1, src2, nbits);
+}
+
static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
unsigned int nbits)
{
@@ -510,6 +504,107 @@ static inline void bitmap_replace(unsigned long *dst,
__bitmap_replace(dst, old, new, mask, nbits);
}
+/**
+ * bitmap_scatter - Scatter a bitmap according to the given mask
+ * @dst: scattered bitmap
+ * @src: gathered bitmap
+ * @mask: mask representing bits to assign to in the scattered bitmap
+ * @nbits: number of bits in each of these bitmaps
+ *
+ * Scatters bitmap with sequential bits according to the given @mask.
+ *
+ * Example:
+ * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302.
+ *
+ * Or in binary form
+ * @src @mask @dst
+ * 0000000001011010 0001001100010011 0000001100000010
+ *
+ * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12)
+ *
+ * A more 'visual' description of the operation::
+ *
+ * src: 0000000001011010
+ * ||||||
+ * +------+|||||
+ * | +----+||||
+ * | |+----+|||
+ * | || +-+||
+ * | || | ||
+ * mask: ...v..vv...v..vv
+ * ...0..11...0..10
+ * dst: 0000001100000010
+ *
+ * A relationship exists between bitmap_scatter() and bitmap_gather().
+ * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
+ * See bitmap_scatter() for details related to this relationship.
+ */
+static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
+{
+ unsigned int n = 0;
+ unsigned int bit;
+
+ bitmap_zero(dst, nbits);
+
+ for_each_set_bit(bit, mask, nbits)
+ __assign_bit(bit, dst, test_bit(n++, src));
+}
+
+/**
+ * bitmap_gather - Gather a bitmap according to given mask
+ * @dst: gathered bitmap
+ * @src: scattered bitmap
+ * @mask: mask representing bits to extract from in the scattered bitmap
+ * @nbits: number of bits in each of these bitmaps
+ *
+ * Gathers bitmap with sparse bits according to the given @mask.
+ *
+ * Example:
+ * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a.
+ *
+ * Or in binary form
+ * @src @mask @dst
+ * 0000001100000010 0001001100010011 0000000000011010
+ *
+ * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5)
+ *
+ * A more 'visual' description of the operation::
+ *
+ * mask: ...v..vv...v..vv
+ * src: 0000001100000010
+ * ^ ^^ ^ 0
+ * | || | 10
+ * | || > 010
+ * | |+--> 1010
+ * | +--> 11010
+ * +----> 011010
+ * dst: 0000000000011010
+ *
+ * A relationship exists between bitmap_gather() and bitmap_scatter(). See
+ * bitmap_scatter() for the bitmap scatter detailed operations.
+ * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n).
+ * The operation bitmap_gather(result, scattered, mask, n) leads to a result
+ * equal or equivalent to src.
+ *
+ * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather()
+ * are not bijective.
+ * The result and src values are equivalent in that sense that a call to
+ * bitmap_scatter(res, src, mask, n) and a call to
+ * bitmap_scatter(res, result, mask, n) will lead to the same res value.
+ */
+static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
+{
+ unsigned int n = 0;
+ unsigned int bit;
+
+ bitmap_zero(dst, nbits);
+
+ for_each_set_bit(bit, mask, nbits)
+ __assign_bit(n++, dst, test_bit(bit, src));
+}
+
static inline void bitmap_next_set_region(unsigned long *bitmap,
unsigned int *rs, unsigned int *re,
unsigned int end)
@@ -519,6 +614,66 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
}
/**
+ * bitmap_release_region - release allocated bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to release
+ * @order: region size (log base 2 of number of bits) to release
+ *
+ * This is the complement to __bitmap_find_free_region() and releases
+ * the found region (by clearing it in the bitmap).
+ */
+static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
+{
+ bitmap_clear(bitmap, pos, BIT(order));
+}
+
+/**
+ * bitmap_allocate_region - allocate bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to allocate
+ * @order: region size (log base 2 of number of bits) to allocate
+ *
+ * Allocate (set bits in) a specified region of a bitmap.
+ *
+ * Returns: 0 on success, or %-EBUSY if specified region wasn't
+ * free (not all bits were zero).
+ */
+static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
+{
+ unsigned int len = BIT(order);
+
+ if (find_next_bit(bitmap, pos + len, pos) < pos + len)
+ return -EBUSY;
+ bitmap_set(bitmap, pos, len);
+ return 0;
+}
+
+/**
+ * bitmap_find_free_region - find a contiguous aligned mem region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @bits: number of bits in the bitmap
+ * @order: region size (log base 2 of number of bits) to find
+ *
+ * Find a region of free (zero) bits in a @bitmap of @bits bits and
+ * allocate them (set them to one). Only consider regions of length
+ * a power (@order) of two, aligned to that power of two, which
+ * makes the search algorithm much faster.
+ *
+ * Returns: the bit offset in bitmap of the allocated region,
+ * or -errno on failure.
+ */
+static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
+{
+ unsigned int pos, end; /* scans bitmap by regions of size order */
+
+ for (pos = 0; (end = pos + BIT(order)) <= bits; pos = end) {
+ if (!bitmap_allocate_region(bitmap, pos, order))
+ return pos;
+ }
+ return -ENOMEM;
+}
+
+/**
* BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
* @n: u64 value
*
diff --git a/include/linux/bits.h b/include/linux/bits.h
index 7c0cf5031abe..0eb24d21aac2 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -4,6 +4,7 @@
#include <linux/const.h>
#include <vdso/bits.h>
+#include <uapi/linux/bits.h>
#include <asm/bitsperlong.h>
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
@@ -30,15 +31,8 @@
#define GENMASK_INPUT_CHECK(h, l) 0
#endif
-#define __GENMASK(h, l) \
- (((~UL(0)) - (UL(1) << (l)) + 1) & \
- (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
#define GENMASK(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-
-#define __GENMASK_ULL(h, l) \
- (((~ULL(0)) - (ULL(1) << (l)) + 1) & \
- (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
#define GENMASK_ULL(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 378b2459efe2..e253e7bd0d17 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -20,6 +20,7 @@ struct blk_integrity_iter {
unsigned int data_size;
unsigned short interval;
unsigned char tuple_size;
+ unsigned char pi_offset;
const char *disk_name;
};
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 495ca198775f..d3d8fd8e229b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -8,6 +8,7 @@
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
#include <linux/srcu.h>
+#include <linux/rw_hint.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -32,8 +33,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
/* merge of different types, fail separately */
#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
-/* track inflight for MQ */
-#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
/* use hctx->sched_tags */
@@ -137,6 +136,7 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
+ enum rw_hint write_hint;
unsigned short ioprio;
enum mq_rq_state state;
@@ -178,14 +178,10 @@ struct request {
struct {
unsigned int seq;
- struct list_head list;
rq_end_io_fn *saved_end_io;
} flush;
- union {
- struct __call_single_data csd;
- u64 fifo_time;
- };
+ u64 fifo_time;
/*
* completion callback.
@@ -397,9 +393,6 @@ struct blk_mq_hw_ctx {
*/
struct blk_mq_tags *sched_tags;
- /** @run: Number of dispatched requests. */
- unsigned long run;
-
/** @numa_node: NUMA node the storage adapter has been connected to. */
unsigned int numa_node;
/** @queue_num: Index of this hardware queue. */
@@ -691,17 +684,19 @@ enum {
#define BLK_MQ_NO_HCTX_IDX (-1U)
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata,
struct lock_class_key *lkclass);
-#define blk_mq_alloc_disk(set, queuedata) \
+#define blk_mq_alloc_disk(set, lim, queuedata) \
({ \
static struct lock_class_key __key; \
\
- __blk_mq_alloc_disk(set, queuedata, &__key); \
+ __blk_mq_alloc_disk(set, lim, queuedata, &__key); \
})
struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q,
struct lock_class_key *lkclass);
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
+struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
+ struct queue_limits *lim, void *queuedata);
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q);
void blk_mq_destroy_queue(struct request_queue *);
@@ -836,6 +831,12 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
+ /*
+ * passthrough io doesn't use iostat accounting, cgroup stats
+ * and io scheduler functionalities.
+ */
+ if (blk_rq_is_passthrough(rq))
+ return false;
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}
diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h
index 2580e05a8ab6..004b38a538ff 100644
--- a/include/linux/blk-pm.h
+++ b/include/linux/blk-pm.h
@@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q);
extern void blk_post_runtime_suspend(struct request_queue *q, int err);
extern void blk_pre_runtime_resume(struct request_queue *q);
extern void blk_post_runtime_resume(struct request_queue *q);
-extern void blk_set_runtime_active(struct request_queue *q);
#else
static inline void blk_pm_runtime_init(struct request_queue *q,
struct device *dev) {}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..cb1526ec44b5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -10,6 +10,7 @@
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>
+#include <linux/rw_hint.h>
struct bio_set;
struct bio;
@@ -49,27 +50,26 @@ struct block_device {
bool bd_write_holder;
bool bd_has_submit_bio;
dev_t bd_dev;
+ struct inode *bd_inode; /* will die */
+
atomic_t bd_openers;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
- struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
void * bd_claiming;
void * bd_holder;
const struct blk_holder_ops *bd_holder_ops;
struct mutex bd_holder_lock;
- /* The counter of freeze processes */
- int bd_fsfreeze_count;
int bd_holders;
struct kobject *bd_holder_dir;
- /* Mutex for freeze */
- struct mutex bd_fsfreeze_mutex;
- struct super_block *bd_fsfreeze_sb;
+ atomic_t bd_fsfreeze_count; /* number of freeze requests */
+ struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */
struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
bool bd_make_it_fail;
#endif
+ bool bd_ro_warned;
+ int bd_writers;
/*
* keep this out-of-line as it's both big and not needed in the fast
* path
@@ -207,52 +207,10 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-/*
- * From most significant bit:
- * 1 bit: reserved for other usage, see below
- * 12 bits: original size of bio
- * 51 bits: issue time of bio
- */
-#define BIO_ISSUE_RES_BITS 1
-#define BIO_ISSUE_SIZE_BITS 12
-#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS)
-#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
-#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
-#define BIO_ISSUE_SIZE_MASK \
- (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
-#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
-
-/* Reserved bit for blk-throtl */
-#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
-
struct bio_issue {
u64 value;
};
-static inline u64 __bio_issue_time(u64 time)
-{
- return time & BIO_ISSUE_TIME_MASK;
-}
-
-static inline u64 bio_issue_time(struct bio_issue *issue)
-{
- return __bio_issue_time(issue->value);
-}
-
-static inline sector_t bio_issue_size(struct bio_issue *issue)
-{
- return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
-}
-
-static inline void bio_issue_init(struct bio_issue *issue,
- sector_t size)
-{
- size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
- issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
- (ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
- ((u64)size << BIO_ISSUE_SIZE_SHIFT));
-}
-
typedef __u32 __bitwise blk_opf_t;
typedef unsigned int blk_qc_t;
@@ -270,6 +228,7 @@ struct bio {
*/
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
+ enum rw_hint bi_write_hint;
blk_status_t bi_status;
atomic_t __bi_remaining;
@@ -379,6 +338,8 @@ enum req_op {
REQ_OP_DISCARD = (__force blk_opf_t)3,
/* securely erase sectors */
REQ_OP_SECURE_ERASE = (__force blk_opf_t)5,
+ /* write data at the current zone write pointer */
+ REQ_OP_ZONE_APPEND = (__force blk_opf_t)7,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9,
/* Open a zone */
@@ -387,12 +348,10 @@ enum req_op {
REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11,
/* Transition a zone to full */
REQ_OP_ZONE_FINISH = (__force blk_opf_t)12,
- /* write data at the current zone write pointer */
- REQ_OP_ZONE_APPEND = (__force blk_opf_t)13,
/* reset a zone write pointer */
- REQ_OP_ZONE_RESET = (__force blk_opf_t)15,
+ REQ_OP_ZONE_RESET = (__force blk_opf_t)13,
/* reset all the zone present on the device */
- REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17,
+ REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15,
/* Driver private requests */
REQ_OP_DRV_IN = (__force blk_opf_t)34,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 87d94be7825a..69e7da33ca49 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
#include <linux/sbitmap.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
+#include <linux/file.h>
struct module;
struct request_queue;
@@ -42,7 +43,7 @@ struct blk_crypto_profile;
extern const struct device_type disk_type;
extern const struct device_type part_type;
-extern struct class block_class;
+extern const struct class block_class;
/*
* Maximum number of blkcg policies allowed to be registered concurrently.
@@ -108,6 +109,7 @@ struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
unsigned char tuple_size;
+ unsigned char pi_offset;
unsigned char interval_exp;
unsigned char tag_size;
};
@@ -124,6 +126,10 @@ typedef unsigned int __bitwise blk_mode_t;
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (specialy hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
+/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
+#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5))
+/* return partition scanning errors */
+#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6))
struct gendisk {
/*
@@ -187,8 +193,6 @@ struct gendisk {
* blk_mq_unfreeze_queue().
*/
unsigned int nr_zones;
- unsigned int max_open_zones;
- unsigned int max_active_zones;
unsigned long *conv_zones_bitmap;
unsigned long *seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */
@@ -264,18 +268,6 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
}
/*
- * Zoned block device models (zoned limit).
- *
- * Note: This needs to be ordered from the least to the most severe
- * restrictions for the inheritance in blk_stack_limits() to work.
- */
-enum blk_zoned_model {
- BLK_ZONED_NONE = 0, /* Regular block device */
- BLK_ZONED_HA, /* Host-aware zoned block device */
- BLK_ZONED_HM, /* Host-managed zoned block device */
-};
-
-/*
* BLK_BOUNCE_NONE: never bounce (default)
* BLK_BOUNCE_HIGH: bounce all highmem pages
*/
@@ -302,6 +294,7 @@ struct queue_limits {
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
+ unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
unsigned int max_zone_append_sectors;
@@ -316,7 +309,9 @@ struct queue_limits {
unsigned char misaligned;
unsigned char discard_misaligned;
unsigned char raid_partial_stripes_expensive;
- enum blk_zoned_model zoned;
+ bool zoned;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
/*
* Drivers that set dma_alignment to less than 511 must be prepared to
@@ -329,24 +324,15 @@ struct queue_limits {
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
-void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
+void disk_set_zoned(struct gendisk *disk);
-#ifdef CONFIG_BLK_DEV_ZONED
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data);
-unsigned int bdev_nr_zones(struct block_device *bdev);
-extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
- sector_t sectors, sector_t nr_sectors,
- gfp_t gfp_mask);
+ unsigned int nr_zones, report_zones_cb cb, void *data);
+int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
+ sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk,
- void (*update_driver_data)(struct gendisk *disk));
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
- return 0;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
+ void (*update_driver_data)(struct gendisk *disk));
/*
* Independent access ranges: struct blk_independent_access_range describes
@@ -376,59 +362,51 @@ struct blk_independent_access_ranges {
};
struct request_queue {
- struct request *last_merge;
- struct elevator_queue *elevator;
-
- struct percpu_ref q_usage_counter;
+ /*
+ * The queue owner gets to use this for whatever they like.
+ * ll_rw_blk doesn't touch it.
+ */
+ void *queuedata;
- struct blk_queue_stats *stats;
- struct rq_qos *rq_qos;
- struct mutex rq_qos_mutex;
+ struct elevator_queue *elevator;
const struct blk_mq_ops *mq_ops;
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
+ /*
+ * various queue flags, see QUEUE_* below
+ */
+ unsigned long queue_flags;
+
+ unsigned int rq_timeout;
+
unsigned int queue_depth;
+ refcount_t refs;
+
/* hw dispatch queues */
- struct xarray hctx_table;
unsigned int nr_hw_queues;
+ struct xarray hctx_table;
- /*
- * The queue owner gets to use this for whatever they like.
- * ll_rw_blk doesn't touch it.
- */
- void *queuedata;
-
- /*
- * various queue flags, see QUEUE_* below
- */
- unsigned long queue_flags;
- /*
- * Number of contexts that have called blk_set_pm_only(). If this
- * counter is above zero then only RQF_PM requests are processed.
- */
- atomic_t pm_only;
+ struct percpu_ref q_usage_counter;
- /*
- * ida allocated id for this queue. Used to index queues from
- * ioctx.
- */
- int id;
+ struct request *last_merge;
spinlock_t queue_lock;
- struct gendisk *disk;
+ int quiesce_depth;
- refcount_t refs;
+ struct gendisk *disk;
/*
* mq queue kobject
*/
struct kobject *mq_kobj;
+ struct queue_limits limits;
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity integrity;
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -439,24 +417,40 @@ struct request_queue {
#endif
/*
- * queue settings
+ * Number of contexts that have called blk_set_pm_only(). If this
+ * counter is above zero then only RQF_PM requests are processed.
*/
- unsigned long nr_requests; /* Max # of requests */
+ atomic_t pm_only;
+
+ struct blk_queue_stats *stats;
+ struct rq_qos *rq_qos;
+ struct mutex rq_qos_mutex;
+
+ /*
+ * ida allocated id for this queue. Used to index queues from
+ * ioctx.
+ */
+ int id;
unsigned int dma_pad_mask;
+ /*
+ * queue settings
+ */
+ unsigned long nr_requests; /* Max # of requests */
+
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct blk_crypto_profile *crypto_profile;
struct kobject *crypto_kobject;
#endif
- unsigned int rq_timeout;
-
struct timer_list timeout;
struct work_struct timeout_work;
atomic_t nr_active_requests_shared_tags;
+ unsigned int required_elevator_features;
+
struct blk_mq_tags *sched_shared_tags;
struct list_head icq_list;
@@ -467,11 +461,12 @@ struct request_queue {
struct mutex blkcg_mutex;
#endif
- struct queue_limits limits;
+ int node;
- unsigned int required_elevator_features;
+ spinlock_t requeue_lock;
+ struct list_head requeue_list;
+ struct delayed_work requeue_work;
- int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace;
#endif
@@ -481,12 +476,9 @@ struct request_queue {
struct blk_flush_queue *fq;
struct list_head flush_list;
- struct list_head requeue_list;
- spinlock_t requeue_lock;
- struct delayed_work requeue_work;
-
struct mutex sysfs_lock;
struct mutex sysfs_dir_lock;
+ struct mutex limits_lock;
/*
* for reusing dead hctx instance in case of updating
@@ -509,8 +501,6 @@ struct request_queue {
*/
struct mutex mq_freeze_lock;
- int quiesce_depth;
-
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
@@ -538,6 +528,7 @@ struct request_queue {
#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
+#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */
#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */
@@ -622,26 +613,14 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q)
}
#endif
-static inline enum blk_zoned_model
-blk_queue_zoned_model(struct request_queue *q)
-{
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
- return q->limits.zoned;
- return BLK_ZONED_NONE;
-}
-
static inline bool blk_queue_is_zoned(struct request_queue *q)
{
- switch (blk_queue_zoned_model(q)) {
- case BLK_ZONED_HA:
- case BLK_ZONED_HM:
- return true;
- default:
- return false;
- }
+ return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned;
}
#ifdef CONFIG_BLK_DEV_ZONED
+unsigned int bdev_nr_zones(struct block_device *bdev);
+
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
@@ -666,26 +645,31 @@ static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
static inline void disk_set_max_open_zones(struct gendisk *disk,
unsigned int max_open_zones)
{
- disk->max_open_zones = max_open_zones;
+ disk->queue->limits.max_open_zones = max_open_zones;
}
static inline void disk_set_max_active_zones(struct gendisk *disk,
unsigned int max_active_zones)
{
- disk->max_active_zones = max_active_zones;
+ disk->queue->limits.max_active_zones = max_active_zones;
}
static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
{
- return bdev->bd_disk->max_open_zones;
+ return bdev->bd_disk->queue->limits.max_open_zones;
}
static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
- return bdev->bd_disk->max_active_zones;
+ return bdev->bd_disk->queue->limits.max_active_zones;
}
#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+ return 0;
+}
+
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return 0;
@@ -750,7 +734,8 @@ static inline int bdev_read_only(struct block_device *bdev)
}
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
void add_disk_randomness(struct gendisk *disk) __latent_entropy;
void rand_initialize_disk(struct gendisk *disk);
@@ -784,22 +769,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void put_disk(struct gendisk *disk);
-struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass);
+struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
+ struct lock_class_key *lkclass);
/**
* blk_alloc_disk - allocate a gendisk structure
+ * @lim: queue limits to be used for this disk.
* @node_id: numa node to allocate on
*
* Allocate and pre-initialize a gendisk structure for use with BIO based
* drivers.
*
+ * Returns an ERR_PTR on error, else the allocated disk.
+ *
* Context: can sleep
*/
-#define blk_alloc_disk(node_id) \
+#define blk_alloc_disk(lim, node_id) \
({ \
static struct lock_class_key __key; \
\
- __blk_alloc_disk(node_id, &__key); \
+ __blk_alloc_disk(lim, node_id, &__key); \
})
int __register_blkdev(unsigned int major, const char *name,
@@ -809,7 +798,6 @@ int __register_blkdev(unsigned int major, const char *name,
void unregister_blkdev(unsigned int major, const char *name);
bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
@@ -846,6 +834,7 @@ extern const char *blk_op_str(enum req_op op);
int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
+const char *blk_status_to_str(blk_status_t status);
/* only poll the hardware once, don't continue until a completion was found */
#define BLK_POLL_ONESHOT (1 << 0)
@@ -882,6 +871,29 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
return chunk_sectors - (offset & (chunk_sectors - 1));
}
+/**
+ * queue_limits_start_update - start an atomic update of queue limits
+ * @q: queue to update
+ *
+ * This functions starts an atomic update of the queue limits. It takes a lock
+ * to prevent other updates and returns a snapshot of the current limits that
+ * the caller can modify. The caller must call queue_limits_commit_update()
+ * to finish the update.
+ *
+ * Context: process context. The caller must have frozen the queue or ensured
+ * that there is outstanding I/O by other means.
+ */
+static inline struct queue_limits
+queue_limits_start_update(struct request_queue *q)
+ __acquires(q->limits_lock)
+{
+ mutex_lock(&q->limits_lock);
+ return q->limits;
+}
+int queue_limits_commit_update(struct request_queue *q,
+ struct queue_limits *lim);
+int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+
/*
* Access functions for manipulating queue properties
*/
@@ -915,8 +927,8 @@ extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
-extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
- sector_t offset);
+void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
+ sector_t offset, const char *pfx);
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
@@ -963,6 +975,7 @@ struct blk_plug {
/* if ios_left is > 1, we can batch tag/rq allocations */
struct request *cached_rq;
+ u64 cur_ktime;
unsigned short nr_ios;
unsigned short rq_count;
@@ -993,6 +1006,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
__blk_flush_plug(plug, async);
}
+/*
+ * tsk == current here
+ */
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+ struct blk_plug *plug = tsk->plug;
+
+ if (plug)
+ plug->cur_ktime = 0;
+ current->flags &= ~PF_BLOCK_TS;
+}
+
int blkdev_issue_flush(struct block_device *bdev);
long nr_blockdev_pages(void);
#else /* CONFIG_BLOCK */
@@ -1016,6 +1041,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
}
+static inline void blk_plug_invalidate_ts(struct task_struct *tsk)
+{
+}
+
static inline int blkdev_issue_flush(struct block_device *bdev)
{
return 0;
@@ -1078,7 +1107,14 @@ enum blk_default_limits {
BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
};
-#define BLK_DEF_MAX_SECTORS 2560u
+/*
+ * Default upper limit for the software max_sectors limit used for
+ * regular file system I/O. This can be increased through sysfs.
+ *
+ * Not to be confused with the max_hw_sector limit that is entirely
+ * controlled by the driver, usually based on hardware limits.
+ */
+#define BLK_DEF_MAX_SECTORS_CAP 2560u
static inline unsigned long queue_segment_boundary(const struct request_queue *q)
{
@@ -1257,11 +1293,6 @@ static inline bool bdev_nowait(struct block_device *bdev)
return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
}
-static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
-{
- return blk_queue_zoned_model(bdev_get_queue(bdev));
-}
-
static inline bool bdev_is_zoned(struct block_device *bdev)
{
return blk_queue_is_zoned(bdev_get_queue(bdev));
@@ -1460,30 +1491,53 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
#endif
struct blk_holder_ops {
- void (*mark_dead)(struct block_device *bdev);
+ void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+ /*
+ * Sync the file system mounted on the block device.
+ */
+ void (*sync)(struct block_device *bdev);
+
+ /*
+ * Freeze the file system mounted on the block device.
+ */
+ int (*freeze)(struct block_device *bdev);
+
+ /*
+ * Thaw the file system mounted on the block device.
+ */
+ int (*thaw)(struct block_device *bdev);
};
/*
+ * For filesystems using @fs_holder_ops, the @holder argument passed to
+ * helpers used to open and claim block devices via
+ * bd_prepare_to_claim() must point to a superblock.
+ */
+extern const struct blk_holder_ops fs_holder_ops;
+
+/*
* Return the correct open flags for blkdev_get_by_* for super block flags
* as stored in sb->s_flags.
*/
#define sb_open_mode(flags) \
- (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
+ (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \
+ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
-struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops);
-struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops);
int bd_prepare_to_claim(struct block_device *bdev, void *holder,
const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder);
-void blkdev_put(struct block_device *bdev, void *holder);
/* just for blk-cgroup, don't use elsewhere */
struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);
struct block_device *I_BDEV(struct inode *inode);
+struct block_device *file_bdev(struct file *bdev_file);
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
@@ -1521,10 +1575,9 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
}
#endif /* CONFIG_BLOCK */
-int fsync_bdev(struct block_device *bdev);
-
-int freeze_bdev(struct block_device *bdev);
-int thaw_bdev(struct block_device *bdev);
+int bdev_freeze(struct block_device *bdev);
+int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
struct request *req_list;
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index ca73940e26df..3f4b4ac527ca 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
#else /* !__KERNEL__ */
/*
* NOTE: This is only for tools/bootconfig, because tools/bootconfig will
@@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
int __init xbc_get_info(int *node_size, size_t *data_size);
/* XBC cleanup data structures */
-void __init xbc_exit(void);
+void __init _xbc_exit(bool early);
+
+static inline void xbc_exit(void)
+{
+ _xbc_exit(false);
+}
/* XBC embedded bootconfig data in kernel */
#ifdef CONFIG_BOOT_CONFIG_EMBED
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index e1a3c9c9754c..cffa38a73618 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -60,7 +60,7 @@ static inline void get_page_bootmem(unsigned long info, struct page *page,
static inline void free_bootmem_page(struct page *page)
{
- kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
+ kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
free_reserved_page(page);
}
#endif
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 7b121bd780eb..0985221d5478 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type {
CGROUP_INET6_BIND,
CGROUP_INET4_CONNECT,
CGROUP_INET6_CONNECT,
+ CGROUP_UNIX_CONNECT,
CGROUP_INET4_POST_BIND,
CGROUP_INET6_POST_BIND,
CGROUP_UDP4_SENDMSG,
CGROUP_UDP6_SENDMSG,
+ CGROUP_UNIX_SENDMSG,
CGROUP_SYSCTL,
CGROUP_UDP4_RECVMSG,
CGROUP_UDP6_RECVMSG,
+ CGROUP_UNIX_RECVMSG,
CGROUP_GETSOCKOPT,
CGROUP_SETSOCKOPT,
CGROUP_INET4_GETPEERNAME,
CGROUP_INET6_GETPEERNAME,
+ CGROUP_UNIX_GETPEERNAME,
CGROUP_INET4_GETSOCKNAME,
CGROUP_INET6_GETSOCKNAME,
+ CGROUP_UNIX_GETSOCKNAME,
CGROUP_INET_SOCK_RELEASE,
CGROUP_LSM_START,
CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 57e9e109257e..fb3c3e7181e6 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
CGROUP_ATYPE(CGROUP_INET6_BIND);
CGROUP_ATYPE(CGROUP_INET4_CONNECT);
CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+ CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
CGROUP_ATYPE(CGROUP_SYSCTL);
CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
CGROUP_ATYPE(CGROUP_GETSOCKOPT);
CGROUP_ATYPE(CGROUP_SETSOCKOPT);
CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
default:
return CGROUP_BPF_ATTACH_TYPE_INVALID;
@@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
+ int *uaddrlen,
enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags);
@@ -137,11 +143,12 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
- int *optname, char __user *optval,
+ int *optname, sockptr_t optval,
int *optlen, char **kernel_optval);
+
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
- int optname, char __user *optval,
- int __user *optlen, int max_optlen,
+ int optname, sockptr_t optval,
+ sockptr_t optlen, int max_optlen,
int retval);
int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
@@ -189,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \
- cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \
+ cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \
+ sk_fullsock(sk)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
CGROUP_INET_INGRESS); \
\
@@ -199,9 +207,9 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
- if (sk_fullsock(__sk) && \
+ if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \
cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
CGROUP_INET_EGRESS); \
@@ -230,22 +238,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, NULL); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- t_ctx, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, t_ctx, NULL); \
release_sock(sk); \
} \
__ret; \
@@ -256,14 +264,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
* should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, &__flags); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
@@ -276,29 +284,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
(sk)->sk_prot->pre_connect)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
* fullsock and its parent fullsock cannot be traced by
@@ -377,7 +394,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
- get_user(__ret, optlen); \
+ copy_from_sockptr(&__ret, optlen, sizeof(int)); \
__ret; \
})
@@ -477,24 +494,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
}
#define cgroup_bpf_enabled(atype) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..890e152d553e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -29,6 +29,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/static_call.h>
#include <linux/memcontrol.h>
+#include <linux/cfi.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -36,6 +37,7 @@ struct perf_event;
struct bpf_prog;
struct bpf_prog_aux;
struct bpf_map;
+struct bpf_arena;
struct sock;
struct seq_file;
struct btf;
@@ -51,11 +53,15 @@ struct module;
struct bpf_func_state;
struct ftrace_ops;
struct cgroup;
+struct bpf_token;
+struct user_namespace;
+struct super_block;
+struct inode;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
-extern struct bpf_mem_alloc bpf_global_ma;
+extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
@@ -106,7 +112,11 @@ struct bpf_map_ops {
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
- void (*map_fd_put_ptr)(void *ptr);
+ /* If need_defer is true, the implementation should guarantee that
+ * the to-be-put element is still alive before the bpf program, which
+ * may manipulate it, exists.
+ */
+ void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer);
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@@ -130,6 +140,9 @@ struct bpf_map_ops {
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);
+ unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
/* Functions called by bpf_local_storage maps */
int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
@@ -180,14 +193,15 @@ enum btf_field_type {
BPF_TIMER = (1 << 1),
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
- BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
- BPF_LIST_HEAD = (1 << 4),
- BPF_LIST_NODE = (1 << 5),
- BPF_RB_ROOT = (1 << 6),
- BPF_RB_NODE = (1 << 7),
- BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
- BPF_RB_NODE | BPF_RB_ROOT,
- BPF_REFCOUNT = (1 << 8),
+ BPF_KPTR_PERCPU = (1 << 4),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
+ BPF_LIST_HEAD = (1 << 5),
+ BPF_LIST_NODE = (1 << 6),
+ BPF_RB_ROOT = (1 << 7),
+ BPF_RB_NODE = (1 << 8),
+ BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
+ BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
+ BPF_REFCOUNT = (1 << 9),
};
typedef void (*btf_dtor_kfunc_t)(void *);
@@ -228,11 +242,20 @@ struct btf_record {
struct btf_field fields[];
};
+/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
+struct bpf_rb_node_kern {
+ struct rb_node rb_node;
+ void *owner;
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
+struct bpf_list_node_kern {
+ struct list_head list_head;
+ void *owner;
+} __attribute__((aligned(8)));
+
struct bpf_map {
- /* The first two cachelines with read-mostly members of which some
- * are also accessed in fast-path (e.g. ops, max_entries).
- */
- const struct bpf_map_ops *ops ____cacheline_aligned;
+ const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
@@ -254,13 +277,14 @@ struct bpf_map {
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- /* The 3rd and 4th cacheline with misc members to avoid false sharing
- * particularly with refcounting.
- */
- atomic64_t refcnt ____cacheline_aligned;
- atomic64_t usercnt;
- struct work_struct work;
struct mutex freeze_mutex;
+ atomic64_t refcnt;
+ atomic64_t usercnt;
+ /* rcu is used before freeing and work is only used during freeing */
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
atomic64_t writecnt;
/* 'Ownership' of program-containing map is claimed by the first program
* that is going to use this map or by the first program which FD is
@@ -275,6 +299,10 @@ struct bpf_map {
} owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
+ bool free_after_mult_rcu_gp;
+ bool free_after_rcu_gp;
+ atomic64_t sleepable_refcnt;
+ s64 __percpu *elem_count;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -287,6 +315,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
+ case BPF_KPTR_PERCPU:
+ return "percpu_kptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
@@ -312,6 +342,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
@@ -338,6 +369,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
@@ -376,6 +408,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_TIMER:
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
break;
default:
WARN_ON_ONCE(1);
@@ -425,7 +458,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
size /= sizeof(long);
while (size--)
- *ldst++ = *lsrc++;
+ data_race(*ldst++ = *lsrc++);
}
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
@@ -496,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
struct bpf_spin_lock *spin_lock);
-
-
+u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena);
+u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -640,7 +673,8 @@ enum bpf_type_flag {
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
/* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning.
- * Currently only valid for linked-list and rbtree nodes.
+ * Currently only valid for linked-list and rbtree nodes. If the nodes
+ * have a bpf_refcount_field, they must be tagged MEM_RCU as well.
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
@@ -678,6 +712,7 @@ enum bpf_arg_type {
* on eBPF program stack
*/
ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
+ ARG_PTR_TO_ARENA,
ARG_CONST_SIZE, /* number of bytes accessed from memory */
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
@@ -849,6 +884,7 @@ enum bpf_reg_type {
* an explicit null check is required for this struct.
*/
PTR_TO_MEM, /* reg points to valid memory region */
+ PTR_TO_ARENA,
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
@@ -889,10 +925,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
aux->ctx_field_size = size;
}
+static bool bpf_is_ldimm64(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
- insn->src_reg == BPF_PSEUDO_FUNC;
+ return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_prog_ops {
@@ -1015,6 +1055,22 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6)
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)
+
+/*
+ * Indicate the trampoline should be suitable to receive indirect calls;
+ * without this indirectly calling the generated code can result in #UD/#CP,
+ * depending on the CFI options.
+ *
+ * Used by bpf_struct_ops.
+ *
+ * Incompatible with FENTRY usage, overloads @func_addr argument.
+ */
+#define BPF_TRAMP_F_INDIRECT BIT(8)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -1054,10 +1110,17 @@ struct bpf_tramp_run_ctx;
* fexit = a set of program to run after original function
*/
struct bpf_tramp_image;
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
- void *orig_call);
+ void *func_addr);
+void *arch_alloc_bpf_trampoline(unsigned int size);
+void arch_free_bpf_trampoline(void *image, unsigned int size);
+void arch_protect_bpf_trampoline(void *image, unsigned int size);
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr);
+
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
@@ -1090,6 +1153,7 @@ enum bpf_tramp_prog_type {
struct bpf_tramp_image {
void *image;
+ int size;
struct bpf_ksym ksym;
struct percpu_ref pcref;
void *ip_after_call;
@@ -1125,7 +1189,6 @@ struct bpf_trampoline {
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
struct bpf_tramp_image *cur_image;
- struct module *mod;
};
struct bpf_attach_target_info {
@@ -1159,7 +1222,11 @@ struct bpf_dispatcher {
#endif
};
-static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
+#ifndef __bpfcall
+#define __bpfcall __nocfi
+#endif
+
+static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
bpf_func_t bpf_func)
@@ -1197,6 +1264,8 @@ enum bpf_dynptr_type {
int bpf_dynptr_check_size(u32 size);
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -1249,7 +1318,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
#define DEFINE_BPF_DISPATCHER(name) \
__BPF_DISPATCHER_SC(name); \
- noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \
+ noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func) \
@@ -1272,7 +1341,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
@@ -1293,7 +1362,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return NULL;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
@@ -1316,6 +1385,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
struct bpf_func_info_aux {
u16 linkage;
bool unreliable;
+ bool called : 1;
+ bool verified : 1;
};
enum bpf_jit_poke_reason {
@@ -1344,6 +1415,7 @@ struct bpf_jit_poke_descriptor {
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
+ struct btf *btf;
u32 btf_id;
};
@@ -1364,6 +1436,7 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
+ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
@@ -1380,10 +1453,13 @@ struct bpf_prog_aux {
bool dev_bound; /* Program is bound to the netdev. */
bool offload_requested; /* Program is bound and offloaded to the netdev. */
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ bool attach_tracing_prog; /* true if tracing another tracing program */
bool func_proto_unreliable;
- bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool exception_cb;
+ bool exception_boundary;
+ struct bpf_arena *arena;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -1394,6 +1470,9 @@ struct bpf_prog_aux {
struct bpf_kfunc_desc_tab *kfunc_tab;
struct bpf_kfunc_btf_tab *kfunc_btf_tab;
u32 size_poke_tab;
+#ifdef CONFIG_FINEIBT
+ struct bpf_ksym ksym_prefix;
+#endif
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
@@ -1406,9 +1485,11 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
+ u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
#ifdef CONFIG_SECURITY
void *security;
#endif
+ struct bpf_token *token;
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
@@ -1459,7 +1540,8 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
- tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
+ sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -1492,12 +1574,26 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if underlying BPF program is sleepable we go through tasks trace
+ * RCU GP and then "classic" RCU GP
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
@@ -1533,10 +1629,82 @@ struct bpf_link_primer {
u32 id;
};
+struct bpf_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+
+ /* BPF token-related delegation options */
+ u64 delegate_cmds;
+ u64 delegate_maps;
+ u64 delegate_progs;
+ u64 delegate_attachs;
+};
+
+struct bpf_token {
+ struct work_struct work;
+ atomic64_t refcnt;
+ struct user_namespace *userns;
+ u64 allowed_cmds;
+ u64 allowed_maps;
+ u64 allowed_progs;
+ u64 allowed_attachs;
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
+};
+
struct bpf_struct_ops_value;
struct btf_member;
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
+/**
+ * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to
+ * define a BPF_MAP_TYPE_STRUCT_OPS map type composed
+ * of BPF_PROG_TYPE_STRUCT_OPS progs.
+ * @verifier_ops: A structure of callbacks that are invoked by the verifier
+ * when determining whether the struct_ops progs in the
+ * struct_ops map are valid.
+ * @init: A callback that is invoked a single time, and before any other
+ * callback, to initialize the structure. A nonzero return value means
+ * the subsystem could not be initialized.
+ * @check_member: When defined, a callback invoked by the verifier to allow
+ * the subsystem to determine if an entry in the struct_ops map
+ * is valid. A nonzero return value means that the map is
+ * invalid and should be rejected by the verifier.
+ * @init_member: A callback that is invoked for each member of the struct_ops
+ * map to allow the subsystem to initialize the member. A nonzero
+ * value means the member could not be initialized. This callback
+ * is exclusive with the @type, @type_id, @value_type, and
+ * @value_id fields.
+ * @reg: A callback that is invoked when the struct_ops map has been
+ * initialized and is being attached to. Zero means the struct_ops map
+ * has been successfully registered and is live. A nonzero return value
+ * means the struct_ops map could not be registered.
+ * @unreg: A callback that is invoked when the struct_ops map should be
+ * unregistered.
+ * @update: A callback that is invoked when the live struct_ops map is being
+ * updated to contain new values. This callback is only invoked when
+ * the struct_ops map is loaded with BPF_F_LINK. If not defined, the
+ * it is assumed that the struct_ops map cannot be updated.
+ * @validate: A callback that is invoked after all of the members have been
+ * initialized. This callback should perform static checks on the
+ * map, meaning that it should either fail or succeed
+ * deterministically. A struct_ops map that has been validated may
+ * not necessarily succeed in being registered if the call to @reg
+ * fails. For example, a valid struct_ops map may be loaded, but
+ * then fail to be registered due to there being another active
+ * struct_ops map on the system in the subsystem already. For this
+ * reason, if this callback is not defined, the check is skipped as
+ * the struct_ops map will have final verification performed in
+ * @reg.
+ * @type: BTF type.
+ * @value_type: Value type.
+ * @name: The name of the struct bpf_struct_ops object.
+ * @func_models: Func models
+ * @type_id: BTF type id.
+ * @value_id: BTF value id.
+ */
struct bpf_struct_ops {
const struct bpf_verifier_ops *verifier_ops;
int (*init)(struct btf *btf);
@@ -1550,18 +1718,64 @@ struct bpf_struct_ops {
void (*unreg)(void *kdata);
int (*update)(void *kdata, void *old_kdata);
int (*validate)(void *kdata);
- const struct btf_type *type;
- const struct btf_type *value_type;
+ void *cfi_stubs;
+ struct module *owner;
const char *name;
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
+};
+
+/* Every member of a struct_ops type has an instance even a member is not
+ * an operator (function pointer). The "info" field will be assigned to
+ * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the
+ * argument information required by the verifier to verify the program.
+ *
+ * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the
+ * corresponding entry for an given argument.
+ */
+struct bpf_struct_ops_arg_info {
+ struct bpf_ctx_arg_aux *info;
+ u32 cnt;
+};
+
+struct bpf_struct_ops_desc {
+ struct bpf_struct_ops *st_ops;
+
+ const struct btf_type *type;
+ const struct btf_type *value_type;
u32 type_id;
u32 value_id;
+
+ /* Collection of argument information for each member */
+ struct bpf_struct_ops_arg_info *arg_info;
+};
+
+enum bpf_struct_ops_state {
+ BPF_STRUCT_OPS_STATE_INIT,
+ BPF_STRUCT_OPS_STATE_INUSE,
+ BPF_STRUCT_OPS_STATE_TOBEFREE,
+ BPF_STRUCT_OPS_STATE_READY,
+};
+
+struct bpf_struct_ops_common_value {
+ refcount_t refcnt;
+ enum bpf_struct_ops_state state;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+/* This macro helps developer to register a struct_ops type and generate
+ * type information correctly. Developers should use this macro to register
+ * a struct_ops type instead of calling __register_bpf_struct_ops() directly.
+ */
+#define register_bpf_struct_ops(st_ops, type) \
+ ({ \
+ struct bpf_struct_ops_##type { \
+ struct bpf_struct_ops_common_value common; \
+ struct type data ____cacheline_aligned_in_smp; \
+ }; \
+ BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \
+ __register_bpf_struct_ops(st_ops); \
+ })
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
-const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
-void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
@@ -1569,7 +1783,10 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *image, void *image_end);
+ void *stub_func,
+ void **image, u32 *image_off,
+ bool allow_alloc);
+void bpf_struct_ops_image_free(void *image);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
@@ -1602,15 +1819,13 @@ struct bpf_dummy_ops {
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
#endif
+int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
+ struct btf *btf,
+ struct bpf_verifier_log *log);
+void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map);
+void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc);
#else
-static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
-{
- return NULL;
-}
-static inline void bpf_struct_ops_init(struct btf *btf,
- struct bpf_verifier_log *log)
-{
-}
+#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; })
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
return try_module_get(owner);
@@ -1629,6 +1844,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
+static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+}
+
+static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc)
+{
+}
#endif
@@ -1806,6 +2028,7 @@ struct bpf_cg_run_ctx {
struct bpf_trace_run_ctx {
struct bpf_run_ctx run_ctx;
u64 bpf_cookie;
+ bool is_uprobe;
};
struct bpf_tramp_run_ctx {
@@ -1854,6 +2077,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
if (unlikely(!array))
return ret;
+ run_ctx.is_uprobe = false;
+
migrate_disable();
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
@@ -1878,8 +2103,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
* rcu-protected dynamically sized maps.
*/
static __always_inline u32
-bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog)
+bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
@@ -1893,20 +2118,22 @@ bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
rcu_read_lock_trace();
migrate_disable();
+ run_ctx.is_uprobe = true;
+
array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
if (unlikely(!array))
goto out;
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
while ((prog = READ_ONCE(item->prog))) {
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_lock();
run_ctx.bpf_cookie = item->bpf_cookie;
ret &= run_prog(prog, ctx);
item++;
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_unlock();
}
bpf_reset_run_ctx(old_run_ctx);
@@ -1938,6 +2165,7 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
+extern const struct super_operations bpf_super_ops;
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -1977,6 +2205,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
@@ -2004,6 +2233,8 @@ int generic_map_delete_batch(struct bpf_map *map,
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
+int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+ unsigned long nr_pages, struct page **page_array);
#ifdef CONFIG_MEMCG_KMEM
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
@@ -2040,26 +2271,57 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
}
#endif
+static inline int
+bpf_map_init_elem_count(struct bpf_map *map)
+{
+ size_t size = sizeof(*map->elem_count), align = size;
+ gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+ map->elem_count = bpf_map_alloc_percpu(map, size, align, flags);
+ if (!map->elem_count)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void
+bpf_map_free_elem_count(struct bpf_map *map)
+{
+ free_percpu(map->elem_count);
+}
+
+static inline void bpf_map_inc_elem_count(struct bpf_map *map)
+{
+ this_cpu_inc(*map->elem_count);
+}
+
+static inline void bpf_map_dec_elem_count(struct bpf_map *map)
+{
+ this_cpu_dec(*map->elem_count);
+}
+
extern int sysctl_unprivileged_bpf_disabled;
-static inline bool bpf_allow_ptr_leaks(void)
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
+static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_allow_uninit_stack(void)
+static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v1(void)
+static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v4(void)
+static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2073,12 +2335,24 @@ void bpf_link_cleanup(struct bpf_link_primer *primer);
void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
-struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type);
+
int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode);
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...) \
@@ -2308,19 +2582,19 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf_func_model *m);
struct bpf_reg_state;
-int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
-int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
-int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg);
+int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key);
+int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key, int last_id);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
-const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
void bpf_task_storage_free(struct task_struct *task);
void bpf_cgrp_storage_free(struct cgroup *cgroup);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
@@ -2367,6 +2641,9 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+
+bool dev_check_flush(void);
+bool cpu_map_check_flush(void);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2436,6 +2713,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline void __dev_flush(void)
{
}
@@ -2559,7 +2854,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log,
}
static inline const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return NULL;
}
@@ -2619,6 +2914,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
@@ -2799,6 +3106,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+ const struct bpf_prog_array_item *item;
+ struct bpf_prog *prog;
+
+ if (unlikely(!array))
+ return;
+
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ bpf_prog_inc_misses_counter(prog);
+ item++;
+ }
+}
+
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -3028,6 +3351,9 @@ enum bpf_text_poke_type {
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old);
+
void *bpf_arch_text_copy(void *dst, void *src, size_t len);
int bpf_arch_text_invalidate(void *dst, size_t len);
@@ -3077,4 +3403,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags)
return flags;
}
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+ return prog->aux->func_idx != 0;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 173ec7f43ed1..dcddb0aef7d8 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
struct bpf_local_storage_cache *cache,
bool bpf_ma);
-struct bpf_local_storage_data *
+void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem);
+/* If cacheit_lockit is false, this lookup function is lockless */
+static inline struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
- bool cacheit_lockit);
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage_data *sdata;
+ struct bpf_local_storage_elem *selem;
+
+ /* Fast path (cache hit) */
+ sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
+ bpf_rcu_lock_held());
+ if (sdata && rcu_access_pointer(sdata->smap) == smap)
+ return sdata;
+
+ /* Slow path (cache miss) */
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
+ rcu_read_lock_trace_held())
+ if (rcu_access_pointer(SDATA(selem)->smap) == smap)
+ break;
+
+ if (!selem)
+ return NULL;
+ if (cacheit_lockit)
+ __bpf_local_storage_insert_cache(local_storage, smap, selem);
+ return SDATA(selem);
+}
void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index 3929be5743f4..aaf004d94322 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -11,6 +11,8 @@ struct bpf_mem_caches;
struct bpf_mem_alloc {
struct bpf_mem_caches __percpu *caches;
struct bpf_mem_cache __percpu *cache;
+ struct obj_cgroup *objcg;
+ bool percpu;
struct work_struct work;
};
@@ -20,17 +22,26 @@ struct bpf_mem_alloc {
* 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects.
* Alloc and free are done with bpf_mem_{alloc,free}() and the size of
* the returned object is given by the size argument of bpf_mem_alloc().
+ * If percpu equals true, error will be returned in order to avoid
+ * large memory consumption and the below bpf_mem_alloc_percpu_unit_init()
+ * should be used to do on-demand per-cpu allocation for each size.
*/
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);
+/* Initialize a non-fix-size percpu memory allocator */
+int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg);
+/* The percpu allocation with a specific unit size. */
+int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
/* kmalloc/kfree equivalent: */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
+void bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr);
void bpf_mem_cache_raw_free(void *ptr);
void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);
diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h
new file mode 100644
index 000000000000..929225f7b095
--- /dev/null
+++ b/include/linux/bpf_mprog.h
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __BPF_MPROG_H
+#define __BPF_MPROG_H
+
+#include <linux/bpf.h>
+
+/* bpf_mprog framework:
+ *
+ * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
+ * of the bpf_mprog don't need to care about the dependency resolution
+ * internals, they can just consume it with few API calls. Currently available
+ * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of
+ * a BPF program or BPF link relative to an existing BPF program or BPF link
+ * inside the multi-program array as well as prepend and append behavior if
+ * no relative object was specified, see corresponding selftests for concrete
+ * examples (e.g. tc_links and tc_opts test cases of test_progs).
+ *
+ * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
+ *
+ * Attach case:
+ *
+ * struct bpf_mprog_entry *entry, *entry_new;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_attach(entry, &entry_new, [...]);
+ * if (!ret) {
+ * if (entry != entry_new) {
+ * // swap @entry to @entry_new at attach location
+ * // ensure there are no inflight users of @entry:
+ * synchronize_rcu();
+ * }
+ * bpf_mprog_commit(entry);
+ * } else {
+ * // error path, bail out, propagate @ret
+ * }
+ * // bpf_mprog user-side unlock
+ *
+ * Detach case:
+ *
+ * struct bpf_mprog_entry *entry, *entry_new;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_detach(entry, &entry_new, [...]);
+ * if (!ret) {
+ * // all (*) marked is optional and depends on the use-case
+ * // whether bpf_mprog_bundle should be freed or not
+ * if (!bpf_mprog_total(entry_new)) (*)
+ * entry_new = NULL (*)
+ * // swap @entry to @entry_new at attach location
+ * // ensure there are no inflight users of @entry:
+ * synchronize_rcu();
+ * bpf_mprog_commit(entry);
+ * if (!entry_new) (*)
+ * // free bpf_mprog_bundle (*)
+ * } else {
+ * // error path, bail out, propagate @ret
+ * }
+ * // bpf_mprog user-side unlock
+ *
+ * Query case:
+ *
+ * struct bpf_mprog_entry *entry;
+ * int ret;
+ *
+ * // bpf_mprog user-side lock
+ * // fetch active @entry from attach location
+ * [...]
+ * ret = bpf_mprog_query(attr, uattr, entry);
+ * // bpf_mprog user-side unlock
+ *
+ * Data/fast path:
+ *
+ * struct bpf_mprog_entry *entry;
+ * struct bpf_mprog_fp *fp;
+ * struct bpf_prog *prog;
+ * int ret = [...];
+ *
+ * rcu_read_lock();
+ * // fetch active @entry from attach location
+ * [...]
+ * bpf_mprog_foreach_prog(entry, fp, prog) {
+ * ret = bpf_prog_run(prog, [...]);
+ * // process @ret from program
+ * }
+ * [...]
+ * rcu_read_unlock();
+ *
+ * bpf_mprog locking considerations:
+ *
+ * bpf_mprog_{attach,detach,query}() must be protected by an external lock
+ * (like RTNL in case of tcx).
+ *
+ * bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx
+ * the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets
+ * updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of
+ * the bpf_mprog_bundle.
+ *
+ * Fast path accesses the active bpf_mprog_entry within RCU critical section
+ * (in case of tcx it runs in NAPI which provides RCU protection there,
+ * other users might need explicit rcu_read_lock()). The bpf_mprog_commit()
+ * assumes that for the old bpf_mprog_entry there are no inflight users
+ * anymore.
+ *
+ * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
+ * the replacement case where we don't swap the bpf_mprog_entry.
+ */
+
+#define bpf_mprog_foreach_tuple(entry, fp, cp, t) \
+ for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
+ ({ \
+ t.prog = READ_ONCE(fp->prog); \
+ t.link = cp->link; \
+ t.prog; \
+ }); \
+ fp++, cp++)
+
+#define bpf_mprog_foreach_prog(entry, fp, p) \
+ for (fp = &entry->fp_items[0]; \
+ (p = READ_ONCE(fp->prog)); \
+ fp++)
+
+#define BPF_MPROG_MAX 64
+
+struct bpf_mprog_fp {
+ struct bpf_prog *prog;
+};
+
+struct bpf_mprog_cp {
+ struct bpf_link *link;
+};
+
+struct bpf_mprog_entry {
+ struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
+ struct bpf_mprog_bundle *parent;
+};
+
+struct bpf_mprog_bundle {
+ struct bpf_mprog_entry a;
+ struct bpf_mprog_entry b;
+ struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
+ struct bpf_prog *ref;
+ atomic64_t revision;
+ u32 count;
+};
+
+struct bpf_tuple {
+ struct bpf_prog *prog;
+ struct bpf_link *link;
+};
+
+static inline struct bpf_mprog_entry *
+bpf_mprog_peer(const struct bpf_mprog_entry *entry)
+{
+ if (entry == &entry->parent->a)
+ return &entry->parent->b;
+ else
+ return &entry->parent->a;
+}
+
+static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
+{
+ BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
+ BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
+ ARRAY_SIZE(bundle->cp_items));
+
+ memset(bundle, 0, sizeof(*bundle));
+ atomic64_set(&bundle->revision, 1);
+ bundle->a.parent = bundle;
+ bundle->b.parent = bundle;
+}
+
+static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
+{
+ entry->parent->count++;
+}
+
+static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
+{
+ entry->parent->count--;
+}
+
+static inline int bpf_mprog_max(void)
+{
+ return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
+}
+
+static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
+{
+ int total = entry->parent->count;
+
+ WARN_ON_ONCE(total > bpf_mprog_max());
+ return total;
+}
+
+static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
+ struct bpf_prog *prog)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *tmp;
+
+ bpf_mprog_foreach_prog(entry, fp, tmp) {
+ if (tmp == prog)
+ return true;
+ }
+ return false;
+}
+
+static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
+ struct bpf_tuple *tuple)
+{
+ WARN_ON_ONCE(entry->parent->ref);
+ if (!tuple->link)
+ entry->parent->ref = tuple->prog;
+}
+
+static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
+{
+ /* In the non-link case prog deletions can only drop the reference
+ * to the prog after the bpf_mprog_entry got swapped and the
+ * bpf_mprog ensured that there are no inflight users anymore.
+ *
+ * Paired with bpf_mprog_mark_for_release().
+ */
+ if (entry->parent->ref) {
+ bpf_prog_put(entry->parent->ref);
+ entry->parent->ref = NULL;
+ }
+}
+
+static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
+{
+ atomic64_inc(&entry->parent->revision);
+}
+
+static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
+{
+ bpf_mprog_complete_release(entry);
+ bpf_mprog_revision_new(entry);
+}
+
+static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
+{
+ return atomic64_read(&entry->parent->revision);
+}
+
+static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
+ struct bpf_mprog_entry *src)
+{
+ memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
+}
+
+static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
+{
+ memset(dst->fp_items, 0, sizeof(dst->fp_items));
+}
+
+static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new)
+{
+ struct bpf_mprog_entry *peer;
+
+ peer = bpf_mprog_peer(entry);
+ bpf_mprog_entry_clear(peer);
+ peer->parent->count = 0;
+ *entry_new = peer;
+}
+
+static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
+{
+ int total = bpf_mprog_total(entry);
+
+ memmove(entry->fp_items + idx + 1,
+ entry->fp_items + idx,
+ (total - idx) * sizeof(struct bpf_mprog_fp));
+
+ memmove(entry->parent->cp_items + idx + 1,
+ entry->parent->cp_items + idx,
+ (total - idx) * sizeof(struct bpf_mprog_cp));
+}
+
+static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
+{
+ /* Total array size is needed in this case to enure the NULL
+ * entry is copied at the end.
+ */
+ int total = ARRAY_SIZE(entry->fp_items);
+
+ memmove(entry->fp_items + idx,
+ entry->fp_items + idx + 1,
+ (total - idx - 1) * sizeof(struct bpf_mprog_fp));
+
+ memmove(entry->parent->cp_items + idx,
+ entry->parent->cp_items + idx + 1,
+ (total - idx - 1) * sizeof(struct bpf_mprog_cp));
+}
+
+static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
+ struct bpf_mprog_fp **fp,
+ struct bpf_mprog_cp **cp)
+{
+ *fp = &entry->fp_items[idx];
+ *cp = &entry->parent->cp_items[idx];
+}
+
+static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
+ struct bpf_mprog_cp *cp,
+ struct bpf_tuple *tuple)
+{
+ WRITE_ONCE(fp->prog, tuple->prog);
+ cp->link = tuple->link;
+}
+
+int bpf_mprog_attach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog_new, struct bpf_link *link,
+ struct bpf_prog *prog_old,
+ u32 flags, u32 id_or_fd, u64 revision);
+
+int bpf_mprog_detach(struct bpf_mprog_entry *entry,
+ struct bpf_mprog_entry **entry_new,
+ struct bpf_prog *prog, struct bpf_link *link,
+ u32 flags, u32 id_or_fd, u64 revision);
+
+int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
+ struct bpf_mprog_entry *entry);
+
+static inline bool bpf_mprog_supported(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif /* __BPF_MPROG_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fc0d6f32c687..9f2a6b83b49e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
@@ -142,9 +143,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
#ifdef CONFIG_NET
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
+BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter)
+BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx)
+BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit)
#endif
#ifdef CONFIG_PERF_EVENTS
BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
#endif
BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi)
BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops)
+BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index f70f9ac884d2..7cb1b75eee38 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -275,6 +275,11 @@ struct bpf_reference_state {
int callback_ref;
};
+struct bpf_retval_range {
+ s32 minval;
+ s32 maxval;
+};
+
/* state of the program:
* type of all registers and stack info
*/
@@ -297,23 +302,67 @@ struct bpf_func_state {
* void foo(void) { bpf_timer_set_callback(,foo); }
*/
u32 async_entry_cnt;
+ struct bpf_retval_range callback_ret_range;
bool in_callback_fn;
- struct tnum callback_ret_range;
bool in_async_callback_fn;
+ bool in_exception_callback_fn;
+ /* For callback calling functions that limit number of possible
+ * callback executions (e.g. bpf_loop) keeps track of current
+ * simulated iteration number.
+ * Value in frame N refers to number of times callback with frame
+ * N+1 was simulated, e.g. for the following call:
+ *
+ * bpf_loop(..., fn, ...); | suppose current frame is N
+ * | fn would be simulated in frame N+1
+ * | number of simulations is tracked in frame N
+ */
+ u32 callback_depth;
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
- int allocated_stack;
+ /* The state of the stack. Each element of the array describes BPF_REG_SIZE
+ * (i.e. 8) bytes worth of stack memory.
+ * stack[0] represents bytes [*(r10-8)..*(r10-1)]
+ * stack[1] represents bytes [*(r10-16)..*(r10-9)]
+ * ...
+ * stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)]
+ */
struct bpf_stack_state *stack;
+ /* Size of the current stack, in bytes. The stack state is tracked below, in
+ * `stack`. allocated_stack is always a multiple of BPF_REG_SIZE.
+ */
+ int allocated_stack;
};
-struct bpf_idx_pair {
- u32 prev_idx;
+#define MAX_CALL_FRAMES 8
+
+/* instruction history flags, used in bpf_jmp_history_entry.flags field */
+enum {
+ /* instruction references stack slot through PTR_TO_STACK register;
+ * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
+ * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512,
+ * 8 bytes per slot, so slot index (spi) is [0, 63])
+ */
+ INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */
+
+ INSN_F_SPI_MASK = 0x3f, /* 6 bits */
+ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
+
+ INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
+};
+
+static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
+static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
+
+struct bpf_jmp_history_entry {
u32 idx;
+ /* insn idx can't be bigger than 1 million */
+ u32 prev_idx : 22;
+ /* special flags, e.g., whether insn is doing register stack spill/load */
+ u32 flags : 10;
};
-#define MAX_CALL_FRAMES 8
/* Maximum number of register states that can exist at once */
#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
struct bpf_verifier_state {
@@ -372,32 +421,49 @@ struct bpf_verifier_state {
struct bpf_active_lock active_lock;
bool speculative;
bool active_rcu_lock;
+ /* If this state was ever pointed-to by other state's loop_entry field
+ * this flag would be set to true. Used to avoid freeing such states
+ * while they are still in use.
+ */
+ bool used_as_loop_entry;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
u32 last_insn_idx;
+ /* If this state is a part of states loop this field points to some
+ * parent of this state such that:
+ * - it is also a member of the same states loop;
+ * - DFS states traversal starting from initial state visits loop_entry
+ * state before this state.
+ * Used to compute topmost loop entry for state loops.
+ * State loops might appear because of open coded iterators logic.
+ * See get_loop_entry() for more information.
+ */
+ struct bpf_verifier_state *loop_entry;
/* jmp history recorded from first to last.
* backtracking is using it to go from last to first.
* For most states jmp_history_cnt is [0-3].
* For loops can go up to ~40.
*/
- struct bpf_idx_pair *jmp_history;
+ struct bpf_jmp_history_entry *jmp_history;
u32 jmp_history_cnt;
+ u32 dfs_depth;
+ u32 callback_unroll_depth;
+ u32 may_goto_depth;
};
-#define bpf_get_spilled_reg(slot, frame) \
+#define bpf_get_spilled_reg(slot, frame, mask) \
(((slot < frame->allocated_stack / BPF_REG_SIZE) && \
- (frame->stack[slot].slot_type[0] == STACK_SPILL)) \
+ ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \
? &frame->stack[slot].spilled_ptr : NULL)
/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
-#define bpf_for_each_spilled_reg(iter, frame, reg) \
- for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \
+#define bpf_for_each_spilled_reg(iter, frame, reg, mask) \
+ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \
iter < frame->allocated_stack / BPF_REG_SIZE; \
- iter++, reg = bpf_get_spilled_reg(iter, frame))
+ iter++, reg = bpf_get_spilled_reg(iter, frame, mask))
-/* Invoke __expr over regsiters in __vst, setting __state and __reg */
-#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
+#define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \
({ \
struct bpf_verifier_state *___vstate = __vst; \
int ___i, ___j; \
@@ -409,7 +475,7 @@ struct bpf_verifier_state {
__reg = &___regs[___j]; \
(void)(__expr); \
} \
- bpf_for_each_spilled_reg(___j, __state, __reg) { \
+ bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \
if (!__reg) \
continue; \
(void)(__expr); \
@@ -417,6 +483,10 @@ struct bpf_verifier_state {
} \
})
+/* Invoke __expr over regsiters in __vst, setting __state and __reg */
+#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
+ bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr)
+
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
@@ -478,8 +548,10 @@ struct bpf_insn_aux_data {
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
+ bool needs_zext; /* alu op needs to clear upper bits */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
+ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
@@ -490,6 +562,10 @@ struct bpf_insn_aux_data {
* this instruction, regardless of any heuristics
*/
bool force_checkpoint;
+ /* true if instruction is a call to a helper function that
+ * accepts callback function as a parameter.
+ */
+ bool calls_callback;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -532,15 +608,30 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
#define BPF_MAX_SUBPROGS 256
+struct bpf_subprog_arg_info {
+ enum bpf_arg_type arg_type;
+ union {
+ u32 mem_size;
+ u32 btf_id;
+ };
+};
+
struct bpf_subprog_info {
/* 'start' has to be the first field otherwise find_subprog() won't work */
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
- bool has_tail_call;
- bool tail_call_reachable;
- bool has_ld_abs;
- bool is_async_cb;
+ u16 stack_extra;
+ bool has_tail_call: 1;
+ bool tail_call_reachable: 1;
+ bool has_ld_abs: 1;
+ bool is_cb: 1;
+ bool is_async_cb: 1;
+ bool is_exception_cb: 1;
+ bool args_cached: 1;
+
+ u8 arg_cnt;
+ struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
};
struct bpf_verifier_env;
@@ -575,10 +666,12 @@ struct bpf_verifier_env {
u32 prev_insn_idx;
struct bpf_prog *prog; /* eBPF program being verified */
const struct bpf_verifier_ops *ops;
+ struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
bool strict_alignment; /* perform strict pointer alignment checks */
bool test_state_freq; /* test verifier with different pruning frequency */
+ bool test_reg_invariants; /* fail verification on register invariants violations */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_verifier_state_list *free_list;
@@ -587,17 +680,24 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
+ u32 hidden_subprog_cnt; /* number of hidden subprogs */
+ int exception_callback_subprog;
bool explore_alu_limits;
bool allow_ptr_leaks;
+ /* Allow access to uninitialized stack memory. Writes with fixed offset are
+ * always allowed, so this refers to reads (with fixed or variable offset),
+ * to writes with variable offset and to indirect (helper) accesses.
+ */
bool allow_uninit_stack;
bool bpf_capable;
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
+ bool seen_exception;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
- struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
union {
struct bpf_idmap idmap_scratch;
struct bpf_idset idset_scratch;
@@ -608,6 +708,7 @@ struct bpf_verifier_env {
int cur_stack;
} cfg;
struct backtrack_state bt;
+ struct bpf_jmp_history_entry *cur_hist_ent;
u32 pass_cnt; /* number of times do_check() was called */
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
@@ -642,6 +743,16 @@ struct bpf_verifier_env {
char tmp_str_buf[TMP_STR_BUF_LEN];
};
+static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog)
+{
+ return &env->prog->aux->func_info_aux[subprog];
+}
+
+static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog)
+{
+ return &env->subprog_info[subprog];
+}
+
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
@@ -653,6 +764,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level,
void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos);
int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual);
+__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
+ u32 insn_off,
+ const char *prefix_fmt, ...);
+
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
@@ -675,14 +790,6 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
void
bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
-int check_ptr_off_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno);
-int check_func_arg_reg_off(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno,
- enum bpf_arg_type arg_type);
-int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- u32 regno, u32 mem_size);
-
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
struct btf *btf, u32 btf_id)
@@ -745,11 +852,92 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
}
}
-#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED | NON_OWN_REF)
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{
return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS;
}
+static inline bool type_is_ptr_alloc_obj(u32 type)
+{
+ return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
+}
+
+static inline bool type_is_non_owning_ref(u32 type)
+{
+ return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
+}
+
+static inline bool type_is_pkt_pointer(enum bpf_reg_type type)
+{
+ type = base_type(type);
+ return type == PTR_TO_PACKET ||
+ type == PTR_TO_PACKET_META;
+}
+
+static inline bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+ return type == PTR_TO_SOCKET ||
+ type == PTR_TO_SOCK_COMMON ||
+ type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_XDP_SOCK;
+}
+
+static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
+{
+ env->scratched_regs |= 1U << regno;
+}
+
+static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
+{
+ env->scratched_stack_slots |= 1ULL << spi;
+}
+
+static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
+{
+ return (env->scratched_regs >> regno) & 1;
+}
+
+static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
+{
+ return (env->scratched_stack_slots >> regno) & 1;
+}
+
+static inline bool verifier_state_scratched(const struct bpf_verifier_env *env)
+{
+ return env->scratched_regs || env->scratched_stack_slots;
+}
+
+static inline void mark_verifier_state_clean(struct bpf_verifier_env *env)
+{
+ env->scratched_regs = 0U;
+ env->scratched_stack_slots = 0ULL;
+}
+
+/* Used for printing the entire verifier state. */
+static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env)
+{
+ env->scratched_regs = ~0U;
+ env->scratched_stack_slots = ~0ULL;
+}
+
+static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size)
+{
+#ifdef __BIG_ENDIAN
+ off -= spill_size - fill_size;
+#endif
+
+ return !(off % BPF_REG_SIZE);
+}
+
+const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
+const char *dynptr_type_str(enum bpf_dynptr_type type);
+const char *iter_type_str(const struct btf *btf, u32 btf_id);
+const char *iter_state_str(enum bpf_iter_state state);
+
+void print_verifier_state(struct bpf_verifier_env *env,
+ const struct bpf_func_state *state, bool print_all);
+void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state);
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
deleted file mode 100644
index 736ded4905e0..000000000000
--- a/include/linux/bpfilter.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_BPFILTER_H
-#define _LINUX_BPFILTER_H
-
-#include <uapi/linux/bpfilter.h>
-#include <linux/usermode_driver.h>
-#include <linux/sockptr.h>
-
-struct sock;
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen);
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen);
-
-struct bpfilter_umh_ops {
- struct umd_info info;
- /* since ip_getsockopt() can run in parallel, serialize access to umh */
- struct mutex lock;
- int (*sockopt)(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen, bool is_set);
- int (*start)(void);
-};
-extern struct bpfilter_umh_ops bpfilter_ops;
-#endif
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5d732f48f787..1394ba302367 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -11,6 +11,7 @@
#define PHY_ID_BCM50610 0x0143bd60
#define PHY_ID_BCM50610M 0x0143bd70
+#define PHY_ID_BCM5221 0x004061e0
#define PHY_ID_BCM5241 0x0143bc30
#define PHY_ID_BCMAC131 0x0143bc70
#define PHY_ID_BCM5481 0x0143bca0
@@ -44,6 +45,7 @@
#define PHY_ID_BCM7366 0x600d8490
#define PHY_ID_BCM7346 0x600d8650
#define PHY_ID_BCM7362 0x600d84b0
+#define PHY_ID_BCM74165 0x359052c0
#define PHY_ID_BCM7425 0x600d86b0
#define PHY_ID_BCM7429 0x600d8730
#define PHY_ID_BCM7435 0x600d8750
@@ -330,6 +332,15 @@
#define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94)
+/* BCM5221 Registers */
+#define BCM5221_AEGSR 0x1C
+#define BCM5221_AEGSR_MDIX_STATUS BIT(13)
+#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12)
+#define BCM5221_AEGSR_MDIX_DIS BIT(11)
+
+#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2)
+#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1)
+
/*****************************************************************************/
/* Fast Ethernet Transceiver definitions. */
/*****************************************************************************/
diff --git a/include/linux/btf.h b/include/linux/btf.h
index cac9f304e27a..f9e56fd12a9f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -74,6 +74,7 @@
#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */
#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */
#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
+#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
@@ -83,6 +84,17 @@
*/
#define __bpf_kfunc __used noinline
+#define __bpf_kfunc_start_defs() \
+ __diag_push(); \
+ __diag_ignore_all("-Wmissing-declarations", \
+ "Global kfuncs as their definitions will be in BTF");\
+ __diag_ignore_all("-Wmissing-prototypes", \
+ "Global kfuncs as their definitions will be in BTF")
+
+#define __bpf_kfunc_end_defs() __diag_pop()
+#define __bpf_hook_start() __bpf_kfunc_start_defs()
+#define __bpf_hook_end() __bpf_kfunc_end_defs()
+
/*
* Return the name of the passed struct, if exists, or halt the build if for
* example the structure gets renamed. In this way, developers have to revisit
@@ -125,6 +137,7 @@ struct btf_struct_metas {
extern const struct file_operations btf_fops;
+const char *btf_get_name(const struct btf *btf);
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
@@ -204,13 +217,12 @@ u32 btf_nr_types(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
-int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
-int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size);
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
+s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id);
const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
@@ -483,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
}
+bool btf_param_match_suffix(const struct btf *btf,
+ const struct btf_param *arg,
+ const char *suffix);
+int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
+ u32 arg_no);
+
struct bpf_verifier_log;
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_struct_ops;
+int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops);
+const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id);
+const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id);
+#else
+static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id)
+{
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
@@ -501,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
-const struct btf_member *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, enum bpf_prog_type prog_type,
- int arg);
+bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg);
int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type);
bool btf_types_are_same(const struct btf *btf1, u32 id1,
const struct btf *btf2, u32 id2);
@@ -544,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf
{
return NULL;
}
-static inline const struct btf_member *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
- const struct btf_type *t, enum bpf_prog_type prog_type,
- int arg)
+static inline bool
+btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type,
+ int arg)
{
- return NULL;
+ return false;
}
static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log,
enum bpf_prog_type prog_type) {
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 00950cc03bff..e24aabfe8ecc 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -8,6 +8,9 @@ struct btf_id_set {
u32 ids[];
};
+/* This flag implies BTF_SET8 holds kfunc(s) */
+#define BTF_SET8_KFUNCS (1 << 0)
+
struct btf_id_set8 {
u32 cnt;
u32 flags;
@@ -21,6 +24,7 @@ struct btf_id_set8 {
#include <linux/compiler.h> /* for __PASTE */
#include <linux/compiler_attributes.h> /* for __maybe_unused */
+#include <linux/stringify.h>
/*
* Following macros help to define lists of BTF IDs placed
@@ -49,7 +53,7 @@ word \
____BTF_ID(symbol, word)
#define __ID(prefix) \
- __PASTE(prefix, __COUNTER__)
+ __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
/*
* The BTF_ID defines unique symbol for each ID pointing
@@ -183,17 +187,18 @@ extern struct btf_id_set name;
* .word (1 << 3) | (1 << 1) | (1 << 2)
*
*/
-#define __BTF_SET8_START(name, scope) \
+#define __BTF_SET8_START(name, scope, flags) \
+__BTF_ID_LIST(name, local) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
"." #scope " __BTF_ID__set8__" #name "; \n" \
"__BTF_ID__set8__" #name ":; \n" \
-".zero 8 \n" \
+".zero 4 \n" \
+".long " __stringify(flags) "\n" \
".popsection; \n");
#define BTF_SET8_START(name) \
-__BTF_ID_LIST(name, local) \
-__BTF_SET8_START(name, local)
+__BTF_SET8_START(name, local, 0)
#define BTF_SET8_END(name) \
asm( \
@@ -202,6 +207,12 @@ asm( \
".popsection; \n"); \
extern struct btf_id_set8 name;
+#define BTF_KFUNCS_START(name) \
+__BTF_SET8_START(name, local, BTF_SET8_KFUNCS)
+
+#define BTF_KFUNCS_END(name) \
+BTF_SET8_END(name)
+
#else
#define BTF_ID_LIST(name) static u32 __maybe_unused name[64];
@@ -216,6 +227,8 @@ extern struct btf_id_set8 name;
#define BTF_SET_END(name)
#define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 };
#define BTF_SET8_END(name)
+#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS };
+#define BTF_KFUNCS_END(name)
#endif /* CONFIG_DEBUG_INFO_BTF */
@@ -267,5 +280,6 @@ MAX_BTF_TRACING_TYPE,
extern u32 btf_tracing_ids[];
extern u32 bpf_cgroup_btf_id[];
extern u32 bpf_local_storage_map_btf_id[];
+extern u32 btf_bpf_map_id[];
#endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6cb3e9af78c9..d78454a4dd1f 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -16,8 +16,6 @@
#include <linux/wait.h>
#include <linux/atomic.h>
-#ifdef CONFIG_BLOCK
-
enum bh_state_bits {
BH_Uptodate, /* Contains valid data */
BH_Dirty, /* Is dirty */
@@ -173,7 +171,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
return test_bit_acquire(BH_Uptodate, &bh->b_state);
}
-#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+static inline unsigned long bh_offset(const struct buffer_head *bh)
+{
+ return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
+}
/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page) \
@@ -194,29 +195,19 @@ void buffer_check_dirty_writeback(struct folio *folio,
void mark_buffer_dirty(struct buffer_head *bh);
void mark_buffer_write_io_error(struct buffer_head *bh);
void touch_buffer(struct buffer_head *bh);
-void set_bh_page(struct buffer_head *bh,
- struct page *page, unsigned long offset);
void folio_set_bh(struct buffer_head *bh, struct folio *folio,
unsigned long offset);
-bool try_to_free_buffers(struct folio *);
struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
- bool retry);
+ gfp_t gfp);
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
bool retry);
-void create_empty_buffers(struct page *, unsigned long,
- unsigned long b_state);
-void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
- unsigned long b_state);
+struct buffer_head *create_empty_buffers(struct folio *folio,
+ unsigned long blocksize, unsigned long b_state);
void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
-void end_buffer_async_write(struct buffer_head *bh, int uptodate);
/* Things to do with buffers at mapping->private_list */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
-int inode_has_buffers(struct inode *);
-void invalidate_inode_buffers(struct inode *);
-int remove_inode_buffers(struct inode *inode);
-int sync_mapping_buffers(struct address_space *mapping);
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
bool datasync);
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
@@ -233,16 +224,13 @@ void __wait_on_buffer(struct buffer_head *);
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
unsigned size);
-struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp);
+struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
void __breadahead(struct block_device *, sector_t block, unsigned int size);
struct buffer_head *__bread_gfp(struct block_device *,
sector_t block, unsigned size, gfp_t gfp);
-void invalidate_bh_lrus(void);
-void invalidate_bh_lrus_cpu(void);
-bool has_bh_in_lru(int cpu, void *dummy);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
@@ -258,18 +246,15 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
void __bh_read_batch(int nr, struct buffer_head *bhs[],
blk_opf_t op_flags, bool force_lock);
-extern int buffer_heads_over_limit;
-
/*
* Generic address_space_operations implementations for buffer_head-backed
* address_spaces.
*/
void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
-int block_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc);
+int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
+ void *get_block);
int __block_write_full_folio(struct inode *inode, struct folio *folio,
- get_block_t *get_block, struct writeback_control *wbc,
- bh_end_io_t *handler);
+ get_block_t *get_block, struct writeback_control *wbc);
int block_read_full_folio(struct folio *, get_block_t *);
bool block_is_partially_uptodate(struct folio *, size_t from, size_t count);
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
@@ -283,26 +268,13 @@ int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
-void clean_page_buffers(struct page *page);
int cont_write_begin(struct file *, struct address_space *, loff_t,
unsigned, struct page **, void **,
get_block_t *, loff_t *);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
-int block_commit_write(struct page *page, unsigned from, unsigned to);
+void block_commit_write(struct page *page, unsigned int from, unsigned int to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block);
-/* Convert errno to return value from ->page_mkwrite() call */
-static inline vm_fault_t block_page_mkwrite_return(int err)
-{
- if (err == 0)
- return VM_FAULT_LOCKED;
- if (err == -EFAULT || err == -EAGAIN)
- return VM_FAULT_NOPAGE;
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- /* -ENOSPC, -EDQUOT, -EIO ... */
- return VM_FAULT_SIGBUS;
-}
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
@@ -316,8 +288,6 @@ extern int buffer_migrate_folio_norefs(struct address_space *,
#define buffer_migrate_folio_norefs NULL
#endif
-void buffer_init(void);
-
/*
* inline definitions
*/
@@ -363,17 +333,38 @@ sb_breadahead(struct super_block *sb, sector_t block)
__breadahead(sb->s_bdev, block, sb->s_blocksize);
}
-static inline struct buffer_head *
-sb_getblk(struct super_block *sb, sector_t block)
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+ sector_t block, unsigned size)
{
- return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+ gfp_t gfp;
+
+ gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+ gfp |= __GFP_NOFAIL;
+
+ return bdev_getblk(bdev, block, size, gfp);
}
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+ sector_t block, unsigned size)
+{
+ gfp_t gfp;
-static inline struct buffer_head *
-sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp)
+ gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+ gfp |= __GFP_MOVABLE | __GFP_NOFAIL;
+
+ return bdev_getblk(bdev, block, size, gfp);
+}
+
+static inline struct buffer_head *sb_getblk(struct super_block *sb,
+ sector_t block)
{
- return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp);
+ return __getblk(sb->s_bdev, block, sb->s_blocksize);
+}
+
+static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb,
+ sector_t block, gfp_t gfp)
+{
+ return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp);
}
static inline struct buffer_head *
@@ -410,20 +401,6 @@ static inline void lock_buffer(struct buffer_head *bh)
__lock_buffer(bh);
}
-static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
- sector_t block,
- unsigned size)
-{
- return __getblk_gfp(bdev, block, size, 0);
-}
-
-static inline struct buffer_head *__getblk(struct block_device *bdev,
- sector_t block,
- unsigned size)
-{
- return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
-}
-
static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
{
if (!buffer_uptodate(bh) && trylock_buffer(bh)) {
@@ -475,9 +452,44 @@ __bread(struct block_device *bdev, sector_t block, unsigned size)
return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}
+/**
+ * get_nth_bh - Get a reference on the n'th buffer after this one.
+ * @bh: The buffer to start counting from.
+ * @count: How many buffers to skip.
+ *
+ * This is primarily useful for finding the nth buffer in a folio; in
+ * that case you pass the head buffer and the byte offset in the folio
+ * divided by the block size. It can be used for other purposes, but
+ * it will wrap at the end of the folio rather than returning NULL or
+ * proceeding to the next folio for you.
+ *
+ * Return: The requested buffer with an elevated refcount.
+ */
+static inline __must_check
+struct buffer_head *get_nth_bh(struct buffer_head *bh, unsigned int count)
+{
+ while (count--)
+ bh = bh->b_this_page;
+ get_bh(bh);
+ return bh;
+}
+
bool block_dirty_folio(struct address_space *mapping, struct folio *folio);
-#else /* CONFIG_BLOCK */
+#ifdef CONFIG_BUFFER_HEAD
+
+void buffer_init(void);
+bool try_to_free_buffers(struct folio *folio);
+int inode_has_buffers(struct inode *inode);
+void invalidate_inode_buffers(struct inode *inode);
+int remove_inode_buffers(struct inode *inode);
+int sync_mapping_buffers(struct address_space *mapping);
+void invalidate_bh_lrus(void);
+void invalidate_bh_lrus_cpu(void);
+bool has_bh_in_lru(int cpu, void *dummy);
+extern int buffer_heads_over_limit;
+
+#else /* CONFIG_BUFFER_HEAD */
static inline void buffer_init(void) {}
static inline bool try_to_free_buffers(struct folio *folio) { return true; }
@@ -485,9 +497,10 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bh_lrus(void) {}
static inline void invalidate_bh_lrus_cpu(void) {}
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
#define buffer_heads_over_limit 0
-#endif /* CONFIG_BLOCK */
+#endif /* CONFIG_BUFFER_HEAD */
#endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 3b7a0ff4642f..20aa3c2d89f7 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -2,15 +2,16 @@
#ifndef _LINUX_BUILDID_H
#define _LINUX_BUILDID_H
-#include <linux/mm_types.h>
+#include <linux/types.h>
#define BUILD_ID_SIZE_MAX 20
+struct vm_area_struct;
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size);
int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE)
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)
extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX];
void init_vmlinux_build_id(void);
#else
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 555aae5448ae..bd1e361b351c 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -83,7 +83,7 @@ struct bvec_iter {
unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
-} __packed;
+} __packed __aligned(4);
struct bvec_iter_all {
struct bio_vec bv;
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 9900d20b76c2..0ecb17bb6883 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -85,6 +85,31 @@
#define cache_line_size() L1_CACHE_BYTES
#endif
+#ifndef __cacheline_group_begin
+#define __cacheline_group_begin(GROUP) \
+ __u8 __cacheline_group_begin__##GROUP[0]
+#endif
+
+#ifndef __cacheline_group_end
+#define __cacheline_group_end(GROUP) \
+ __u8 __cacheline_group_end__##GROUP[0]
+#endif
+
+#ifndef CACHELINE_ASSERT_GROUP_MEMBER
+#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
+ BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \
+ offsetofend(TYPE, __cacheline_group_begin__##GROUP) && \
+ offsetofend(TYPE, MEMBER) <= \
+ offsetof(TYPE, __cacheline_group_end__##GROUP)))
+#endif
+
+#ifndef CACHELINE_ASSERT_GROUP_SIZE
+#define CACHELINE_ASSERT_GROUP_SIZE(TYPE, GROUP, SIZE) \
+ BUILD_BUG_ON(offsetof(TYPE, __cacheline_group_end__##GROUP) - \
+ offsetofend(TYPE, __cacheline_group_begin__##GROUP) > \
+ SIZE)
+#endif
+
/*
* Helper to add padding within a struct to ensure data fall into separate
* cachelines.
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
index a6189d21f2ba..55f297b2c23f 100644
--- a/include/linux/cacheflush.h
+++ b/include/linux/cacheflush.h
@@ -7,14 +7,23 @@
struct folio;
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+#ifndef flush_dcache_folio
void flush_dcache_folio(struct folio *folio);
#endif
#else
static inline void flush_dcache_folio(struct folio *folio)
{
}
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#define flush_dcache_folio flush_dcache_folio
#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+#ifndef flush_icache_pages
+static inline void flush_icache_pages(struct vm_area_struct *vma,
+ struct page *page, unsigned int nr)
+{
+}
+#endif
+
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
+
#endif /* _LINUX_CACHEFLUSH_H */
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index a5cfd44fab45..2cb15fe4fe12 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -73,6 +73,7 @@ struct cacheinfo {
struct cpu_cacheinfo {
struct cacheinfo *info_list;
+ unsigned int per_cpu_data_slice_size;
unsigned int num_levels;
unsigned int num_leaves;
bool cpu_map_populated;
@@ -137,4 +138,10 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level)
#define use_arch_cache_info() (false)
#endif
+#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING
+#define cpu_dcache_is_aliasing() false
+#else
+#include <asm/cachetype.h>
+#endif
+
#endif /* _LINUX_CACHEINFO_H */
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 982ba245eb41..1b92aed49363 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev);
void can_bus_off(struct net_device *dev);
const char *can_get_state_str(const enum can_state state);
+void can_state_get_by_berr_counter(const struct net_device *dev,
+ const struct can_berr_counter *bec,
+ enum can_state *tx_state,
+ enum can_state *rx_state);
void can_change_state(struct net_device *dev, struct can_frame *cf,
enum can_state tx_state, enum can_state rx_state);
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index c205c51d79c9..d29bb4521947 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -3,7 +3,7 @@
* linux/can/rx-offload.h
*
* Copyright (c) 2014 David Jander, Protonic Holland
- * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ * Copyright (c) 2014-2017, 2023 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
*/
#ifndef _CAN_RX_OFFLOAD_H
@@ -44,11 +44,14 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
int can_rx_offload_queue_timestamp(struct can_rx_offload *offload,
struct sk_buff *skb, u32 timestamp);
-unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
- unsigned int idx, u32 timestamp,
- unsigned int *frame_len_ptr);
+unsigned int can_rx_offload_get_echo_skb_queue_timestamp(struct can_rx_offload *offload,
+ unsigned int idx, u32 timestamp,
+ unsigned int *frame_len_ptr);
int can_rx_offload_queue_tail(struct can_rx_offload *offload,
struct sk_buff *skb);
+unsigned int can_rx_offload_get_echo_skb_queue_tail(struct can_rx_offload *offload,
+ unsigned int idx,
+ unsigned int *frame_len_ptr);
void can_rx_offload_irq_finish(struct can_rx_offload *offload);
void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload);
void can_rx_offload_del(struct can_rx_offload *offload);
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index cb0d6cd1c12f..60693a145894 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -90,6 +90,14 @@ enum cc_attr {
* Examples include TDX Guest.
*/
CC_ATTR_HOTPLUG_DISABLED,
+
+ /**
+ * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
+ *
+ * The host kernel is running with the necessary features
+ * enabled to run SEV-SNP guests.
+ */
+ CC_ATTR_HOST_SEV_SNP,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
@@ -107,10 +115,14 @@ enum cc_attr {
* * FALSE - Specified Confidential Computing attribute is not active
*/
bool cc_platform_has(enum cc_attr attr);
+void cc_platform_set(enum cc_attr attr);
+void cc_platform_clear(enum cc_attr attr);
#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+static inline void cc_platform_set(enum cc_attr attr) { }
+static inline void cc_platform_clear(enum cc_attr attr) { }
#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h
index bead71b7bc73..b57118aaa679 100644
--- a/include/linux/cdx/cdx_bus.h
+++ b/include/linux/cdx/cdx_bus.h
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
+#include <linux/msi.h>
#define MAX_CDX_DEV_RESOURCES 4
#define CDX_CONTROLLER_ID_SHIFT 4
@@ -21,13 +22,31 @@
struct cdx_controller;
enum {
+ CDX_DEV_MSI_CONF,
+ CDX_DEV_BUS_MASTER_CONF,
CDX_DEV_RESET_CONF,
+ CDX_DEV_MSI_ENABLE,
+};
+
+struct cdx_msi_config {
+ u64 addr;
+ u32 data;
+ u16 msi_index;
};
struct cdx_device_config {
u8 type;
+ union {
+ struct cdx_msi_config msi;
+ bool bus_master_enable;
+ bool msi_enable;
+ };
};
+typedef int (*cdx_bus_enable_cb)(struct cdx_controller *cdx, u8 bus_num);
+
+typedef int (*cdx_bus_disable_cb)(struct cdx_controller *cdx, u8 bus_num);
+
typedef int (*cdx_scan_cb)(struct cdx_controller *cdx);
typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx,
@@ -35,6 +54,19 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx,
struct cdx_device_config *dev_config);
/**
+ * CDX_DEVICE - macro used to describe a specific CDX device
+ * @vend: the 16 bit CDX Vendor ID
+ * @dev: the 16 bit CDX Device ID
+ *
+ * This macro is used to create a struct cdx_device_id that matches a
+ * specific device. The subvendor and subdevice fields will be set to
+ * CDX_ANY_ID.
+ */
+#define CDX_DEVICE(vend, dev) \
+ .vendor = (vend), .device = (dev), \
+ .subvendor = CDX_ANY_ID, .subdevice = CDX_ANY_ID
+
+/**
* CDX_DEVICE_DRIVER_OVERRIDE - macro used to describe a CDX device with
* override_only flags.
* @vend: the 16 bit CDX Vendor ID
@@ -42,18 +74,24 @@ typedef int (*cdx_dev_configure_cb)(struct cdx_controller *cdx,
* @driver_override: the 32 bit CDX Device override_only
*
* This macro is used to create a struct cdx_device_id that matches only a
- * driver_override device.
+ * driver_override device. The subvendor and subdevice fields will be set to
+ * CDX_ANY_ID.
*/
#define CDX_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \
- .vendor = (vend), .device = (dev), .override_only = (driver_override)
+ .vendor = (vend), .device = (dev), .subvendor = CDX_ANY_ID,\
+ .subdevice = CDX_ANY_ID, .override_only = (driver_override)
/**
* struct cdx_ops - Callbacks supported by CDX controller.
+ * @bus_enable: enable bus on the controller
+ * @bus_disable: disable bus on the controller
* @scan: scan the devices on the controller
* @dev_configure: configuration like reset, master_enable,
* msi_config etc for a CDX device
*/
struct cdx_ops {
+ cdx_bus_enable_cb bus_enable;
+ cdx_bus_disable_cb bus_disable;
cdx_scan_cb scan;
cdx_dev_configure_cb dev_configure;
};
@@ -62,13 +100,17 @@ struct cdx_ops {
* struct cdx_controller: CDX controller object
* @dev: Linux device associated with the CDX controller.
* @priv: private data
+ * @msi_domain: MSI domain
* @id: Controller ID
+ * @controller_registered: controller registered with bus
* @ops: CDX controller ops
*/
struct cdx_controller {
struct device *dev;
void *priv;
+ struct irq_domain *msi_domain;
u32 id;
+ bool controller_registered;
struct cdx_ops *ops;
};
@@ -78,36 +120,68 @@ struct cdx_controller {
* @cdx: CDX controller associated with the device
* @vendor: Vendor ID for CDX device
* @device: Device ID for CDX device
+ * @subsystem_vendor: Subsystem Vendor ID for CDX device
+ * @subsystem_device: Subsystem Device ID for CDX device
+ * @class: Class for the CDX device
+ * @revision: Revision of the CDX device
* @bus_num: Bus number for this CDX device
* @dev_num: Device number for this device
* @res: array of MMIO region entries
* @res_attr: resource binary attribute
+ * @debugfs_dir: debugfs directory for this device
* @res_count: number of valid MMIO regions
* @dma_mask: Default DMA mask
* @flags: CDX device flags
* @req_id: Requestor ID associated with CDX device
+ * @is_bus: Is this bus device
+ * @enabled: is this bus enabled
+ * @msi_dev_id: MSI Device ID associated with CDX device
+ * @num_msi: Number of MSI's supported by the device
* @driver_override: driver name to force a match; do not set directly,
* because core frees it; use driver_set_override() to
* set or clear it.
+ * @irqchip_lock: lock to synchronize irq/msi configuration
+ * @msi_write_pending: MSI write pending for this device
*/
struct cdx_device {
struct device dev;
struct cdx_controller *cdx;
u16 vendor;
u16 device;
+ u16 subsystem_vendor;
+ u16 subsystem_device;
+ u32 class;
+ u8 revision;
u8 bus_num;
u8 dev_num;
struct resource res[MAX_CDX_DEV_RESOURCES];
+ struct bin_attribute *res_attr[MAX_CDX_DEV_RESOURCES];
+ struct dentry *debugfs_dir;
u8 res_count;
u64 dma_mask;
u16 flags;
u32 req_id;
+ bool is_bus;
+ bool enabled;
+ u32 msi_dev_id;
+ u32 num_msi;
const char *driver_override;
+ struct mutex irqchip_lock;
+ bool msi_write_pending;
};
#define to_cdx_device(_dev) \
container_of(_dev, struct cdx_device, dev)
+#define cdx_resource_start(dev, num) ((dev)->res[(num)].start)
+#define cdx_resource_end(dev, num) ((dev)->res[(num)].end)
+#define cdx_resource_flags(dev, num) ((dev)->res[(num)].flags)
+#define cdx_resource_len(dev, num) \
+ ((cdx_resource_start((dev), (num)) == 0 && \
+ cdx_resource_end((dev), (num)) == \
+ cdx_resource_start((dev), (num))) ? 0 : \
+ (cdx_resource_end((dev), (num)) - \
+ cdx_resource_start((dev), (num)) + 1))
/**
* struct cdx_driver - CDX device driver
* @driver: Generic device driver
@@ -170,4 +244,48 @@ extern struct bus_type cdx_bus_type;
*/
int cdx_dev_reset(struct device *dev);
+/**
+ * cdx_set_master - enables bus-mastering for CDX device
+ * @cdx_dev: the CDX device to enable
+ *
+ * Return: 0 for success, -errno on failure
+ */
+int cdx_set_master(struct cdx_device *cdx_dev);
+
+/**
+ * cdx_clear_master - disables bus-mastering for CDX device
+ * @cdx_dev: the CDX device to disable
+ *
+ * Return: 0 for success, -errno on failure
+ */
+int cdx_clear_master(struct cdx_device *cdx_dev);
+
+#ifdef CONFIG_GENERIC_MSI_IRQ
+/**
+ * cdx_enable_msi - Enable MSI for the CDX device.
+ * @cdx_dev: device pointer
+ *
+ * Return: 0 for success, -errno on failure
+ */
+int cdx_enable_msi(struct cdx_device *cdx_dev);
+
+/**
+ * cdx_disable_msi - Disable MSI for the CDX device.
+ * @cdx_dev: device pointer
+ */
+void cdx_disable_msi(struct cdx_device *cdx_dev);
+
+#else /* CONFIG_GENERIC_MSI_IRQ */
+
+static inline int cdx_enable_msi(struct cdx_device *cdx_dev)
+{
+ return -ENODEV;
+}
+
+static inline void cdx_disable_msi(struct cdx_device *cdx_dev)
+{
+}
+
+#endif /* CONFIG_GENERIC_MSI_IRQ */
+
#endif /* _CDX_BUS_H_ */
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index d5a5da838caf..11a92a946016 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -19,12 +19,25 @@
pr_debug("%.*s %12.12s:%-4d : " fmt, \
8 - (int)sizeof(KBUILD_MODNAME), " ", \
kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
+# define doutc(client, fmt, ...) \
+ pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \
+ 8 - (int)sizeof(KBUILD_MODNAME), " ", \
+ kbasename(__FILE__), __LINE__, \
+ &client->fsid, client->monc.auth->global_id, \
+ ##__VA_ARGS__)
# else
/* faux printk call just to see any compiler warnings. */
# define dout(fmt, ...) do { \
if (0) \
printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
} while (0)
+# define doutc(client, fmt, ...) do { \
+ if (0) \
+ printk(KERN_DEBUG "[%pU %llu] " fmt, \
+ &client->fsid, \
+ client->monc.auth->global_id, \
+ ##__VA_ARGS__); \
+ } while (0)
# endif
#else
@@ -33,7 +46,32 @@
* or, just wrap pr_debug
*/
# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
+# define doutc(client, fmt, ...) \
+ pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \
+ client->monc.auth->global_id, __func__, ##__VA_ARGS__)
#endif
+#define pr_notice_client(client, fmt, ...) \
+ pr_notice("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_info_client(client, fmt, ...) \
+ pr_info("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_warn_client(client, fmt, ...) \
+ pr_warn("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_warn_once_client(client, fmt, ...) \
+ pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_err_client(client, fmt, ...) \
+ pr_err("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_warn_ratelimited_client(client, fmt, ...) \
+ pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+#define pr_err_ratelimited_client(client, fmt, ...) \
+ pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \
+ client->monc.auth->global_id, ##__VA_ARGS__)
+
#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 49586ff26152..ee1d0e5f9789 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -357,16 +357,26 @@ enum {
CEPH_MDS_OP_RENAMESNAP = 0x01403,
};
-extern const char *ceph_mds_op_name(int op);
+#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \
+ op == CEPH_MDS_OP_MKNOD || \
+ op == CEPH_MDS_OP_MKDIR || \
+ op == CEPH_MDS_OP_SYMLINK)
+extern const char *ceph_mds_op_name(int op);
-#define CEPH_SETATTR_MODE 1
-#define CEPH_SETATTR_UID 2
-#define CEPH_SETATTR_GID 4
-#define CEPH_SETATTR_MTIME 8
-#define CEPH_SETATTR_ATIME 16
-#define CEPH_SETATTR_SIZE 32
-#define CEPH_SETATTR_CTIME 64
+#define CEPH_SETATTR_MODE (1 << 0)
+#define CEPH_SETATTR_UID (1 << 1)
+#define CEPH_SETATTR_GID (1 << 2)
+#define CEPH_SETATTR_MTIME (1 << 3)
+#define CEPH_SETATTR_ATIME (1 << 4)
+#define CEPH_SETATTR_SIZE (1 << 5)
+#define CEPH_SETATTR_CTIME (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW (1 << 8)
+#define CEPH_SETATTR_BTIME (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12)
/*
* Ceph setxattr request flags.
@@ -479,7 +489,7 @@ union ceph_mds_request_args_ext {
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
-struct ceph_mds_request_head_old {
+struct ceph_mds_request_head_legacy {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
@@ -492,9 +502,9 @@ struct ceph_mds_request_head_old {
union ceph_mds_request_args args;
} __attribute__ ((packed));
-#define CEPH_MDS_REQUEST_HEAD_VERSION 1
+#define CEPH_MDS_REQUEST_HEAD_VERSION 3
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
@@ -508,6 +518,26 @@ struct ceph_mds_request_head {
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
+struct ceph_mds_request_head {
+ __le16 version; /* struct version */
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args_ext args;
+
+ __le32 ext_num_retry; /* new count retry attempts */
+ __le32 ext_num_fwd; /* new count fwd attempts */
+
+ __le32 struct_len; /* to store size of struct ceph_mds_request_head */
+ __le32 owner_uid, owner_gid; /* used for OPs which create inodes */
+} __attribute__ ((packed));
+
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
deleted file mode 100644
index 4c3e0648dc27..000000000000
--- a/include/linux/ceph/mdsmap.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _FS_CEPH_MDSMAP_H
-#define _FS_CEPH_MDSMAP_H
-
-#include <linux/bug.h>
-#include <linux/ceph/types.h>
-
-/*
- * mds map - describe servers in the mds cluster.
- *
- * we limit fields to those the client actually xcares about
- */
-struct ceph_mds_info {
- u64 global_id;
- struct ceph_entity_addr addr;
- s32 state;
- int num_export_targets;
- bool laggy;
- u32 *export_targets;
-};
-
-struct ceph_mdsmap {
- u32 m_epoch, m_client_epoch, m_last_failure;
- u32 m_root;
- u32 m_session_timeout; /* seconds */
- u32 m_session_autoclose; /* seconds */
- u64 m_max_file_size;
- u64 m_max_xattr_size; /* maximum size for xattrs blob */
- u32 m_max_mds; /* expected up:active mds number */
- u32 m_num_active_mds; /* actual up:active mds number */
- u32 possible_max_rank; /* possible max rank index */
- struct ceph_mds_info *m_info;
-
- /* which object pools file data can be stored in */
- int m_num_data_pg_pools;
- u64 *m_data_pg_pools;
- u64 m_cas_pg_pool;
-
- bool m_enabled;
- bool m_damaged;
- int m_num_laggy;
-};
-
-static inline struct ceph_entity_addr *
-ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
-{
- if (w >= m->possible_max_rank)
- return NULL;
- return &m->m_info[w].addr;
-}
-
-static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
-{
- BUG_ON(w < 0);
- if (w >= m->possible_max_rank)
- return CEPH_MDS_STATE_DNE;
- return m->m_info[w].state;
-}
-
-static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
-{
- if (w >= 0 && w < m->possible_max_rank)
- return m->m_info[w].laggy;
- return false;
-}
-
-extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
-struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
-extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
-extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
-
-#endif
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 99c1726be6ee..1717cc57cdac 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -17,6 +17,7 @@
struct ceph_msg;
struct ceph_connection;
+struct ceph_msg_data_cursor;
/*
* Ceph defines these callbacks for handling connection events.
@@ -70,6 +71,30 @@ struct ceph_connection_operations {
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
+
+ /**
+ * sparse_read: read sparse data
+ * @con: connection we're reading from
+ * @cursor: data cursor for reading extents
+ * @buf: optional buffer to read into
+ *
+ * This should be called more than once, each time setting up to
+ * receive an extent into the current cursor position, and zeroing
+ * the holes between them.
+ *
+ * Returns amount of data to be read (in bytes), 0 if reading is
+ * complete, or -errno if there was an error.
+ *
+ * If @buf is set on a >0 return, then the data should be read into
+ * the provided buffer. Otherwise, it should be read into the cursor.
+ *
+ * The sparse read operation is expected to initialize the cursor
+ * with a length covering up to the end of the last extent.
+ */
+ int (*sparse_read)(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **buf);
+
};
/* use format string %s%lld */
@@ -98,6 +123,7 @@ enum ceph_msg_data_type {
CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
#endif /* CONFIG_BLOCK */
CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */
+ CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */
};
#ifdef CONFIG_BLOCK
@@ -199,6 +225,7 @@ struct ceph_msg_data {
bool own_pages;
};
struct ceph_pagelist *pagelist;
+ struct iov_iter iter;
};
};
@@ -207,6 +234,7 @@ struct ceph_msg_data_cursor {
struct ceph_msg_data *data; /* current data item */
size_t resid; /* bytes not yet consumed */
+ int sr_resid; /* residual sparse_read len */
bool need_crc; /* crc update needed */
union {
#ifdef CONFIG_BLOCK
@@ -222,6 +250,10 @@ struct ceph_msg_data_cursor {
struct page *page; /* page from list */
size_t offset; /* bytes from list */
};
+ struct {
+ struct iov_iter iov_iter;
+ unsigned int lastlen;
+ };
};
};
@@ -251,6 +283,7 @@ struct ceph_msg {
struct kref kref;
bool more_to_follow;
bool needs_out_seq;
+ u64 sparse_read_total;
int front_alloc_len;
struct ceph_msgpool *pool;
@@ -309,6 +342,10 @@ struct ceph_connection_v1_info {
int in_base_pos; /* bytes read */
+ /* sparse reads */
+ struct kvec in_sr_kvec; /* current location to receive into */
+ u64 in_sr_len; /* amount of data in this extent */
+
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
@@ -395,6 +432,7 @@ struct ceph_connection_v2_info {
void *conn_bufs[16];
int conn_buf_cnt;
+ int data_len_remain;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
@@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
#endif /* CONFIG_BLOCK */
void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter);
struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
gfp_t flags, bool can_fail);
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index b658961156a0..7a9a40163c0f 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -19,7 +19,7 @@ struct ceph_monmap {
struct ceph_fsid fsid;
u32 epoch;
u32 num_mon;
- struct ceph_entity_inst mon_inst[];
+ struct ceph_entity_inst mon_inst[] __counted_by(num_mon);
};
struct ceph_mon_client;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fb6be72104df..f66f6aac74f6 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -29,14 +29,63 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
#define CEPH_HOMELESS_OSD -1
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+ u64 off;
+ u64 len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+ CEPH_SPARSE_READ_HDR = 0,
+ CEPH_SPARSE_READ_EXTENTS,
+ CEPH_SPARSE_READ_DATA_LEN,
+ CEPH_SPARSE_READ_DATA_PRE,
+ CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+ enum ceph_sparse_read_state sr_state; /* state machine state */
+ u64 sr_req_off; /* orig request offset */
+ u64 sr_req_len; /* orig request length */
+ u64 sr_pos; /* current pos in buffer */
+ int sr_index; /* current extent index */
+ u32 sr_datalen; /* length of actual data */
+ u32 sr_count; /* extent count in reply */
+ int sr_ext_len; /* length of extent array */
+ struct ceph_sparse_extent *sr_extent; /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
struct ceph_osd {
refcount_t o_ref;
+ int o_sparse_op_idx;
struct ceph_osd_client *o_osdc;
int o_osd;
int o_incarnation;
struct rb_node o_node;
struct ceph_connection o_con;
+ spinlock_t o_requests_lock;
struct rb_root o_requests;
struct rb_root o_linger_requests;
struct rb_root o_backoff_mappings;
@@ -46,6 +95,7 @@ struct ceph_osd {
unsigned long lru_ttl;
struct list_head o_keepalive_item;
struct mutex lock;
+ struct ceph_sparse_read o_sparse_read;
};
#define CEPH_OSD_SLAB_OPS 2
@@ -59,6 +109,7 @@ enum ceph_osd_data_type {
CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
CEPH_OSD_DATA_TYPE_BVECS,
+ CEPH_OSD_DATA_TYPE_ITER,
};
struct ceph_osd_data {
@@ -82,6 +133,7 @@ struct ceph_osd_data {
struct ceph_bvec_iter bvec_pos;
u32 num_bvecs;
};
+ struct iov_iter iter;
};
};
@@ -98,6 +150,8 @@ struct ceph_osd_req_op {
u64 offset, length;
u64 truncate_size;
u32 truncate_seq;
+ int sparse_ext_cnt;
+ struct ceph_sparse_extent *sparse_ext;
struct ceph_osd_data osd_data;
} extent;
struct {
@@ -145,6 +199,9 @@ struct ceph_osd_req_op {
u32 src_fadvise_flags;
struct ceph_osd_data osd_data;
} copy_from;
+ struct {
+ u64 ver;
+ } assert_ver;
};
};
@@ -199,6 +256,7 @@ struct ceph_osd_request {
struct ceph_osd_client *r_osdc;
struct kref r_kref;
bool r_mempool;
+ bool r_linger; /* don't resend on failure */
struct completion r_completion; /* private to osd_client.c */
ceph_osdc_callback_t r_callback;
@@ -211,9 +269,9 @@ struct ceph_osd_request {
struct ceph_snap_context *r_snapc; /* for writes */
struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
- bool r_linger; /* don't resend on failure */
/* internal */
+ u64 r_version; /* data version sent in reply */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
ktime_t r_start_latency; /* ktime_t */
@@ -221,7 +279,7 @@ struct ceph_osd_request {
int r_attempts;
u32 r_map_dne_bound;
- struct ceph_osd_req_op r_ops[];
+ struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops);
};
struct ceph_request_redirect {
@@ -450,6 +508,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter);
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
unsigned int which,
@@ -504,6 +564,23 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
u32 truncate_seq, u64 truncate_size,
bool use_mempool);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value. If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+ if (!cnt)
+ cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;
+
+ return __ceph_alloc_sparse_ext_map(op, cnt);
+}
+
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
@@ -558,5 +635,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers);
-#endif
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+ struct ceph_sparse_extent *ext;
+
+ /* No extents? No data */
+ if (op->extent.sparse_ext_cnt == 0)
+ return 0;
+
+ ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+ return ext->off + ext->len - op->extent.offset;
+}
+
+#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 43a7a1573b51..73c3efbec36c 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -524,6 +524,10 @@ struct ceph_osd_op {
__le64 cookie;
} __attribute__ ((packed)) notify;
struct {
+ __le64 unused;
+ __le64 ver;
+ } __attribute__ ((packed)) assert_ver;
+ struct {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index 5e134f4ce8b7..f0df518e11dd 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -9,6 +9,14 @@
#include <linux/bug.h>
#include <linux/module.h>
+#include <asm/cfi.h>
+
+#ifndef cfi_get_offset
+static inline int cfi_get_offset(void)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
@@ -19,11 +27,13 @@ static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs,
{
return report_cfi_failure(regs, addr, NULL, 0);
}
+#endif /* CONFIG_CFI_CLANG */
#ifdef CONFIG_ARCH_USES_CFI_TRAPS
bool is_cfi_trap(unsigned long addr);
+#else
+static inline bool is_cfi_trap(unsigned long addr) { return false; }
#endif
-#endif /* CONFIG_CFI_CLANG */
#ifdef CONFIG_MODULES
#ifdef CONFIG_ARCH_USES_CFI_TRAPS
@@ -36,4 +46,8 @@ static inline void module_cfi_finalize(const Elf_Ehdr *hdr,
#endif /* CONFIG_ARCH_USES_CFI_TRAPS */
#endif /* CONFIG_MODULES */
+#ifndef CFI_NOSEAL
+#define CFI_NOSEAL(x)
+#endif
+
#endif /* _LINUX_CFI_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a0d5466c7be..ea48c861cd36 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -115,6 +115,11 @@ enum {
* Enable recursive subtree protection
*/
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
+
+ /*
+ * Enable hugetlb accounting for the memory controller.
+ */
+ CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
};
/* cftype->flags */
@@ -238,7 +243,7 @@ struct css_set {
* Lists running through all tasks using this cgroup group.
* mg_tasks lists tasks which belong to this cset but are in the
* process of being migrated out or in. Protected by
- * css_set_rwsem, but, during migration, once tasks are moved to
+ * css_set_lock, but, during migration, once tasks are moved to
* mg_tasks, it can be read safely while holding cgroup_mutex.
*/
struct list_head tasks;
@@ -342,6 +347,20 @@ struct cgroup_rstat_cpu {
struct cgroup_base_stat last_bstat;
/*
+ * This field is used to record the cumulative per-cpu time of
+ * the cgroup and its descendants. Currently it can be read via
+ * eBPF/drgn etc, and we are still trying to determine how to
+ * expose it in the cgroupfs interface.
+ */
+ struct cgroup_base_stat subtree_bstat;
+
+ /*
+ * Snapshots at the last reading. These are used to calculate the
+ * deltas to propagate to the per-cpu subtree_bstat.
+ */
+ struct cgroup_base_stat last_subtree_bstat;
+
+ /*
* Child cgroups with stat updates on this cpu since the last read
* are linked on the parent's ->updated_children through
* ->updated_next.
@@ -477,6 +496,20 @@ struct cgroup {
struct cgroup_rstat_cpu __percpu *rstat_cpu;
struct list_head rstat_css_list;
+ /*
+ * Add padding to separate the read mostly rstat_cpu and
+ * rstat_css_list into a different cacheline from the following
+ * rstat_flush_next and *bstat fields which can have frequent updates.
+ */
+ CACHELINE_PADDING(_pad_);
+
+ /*
+ * A singly-linked list of cgroup structures to be rstat flushed.
+ * This is a scratch field to be used exclusively by
+ * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+ */
+ struct cgroup *rstat_flush_next;
+
/* cgroup basic resource statistics */
struct cgroup_base_stat last_bstat;
struct cgroup_base_stat bstat;
@@ -529,6 +562,10 @@ struct cgroup_root {
/* Unique id for this hierarchy. */
int hierarchy_id;
+ /* A list running through the active hierarchies */
+ struct list_head root_list;
+ struct rcu_head rcu; /* Must be near the top */
+
/*
* The root cgroup. The containing cgroup_root will be destroyed on its
* release. cgrp->ancestors[0] will be used overflowing into the
@@ -542,9 +579,6 @@ struct cgroup_root {
/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
atomic_t nr_cgrps;
- /* A list running through the active hierarchies */
- struct list_head root_list;
-
/* Hierarchy-specific flags */
unsigned int flags;
@@ -661,6 +695,8 @@ struct cgroup_subsys {
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
int (*css_extra_stat_show)(struct seq_file *seq,
struct cgroup_subsys_state *css);
+ int (*css_local_stat_show)(struct seq_file *seq,
+ struct cgroup_subsys_state *css);
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b307013b9c6c..34aaf0e87def 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -40,13 +40,11 @@ struct kernel_clone_args;
#define CGROUP_WEIGHT_DFL 100
#define CGROUP_WEIGHT_MAX 10000
-/* walk only threadgroup leaders */
-#define CSS_TASK_ITER_PROCS (1U << 0)
-/* walk all threaded css_sets in the domain */
-#define CSS_TASK_ITER_THREADED (1U << 1)
-
-/* internal flags */
-#define CSS_TASK_ITER_SKIPPED (1U << 16)
+enum {
+ CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
+ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
+ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */
+};
/* a css_task_iter should be treated as an opaque object */
struct css_task_iter {
@@ -71,6 +69,7 @@ struct css_task_iter {
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
+extern spinlock_t css_set_lock;
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -388,7 +387,6 @@ static inline void cgroup_unlock(void)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
rcu_read_lock_sched_held() || \
@@ -855,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
#endif /* CONFIG_CGROUP_BPF */
+struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id);
+
#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 53f1a7a932b0..c2d09bc4f976 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -7,8 +7,9 @@
/*
* DEFINE_FREE(name, type, free):
* simple helper macro that defines the required wrapper for a __free()
- * based cleanup function. @free is an expression using '_T' to access
- * the variable.
+ * based cleanup function. @free is an expression using '_T' to access the
+ * variable. @free should typically include a NULL test before calling a
+ * function, see the example below.
*
* __free(name):
* variable attribute to add a scoped based cleanup to the variable.
@@ -17,6 +18,9 @@
* like a non-atomic xchg(var, NULL), such that the cleanup function will
* be inhibited -- provided it sanely deals with a NULL value.
*
+ * NOTE: this has __must_check semantics so that it is harder to accidentally
+ * leak the resource.
+ *
* return_ptr(p):
* returns p while inhibiting the __free().
*
@@ -24,6 +28,8 @@
*
* DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
*
+ * void *alloc_obj(...)
+ * {
* struct obj *p __free(kfree) = kmalloc(...);
* if (!p)
* return NULL;
@@ -32,6 +38,24 @@
* return NULL;
*
* return_ptr(p);
+ * }
+ *
+ * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though
+ * kfree() is fine to be called with a NULL value. This is on purpose. This way
+ * the compiler sees the end of our alloc_obj() function as:
+ *
+ * tmp = p;
+ * p = NULL;
+ * if (p)
+ * kfree(p);
+ * return tmp;
+ *
+ * And through the magic of value-propagation and dead-code-elimination, it
+ * eliminates the actual cleanup call and compiles into:
+ *
+ * return p;
+ *
+ * Without the NULL test it turns into a mess and the compiler can't help us.
*/
#define DEFINE_FREE(_name, _type, _free) \
@@ -39,8 +63,17 @@
#define __free(_name) __cleanup(__free_##_name)
+#define __get_and_null_ptr(p) \
+ ({ __auto_type __ptr = &(p); \
+ __auto_type __val = *__ptr; \
+ *__ptr = NULL; __val; })
+
+static inline __must_check
+const volatile void * __must_check_fn(const volatile void *val)
+{ return val; }
+
#define no_free_ptr(p) \
- ({ __auto_type __ptr = (p); (p) = NULL; __ptr; })
+ ((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
#define return_ptr(p) return no_free_ptr(p)
@@ -92,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
* trivial wrapper around DEFINE_CLASS() above specifically
* for locks.
*
+ * DEFINE_GUARD_COND(name, ext, condlock)
+ * wrapper around EXTEND_CLASS above to add conditional lock
+ * variants to a base class, eg. mutex_trylock() or
+ * mutex_lock_interruptible().
+ *
* guard(name):
- * an anonymous instance of the (guard) class
+ * an anonymous instance of the (guard) class, not recommended for
+ * conditional locks.
*
* scoped_guard (name, args...) { }:
* similar to CLASS(name, scope)(args), except the variable (with the
* explicit name 'scope') is declard in a for-loop such that its scope is
* bound to the next (compound) statement.
*
+ * for conditional locks the loop body is skipped when the lock is not
+ * acquired.
+ *
+ * scoped_cond_guard (name, fail, args...) { }:
+ * similar to scoped_guard(), except it does fail when the lock
+ * acquire fails.
+ *
*/
#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
- DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T)
+ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return *_T; }
+
+#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
+ EXTEND_CLASS(_name, _ext, \
+ ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \
+ class_##_name##_t _T) \
+ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ { return class_##_name##_lock_ptr(_T); }
#define guard(_name) \
CLASS(_name, __UNIQUE_ID(guard))
+#define __guard_ptr(_name) class_##_name##_lock_ptr
+
#define scoped_guard(_name, args...) \
for (CLASS(_name, scope)(args), \
- *done = NULL; !done; done = (void *)1)
+ *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1)
+
+#define scoped_cond_guard(_name, _fail, args...) \
+ for (CLASS(_name, scope)(args), \
+ *done = NULL; !done; done = (void *)1) \
+ if (!__guard_ptr(_name)(&scope)) _fail; \
+ else
/*
* Additional helper macros for generating lock guards with types, either for
@@ -119,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
*
* DEFINE_LOCK_GUARD_0(name, lock, unlock, ...)
* DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...)
+ * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock)
*
* will result in the following type:
*
@@ -140,6 +204,11 @@ typedef struct { \
static inline void class_##_name##_destructor(class_##_name##_t *_T) \
{ \
if (_T->lock) { _unlock; } \
+} \
+ \
+static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
+{ \
+ return _T->lock; \
}
@@ -168,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock)
__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \
__DEFINE_LOCK_GUARD_0(_name, _lock)
+#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \
+ EXTEND_CLASS(_name, _ext, \
+ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\
+ if (_T->lock && !(_condlock)) _T->lock = NULL; \
+ _t; }), \
+ typeof_member(class_##_name##_t, lock) l) \
+ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ { return class_##_name##_lock_ptr(_T); }
+
+
#endif /* __LINUX_GUARDS_H */
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 0f0cd01906b4..4a537260f655 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core,
unsigned long parent_rate);
/**
- * struct clk_duty - Struture encoding the duty cycle ratio of a clock
+ * struct clk_duty - Structure encoding the duty cycle ratio of a clock
*
* @num: Numerator of the duty cycle ratio
* @den: Denominator of the duty cycle ratio
@@ -129,7 +129,7 @@ struct clk_duty {
* @restore_context: Restore the context of the clock after a restoration
* of power.
*
- * @recalc_rate Recalculate the rate of this clock, by querying hardware. The
+ * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The
* parent rate is an input parameter. It is up to the caller to
* ensure that the prepare_mutex is held across this call. If the
* driver cannot figure out a rate for this clock, it must return
@@ -448,15 +448,15 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
*/
#define clk_hw_register_fixed_rate_with_accuracy_parent_hw(dev, name, \
parent_hw, flags, fixed_rate, fixed_accuracy) \
- __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw) \
- NULL, NULL, (flags), (fixed_rate), \
+ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \
+ NULL, (flags), (fixed_rate), \
(fixed_accuracy), 0, false)
/**
* clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate
* clock with the clock framework
* @dev: device that is registering this clock
* @name: name of this clock
- * @parent_name: name of clock's parent
+ * @parent_data: name of clock's parent
* @flags: framework-specific flags
* @fixed_rate: non-adjustable clock rate
* @fixed_accuracy: non-adjustable clock accuracy
@@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
* the clock framework
* @dev: device that is registering this clock
* @name: name of this clock
- * @parent_name: name of clock's parent
+ * @parent_data: name of clock's parent
* @flags: framework-specific flags
* @fixed_rate: non-adjustable clock rate
*/
@@ -649,7 +649,7 @@ struct clk_div_table {
* Clock with an adjustable divider affecting its output frequency. Implements
* .recalc_rate, .set_rate and .round_rate
*
- * Flags:
+ * @flags:
* CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the
* register plus one. If CLK_DIVIDER_ONE_BASED is set then the divider is
* the raw value read from the register, with the value of zero considered
@@ -1084,18 +1084,28 @@ void of_fixed_factor_clk_setup(struct device_node *node);
* @hw: handle between common and hardware-specific interfaces
* @mult: multiplier
* @div: divider
+ * @acc: fixed accuracy in ppb
+ * @flags: behavior modifying flags
*
* Clock with a fixed multiplier and divider. The output frequency is the
* parent clock rate divided by div and multiplied by mult.
- * Implements .recalc_rate, .set_rate and .round_rate
+ * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy
+ *
+ * Flags:
+ * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the
+ * parent clk accuracy.
*/
struct clk_fixed_factor {
struct clk_hw hw;
unsigned int mult;
unsigned int div;
+ unsigned long acc;
+ unsigned int flags;
};
+#define CLK_FIXED_FACTOR_FIXED_ACCURACY BIT(0)
+
#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw)
extern const struct clk_ops clk_fixed_factor_ops;
@@ -1106,10 +1116,24 @@ void clk_unregister_fixed_factor(struct clk *clk);
struct clk_hw *clk_hw_register_fixed_factor(struct device *dev,
const char *name, const char *parent_name, unsigned long flags,
unsigned int mult, unsigned int div);
+struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev,
+ struct device_node *np, const char *name, const char *fw_name,
+ unsigned long flags, unsigned int mult, unsigned int div);
+struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+ struct device_node *np, const char *name, const char *fw_name,
+ unsigned long flags, unsigned int mult, unsigned int div,
+ unsigned long acc);
void clk_hw_unregister_fixed_factor(struct clk_hw *hw);
struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev,
const char *name, const char *parent_name, unsigned long flags,
unsigned int mult, unsigned int div);
+struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev,
+ struct device_node *np, const char *name, const char *fw_name,
+ unsigned long flags, unsigned int mult, unsigned int div);
+struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+ struct device_node *np, const char *name, const char *fw_name,
+ unsigned long flags, unsigned int mult, unsigned int div,
+ unsigned long acc);
struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev,
const char *name, unsigned int index, unsigned long flags,
unsigned int mult, unsigned int div);
@@ -1130,11 +1154,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev,
* @mwidth: width of the numerator bit field
* @nshift: shift to the denominator bit field
* @nwidth: width of the denominator bit field
+ * @approximation: clk driver's callback for calculating the divider clock
* @lock: register lock
*
* Clock with adjustable fractional divider affecting its output frequency.
*
- * Flags:
+ * @flags:
* CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator
* is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED
* is set then the numerator and denominator are both the value read
@@ -1191,7 +1216,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw);
* Clock with an adjustable multiplier affecting its output frequency.
* Implements .recalc_rate, .set_rate and .round_rate
*
- * Flags:
+ * @flags:
* CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read
* from the register, with 0 being a valid value effectively
* zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is
@@ -1379,7 +1404,7 @@ struct clk_onecell_data {
struct clk_hw_onecell_data {
unsigned int num;
- struct clk_hw *hws[];
+ struct clk_hw *hws[] __counted_by(num);
};
#define CLK_OF_DECLARE(name, compat, fn) \
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 06f1b292f8a0..0fa56d672532 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -202,6 +202,18 @@ bool clk_is_match(const struct clk *p, const struct clk *q);
int clk_rate_exclusive_get(struct clk *clk);
/**
+ * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get
+ * @dev: device the exclusivity is bound to
+ * @clk: clock source
+ *
+ * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler
+ * on @dev to call clk_rate_exclusive_put().
+ *
+ * Must not be called from within atomic context.
+ */
+int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk);
+
+/**
* clk_rate_exclusive_put - release exclusivity over the rate control of a
* producer
* @clk: clock source
@@ -274,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk)
return 0;
}
+static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk)
+{
+ return 0;
+}
+
static inline void clk_rate_exclusive_put(struct clk *clk) {}
#endif
@@ -479,6 +496,22 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
struct clk_bulk_data **clks);
/**
+ * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed)
+ * @dev: device for clock "consumer"
+ * @clks: pointer to the clk_bulk_data table of consumer
+ *
+ * Returns success (0) or negative errno.
+ *
+ * This helper function allows drivers to get all clocks of the
+ * consumer and enables them in one operation with management.
+ * The clks will automatically be disabled and freed when the device
+ * is unbound.
+ */
+
+int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+ struct clk_bulk_data **clks);
+
+/**
* devm_clk_get - lookup and obtain a managed reference to a clock producer.
* @dev: device for clock "consumer"
* @id: clock consumer ID
@@ -968,6 +1001,12 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev,
return 0;
}
+static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+ struct clk_bulk_data **clks)
+{
+ return 0;
+}
+
static inline struct clk *devm_get_clk_from_child(struct device *dev,
struct device_node *np, const char *con_id)
{
diff --git a/include/linux/clk/mmp.h b/include/linux/clk/mmp.h
deleted file mode 100644
index 445130460380..000000000000
--- a/include/linux/clk/mmp.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __CLK_MMP_H
-#define __CLK_MMP_H
-
-#include <linux/types.h>
-
-extern void pxa168_clk_init(phys_addr_t mpmu_phys,
- phys_addr_t apmu_phys,
- phys_addr_t apbc_phys);
-extern void pxa910_clk_init(phys_addr_t mpmu_phys,
- phys_addr_t apmu_phys,
- phys_addr_t apbc_phys,
- phys_addr_t apbcp_phys);
-extern void mmp2_clk_init(phys_addr_t mpmu_phys,
- phys_addr_t apmu_phys,
- phys_addr_t apbc_phys);
-
-#endif
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index cbfcbf186ce3..e656f63efdce 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -13,11 +13,14 @@
/**
* struct clk_omap_reg - OMAP register declaration
* @offset: offset from the master IP module base address
+ * @bit: register bit offset
* @index: index of the master IP module
+ * @flags: flags
*/
struct clk_omap_reg {
void __iomem *ptr;
u16 offset;
+ u8 bit;
u8 index;
u8 flags;
};
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 1d42d4b17327..0ad8b550bb4b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -291,7 +291,19 @@ static inline void timer_probe(void) {}
#define TIMER_ACPI_DECLARE(name, table_id, fn) \
ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
-extern ulong max_cswd_read_retries;
+static inline unsigned int clocksource_get_max_watchdog_retry(void)
+{
+ /*
+ * When system is in the boot phase or under heavy workload, there
+ * can be random big latencies during the clocksource/watchdog
+ * read, so allow retries to filter the noise latency. As the
+ * latency's frequency and maximum value goes up with the number of
+ * CPUs, scale the number of retries with the number of online
+ * CPUs.
+ */
+ return (ilog2(num_online_cpus()) / 2) + 1;
+}
+
void clocksource_verify_percpu(struct clocksource *cs);
#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h
index 16775d7d8f8d..a4fa3436940c 100644
--- a/include/linux/clocksource_ids.h
+++ b/include/linux/clocksource_ids.h
@@ -6,6 +6,9 @@
enum clocksource_ids {
CSID_GENERIC = 0,
CSID_ARM_ARCH_COUNTER,
+ CSID_X86_TSC_EARLY,
+ CSID_X86_TSC,
+ CSID_X86_KVM_CLK,
CSID_MAX,
};
diff --git a/include/linux/closure.h b/include/linux/closure.h
new file mode 100644
index 000000000000..c554c6a08768
--- /dev/null
+++ b/include/linux/closure.h
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CLOSURE_H
+#define _LINUX_CLOSURE_H
+
+#include <linux/llist.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/workqueue.h>
+
+/*
+ * Closure is perhaps the most overused and abused term in computer science, but
+ * since I've been unable to come up with anything better you're stuck with it
+ * again.
+ *
+ * What are closures?
+ *
+ * They embed a refcount. The basic idea is they count "things that are in
+ * progress" - in flight bios, some other thread that's doing something else -
+ * anything you might want to wait on.
+ *
+ * The refcount may be manipulated with closure_get() and closure_put().
+ * closure_put() is where many of the interesting things happen, when it causes
+ * the refcount to go to 0.
+ *
+ * Closures can be used to wait on things both synchronously and asynchronously,
+ * and synchronous and asynchronous use can be mixed without restriction. To
+ * wait synchronously, use closure_sync() - you will sleep until your closure's
+ * refcount hits 1.
+ *
+ * To wait asynchronously, use
+ * continue_at(cl, next_function, workqueue);
+ *
+ * passing it, as you might expect, the function to run when nothing is pending
+ * and the workqueue to run that function out of.
+ *
+ * continue_at() also, critically, requires a 'return' immediately following the
+ * location where this macro is referenced, to return to the calling function.
+ * There's good reason for this.
+ *
+ * To use safely closures asynchronously, they must always have a refcount while
+ * they are running owned by the thread that is running them. Otherwise, suppose
+ * you submit some bios and wish to have a function run when they all complete:
+ *
+ * foo_endio(struct bio *bio)
+ * {
+ * closure_put(cl);
+ * }
+ *
+ * closure_init(cl);
+ *
+ * do_stuff();
+ * closure_get(cl);
+ * bio1->bi_endio = foo_endio;
+ * bio_submit(bio1);
+ *
+ * do_more_stuff();
+ * closure_get(cl);
+ * bio2->bi_endio = foo_endio;
+ * bio_submit(bio2);
+ *
+ * continue_at(cl, complete_some_read, system_wq);
+ *
+ * If closure's refcount started at 0, complete_some_read() could run before the
+ * second bio was submitted - which is almost always not what you want! More
+ * importantly, it wouldn't be possible to say whether the original thread or
+ * complete_some_read()'s thread owned the closure - and whatever state it was
+ * associated with!
+ *
+ * So, closure_init() initializes a closure's refcount to 1 - and when a
+ * closure_fn is run, the refcount will be reset to 1 first.
+ *
+ * Then, the rule is - if you got the refcount with closure_get(), release it
+ * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
+ * on a closure because you called closure_init() or you were run out of a
+ * closure - _always_ use continue_at(). Doing so consistently will help
+ * eliminate an entire class of particularly pernicious races.
+ *
+ * Lastly, you might have a wait list dedicated to a specific event, and have no
+ * need for specifying the condition - you just want to wait until someone runs
+ * closure_wake_up() on the appropriate wait list. In that case, just use
+ * closure_wait(). It will return either true or false, depending on whether the
+ * closure was already on a wait list or not - a closure can only be on one wait
+ * list at a time.
+ *
+ * Parents:
+ *
+ * closure_init() takes two arguments - it takes the closure to initialize, and
+ * a (possibly null) parent.
+ *
+ * If parent is non null, the new closure will have a refcount for its lifetime;
+ * a closure is considered to be "finished" when its refcount hits 0 and the
+ * function to run is null. Hence
+ *
+ * continue_at(cl, NULL, NULL);
+ *
+ * returns up the (spaghetti) stack of closures, precisely like normal return
+ * returns up the C stack. continue_at() with non null fn is better thought of
+ * as doing a tail call.
+ *
+ * All this implies that a closure should typically be embedded in a particular
+ * struct (which its refcount will normally control the lifetime of), and that
+ * struct can very much be thought of as a stack frame.
+ */
+
+struct closure;
+struct closure_syncer;
+typedef void (closure_fn) (struct work_struct *);
+extern struct dentry *bcache_debug;
+
+struct closure_waitlist {
+ struct llist_head list;
+};
+
+enum closure_state {
+ /*
+ * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
+ * the thread that owns the closure, and cleared by the thread that's
+ * waking up the closure.
+ *
+ * The rest are for debugging and don't affect behaviour:
+ *
+ * CLOSURE_RUNNING: Set when a closure is running (i.e. by
+ * closure_init() and when closure_put() runs then next function), and
+ * must be cleared before remaining hits 0. Primarily to help guard
+ * against incorrect usage and accidentally transferring references.
+ * continue_at() and closure_return() clear it for you, if you're doing
+ * something unusual you can use closure_set_dead() which also helps
+ * annotate where references are being transferred.
+ */
+
+ CLOSURE_BITS_START = (1U << 26),
+ CLOSURE_DESTRUCTOR = (1U << 26),
+ CLOSURE_WAITING = (1U << 28),
+ CLOSURE_RUNNING = (1U << 30),
+};
+
+#define CLOSURE_GUARD_MASK \
+ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1)
+
+#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
+#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
+
+struct closure {
+ union {
+ struct {
+ struct workqueue_struct *wq;
+ struct closure_syncer *s;
+ struct llist_node list;
+ closure_fn *fn;
+ };
+ struct work_struct work;
+ };
+
+ struct closure *parent;
+
+ atomic_t remaining;
+ bool closure_get_happened;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+#define CLOSURE_MAGIC_DEAD 0xc054dead
+#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+
+ unsigned int magic;
+ struct list_head all;
+ unsigned long ip;
+ unsigned long waiting_on;
+#endif
+};
+
+void closure_sub(struct closure *cl, int v);
+void closure_put(struct closure *cl);
+void __closure_wake_up(struct closure_waitlist *list);
+bool closure_wait(struct closure_waitlist *list, struct closure *cl);
+void __closure_sync(struct closure *cl);
+
+static inline unsigned closure_nr_remaining(struct closure *cl)
+{
+ return atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
+}
+
+/**
+ * closure_sync - sleep until a closure a closure has nothing left to wait on
+ *
+ * Sleeps until the refcount hits 1 - the thread that's running the closure owns
+ * the last refcount.
+ */
+static inline void closure_sync(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
+#endif
+
+ if (cl->closure_get_happened)
+ __closure_sync(cl);
+}
+
+#ifdef CONFIG_DEBUG_CLOSURES
+
+void closure_debug_create(struct closure *cl);
+void closure_debug_destroy(struct closure *cl);
+
+#else
+
+static inline void closure_debug_create(struct closure *cl) {}
+static inline void closure_debug_destroy(struct closure *cl) {}
+
+#endif
+
+static inline void closure_set_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _THIS_IP_;
+#endif
+}
+
+static inline void closure_set_ret_ip(struct closure *cl)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->ip = _RET_IP_;
+#endif
+}
+
+static inline void closure_set_waiting(struct closure *cl, unsigned long f)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->waiting_on = f;
+#endif
+}
+
+static inline void closure_set_stopped(struct closure *cl)
+{
+ atomic_sub(CLOSURE_RUNNING, &cl->remaining);
+}
+
+static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
+ struct workqueue_struct *wq)
+{
+ closure_set_ip(cl);
+ cl->fn = fn;
+ cl->wq = wq;
+}
+
+static inline void closure_queue(struct closure *cl)
+{
+ struct workqueue_struct *wq = cl->wq;
+ /**
+ * Changes made to closure, work_struct, or a couple of other structs
+ * may cause work.func not pointing to the right location.
+ */
+ BUILD_BUG_ON(offsetof(struct closure, fn)
+ != offsetof(struct work_struct, func));
+
+ if (wq) {
+ INIT_WORK(&cl->work, cl->work.func);
+ BUG_ON(!queue_work(wq, &cl->work));
+ } else
+ cl->fn(&cl->work);
+}
+
+/**
+ * closure_get - increment a closure's refcount
+ */
+static inline void closure_get(struct closure *cl)
+{
+ cl->closure_get_happened = true;
+
+#ifdef CONFIG_DEBUG_CLOSURES
+ BUG_ON((atomic_inc_return(&cl->remaining) &
+ CLOSURE_REMAINING_MASK) <= 1);
+#else
+ atomic_inc(&cl->remaining);
+#endif
+}
+
+/**
+ * closure_init - Initialize a closure, setting the refcount to 1
+ * @cl: closure to initialize
+ * @parent: parent of the new closure. cl will take a refcount on it for its
+ * lifetime; may be NULL.
+ */
+static inline void closure_init(struct closure *cl, struct closure *parent)
+{
+ cl->fn = NULL;
+ cl->parent = parent;
+ if (parent)
+ closure_get(parent);
+
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+ cl->closure_get_happened = false;
+
+ closure_debug_create(cl);
+ closure_set_ip(cl);
+}
+
+static inline void closure_init_stack(struct closure *cl)
+{
+ memset(cl, 0, sizeof(struct closure));
+ atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
+/**
+ * closure_wake_up - wake up all closures on a wait list,
+ * with memory barrier
+ */
+static inline void closure_wake_up(struct closure_waitlist *list)
+{
+ /* Memory barrier for the wait list */
+ smp_mb();
+ __closure_wake_up(list);
+}
+
+#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
+#define closure_type(name, type, member) \
+ struct closure *cl = container_of(ws, struct closure, work); \
+ type *name = container_of(cl, type, member)
+
+/**
+ * continue_at - jump to another function with barrier
+ *
+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have
+ * been dropped with closure_put()), it will resume execution at @fn running out
+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
+ *
+ * This is because after calling continue_at() you no longer have a ref on @cl,
+ * and whatever @cl owns may be freed out from under you - a running closure fn
+ * has a ref on its own closure which continue_at() drops.
+ *
+ * Note you are expected to immediately return after using this macro.
+ */
+#define continue_at(_cl, _fn, _wq) \
+do { \
+ set_closure_fn(_cl, _fn, _wq); \
+ closure_sub(_cl, CLOSURE_RUNNING + 1); \
+} while (0)
+
+/**
+ * closure_return - finish execution of a closure
+ *
+ * This is used to indicate that @cl is finished: when all outstanding refs on
+ * @cl have been dropped @cl's ref on its parent closure (as passed to
+ * closure_init()) will be dropped, if one was specified - thus this can be
+ * thought of as returning to the parent closure.
+ */
+#define closure_return(_cl) continue_at((_cl), NULL, NULL)
+
+/**
+ * continue_at_nobarrier - jump to another function without barrier
+ *
+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if
+ * @wq is NULL).
+ *
+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
+ * thus it's not safe to touch anything protected by @cl after a
+ * continue_at_nobarrier().
+ */
+#define continue_at_nobarrier(_cl, _fn, _wq) \
+do { \
+ set_closure_fn(_cl, _fn, _wq); \
+ closure_queue(_cl); \
+} while (0)
+
+/**
+ * closure_return_with_destructor - finish execution of a closure,
+ * with destructor
+ *
+ * Works like closure_return(), except @destructor will be called when all
+ * outstanding refs on @cl have been dropped; @destructor may be used to safely
+ * free the memory occupied by @cl, and it is called with the ref on the parent
+ * closure still held - so @destructor could safely return an item to a
+ * freelist protected by @cl's parent.
+ */
+#define closure_return_with_destructor(_cl, _destructor) \
+do { \
+ set_closure_fn(_cl, _destructor, NULL); \
+ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \
+} while (0)
+
+/**
+ * closure_call - execute @fn out of a new, uninitialized closure
+ *
+ * Typically used when running out of one closure, and we want to run @fn
+ * asynchronously out of a new closure - @parent will then wait for @cl to
+ * finish.
+ */
+static inline void closure_call(struct closure *cl, closure_fn fn,
+ struct workqueue_struct *wq,
+ struct closure *parent)
+{
+ closure_init(cl, parent);
+ continue_at_nobarrier(cl, fn, wq);
+}
+
+#define __closure_wait_event(waitlist, _cond) \
+do { \
+ struct closure cl; \
+ \
+ closure_init_stack(&cl); \
+ \
+ while (1) { \
+ closure_wait(waitlist, &cl); \
+ if (_cond) \
+ break; \
+ closure_sync(&cl); \
+ } \
+ closure_wake_up(waitlist); \
+ closure_sync(&cl); \
+} while (0)
+
+#define closure_wait_event(waitlist, _cond) \
+do { \
+ if (!(_cond)) \
+ __closure_wait_event(waitlist, _cond); \
+} while (0)
+
+#endif /* _LINUX_CLOSURE_H */
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 63873b93deaa..9db877506ea8 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -6,12 +6,8 @@
#include <linux/types.h>
#include <linux/numa.h>
-/*
- * There is always at least global CMA area and a few optional
- * areas configured in kernel .config.
- */
#ifdef CONFIG_CMA_AREAS
-#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
+#define MAX_CMA_AREAS CONFIG_CMA_AREAS
#endif
#define CMA_MAX_NAME 64
diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h
index d8264417e53c..d527f04400df 100644
--- a/include/linux/comedi/comedi_8254.h
+++ b/include/linux/comedi/comedi_8254.h
@@ -12,6 +12,8 @@
#define _COMEDI_8254_H
#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/err.h>
struct comedi_device;
struct comedi_insn;
@@ -57,10 +59,24 @@ struct comedi_subdevice;
/* counter maps zero to 0x10000 */
#define I8254_MAX_COUNT 0x10000
+struct comedi_8254;
+
+/**
+ * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access
+ * @i8254: pointer to struct comedi_8254
+ * @dir: direction (0 = read, 1 = write)
+ * @reg: register number
+ * @val: value to write
+ *
+ * Return: Register value when reading, 0 when writing.
+ */
+typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir,
+ unsigned int reg, unsigned int val);
+
/**
* struct comedi_8254 - private data used by this module
- * @iobase: PIO base address of the registers (in/out)
- * @mmio: MMIO base address of the registers (read/write)
+ * @iocb: I/O call-back function for register access
+ * @context: context for register access (e.g. a base address)
* @iosize: I/O size used to access the registers (b/w/l)
* @regshift: register gap shift
* @osc_base: cascaded oscillator speed in ns
@@ -76,8 +92,8 @@ struct comedi_subdevice;
* @insn_config: driver specific (*insn_config) callback
*/
struct comedi_8254 {
- unsigned long iobase;
- void __iomem *mmio;
+ comedi_8254_iocb_fn *iocb;
+ unsigned long context;
unsigned int iosize;
unsigned int regshift;
unsigned int osc_base;
@@ -122,13 +138,24 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254,
void comedi_8254_subdevice_init(struct comedi_subdevice *s,
struct comedi_8254 *i8254);
-struct comedi_8254 *comedi_8254_init(unsigned long iobase,
- unsigned int osc_base,
- unsigned int iosize,
- unsigned int regshift);
-struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio,
- unsigned int osc_base,
- unsigned int iosize,
- unsigned int regshift);
+#ifdef CONFIG_HAS_IOPORT
+struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase,
+ unsigned int osc_base,
+ unsigned int iosize,
+ unsigned int regshift);
+#else
+static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase,
+ unsigned int osc_base,
+ unsigned int iosize,
+ unsigned int regshift)
+{
+ return ERR_PTR(-ENXIO);
+}
+#endif
+
+struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio,
+ unsigned int osc_base,
+ unsigned int iosize,
+ unsigned int regshift);
#endif /* _COMEDI_8254_H */
diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h
index b2a5bc6b3a49..d24a69da389b 100644
--- a/include/linux/comedi/comedi_8255.h
+++ b/include/linux/comedi/comedi_8255.h
@@ -10,6 +10,8 @@
#ifndef _COMEDI_8255_H
#define _COMEDI_8255_H
+#include <linux/errno.h>
+
#define I8255_SIZE 0x04
#define I8255_DATA_A_REG 0x00
@@ -27,16 +29,26 @@
struct comedi_device;
struct comedi_subdevice;
-int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s,
- int (*io)(struct comedi_device *dev, int dir, int port,
- int data, unsigned long regbase),
- unsigned long regbase);
+#ifdef CONFIG_HAS_IOPORT
+int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s,
+ unsigned long regbase);
+#else
+static inline int subdev_8255_io_init(struct comedi_device *dev,
+ struct comedi_subdevice *s,
+ unsigned long regbase)
+{
+ return -ENXIO;
+}
+#endif
int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s,
- int (*io)(struct comedi_device *dev, int dir, int port,
- int data, unsigned long regbase),
unsigned long regbase);
+int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s,
+ int (*io)(struct comedi_device *dev, int dir, int port,
+ int data, unsigned long context),
+ unsigned long context);
+
unsigned long subdev_8255_regbase(struct comedi_subdevice *s);
#endif
diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h
index 0a1150900ef3..c08416a7364b 100644
--- a/include/linux/comedi/comedidev.h
+++ b/include/linux/comedi/comedidev.h
@@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown;
*/
struct comedi_lrange {
int length;
- struct comedi_krange range[];
+ struct comedi_krange range[] __counted_by(length);
};
/**
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1cfa4f0f490a..233f61ec8afc 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id,
struct io_event __user *events,
struct __kernel_timespec __user *timeout,
const struct __compat_aio_sigset __user *usig);
-asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
asmlinkage long compat_sys_epoll_pwait(int epfd,
struct epoll_event __user *events,
int maxevents, int timeout,
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 9b673fefcef8..49feac0162a5 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -9,16 +9,11 @@
* Clang prior to 17 is being silly and considers many __cleanup() variables
* as unused (because they are, their sole purpose is to go out of scope).
*
- * https://reviews.llvm.org/D152180
+ * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61
*/
#undef __cleanup
#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func)))
-/* same as gcc, this was present in clang-2.6 so we can assume it works
- * with any version that can compile the kernel
- */
-#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-
/* all clang versions usable with the kernel support KASAN ABI version 5 */
#define KASAN_ABI_VERSION 5
@@ -119,11 +114,7 @@
#define __diag_str(s) __diag_str1(s)
#define __diag(s) _Pragma(__diag_str(clang diagnostic s))
-#if CONFIG_CLANG_VERSION >= 110000
-#define __diag_clang_11(s) __diag(s)
-#else
-#define __diag_clang_11(s)
-#endif
+#define __diag_clang_13(s) __diag(s)
#define __diag_ignore_all(option, comment) \
- __diag_clang(11, ignore, option)
+ __diag_clang(13, ignore, option)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7af9e34ec261..aff92b1d284f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -35,12 +35,10 @@
(typeof(ptr)) (__ptr + (off)); \
})
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
#define __noretpoline __attribute__((__indirect_branch__("keep")))
#endif
-#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
@@ -66,6 +64,26 @@
__builtin_unreachable(); \
} while (0)
+/*
+ * GCC 'asm goto' with outputs miscompiles certain code sequences:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ *
+ * Work around it via the same compiler barrier quirk that we used
+ * to use for the old 'asm goto' workaround.
+ *
+ * Also, always mark such 'asm goto' statements as volatile: all
+ * asm goto statements are supposed to be volatile as per the
+ * documentation, but some versions of gcc didn't actually do
+ * that for asms with outputs:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
+ */
+#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
+#define asm_goto_output(x...) \
+ do { asm volatile goto(x); asm (""); } while (0)
+#endif
+
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
@@ -138,7 +156,7 @@
#endif
#define __diag_ignore_all(option, comment) \
- __diag_GCC(8, ignore, option)
+ __diag(__diag_GCC_ignore option)
/*
* Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size"
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d7779a18b24f..8c252e073bd8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define __stringify_label(n) #n
+#define __annotate_reachable(c) ({ \
+ asm volatile(__stringify_label(c) ":\n\t" \
+ ".pushsection .discard.reachable\n\t" \
+ ".long " __stringify_label(c) "b - .\n\t" \
+ ".popsection\n\t"); \
+})
+#define annotate_reachable() __annotate_reachable(__COUNTER__)
+
#define __annotate_unreachable(c) ({ \
asm volatile(__stringify_label(c) ":\n\t" \
".pushsection .discard.unreachable\n\t" \
@@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#define __annotate_jump_table __section(".rodata..c_jump_table")
#else /* !CONFIG_OBJTOOL */
+#define annotate_reachable()
#define annotate_unreachable()
#define __annotate_jump_table
#endif /* CONFIG_OBJTOOL */
@@ -177,10 +186,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__asm__ ("" : "=r" (var) : "0" (var))
#endif
-/* Not-quite-unique ID. */
-#ifndef __UNIQUE_ID
-# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
-#endif
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
/**
* data_race - mark an expression as containing intentional data races
@@ -212,7 +218,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define ___ADDRESSABLE(sym, __attrs) \
static void * __used __attrs \
- __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
+ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
#define __ADDRESSABLE(sym) \
___ADDRESSABLE(sym, __section(".discard.addressable"))
@@ -231,6 +237,53 @@ static inline void *offset_to_ptr(const int *off)
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ *
+ * Details:
+ * - sizeof() return an integer constant expression, and does not evaluate
+ * the value of its operand; it only examines the type of its operand.
+ * - The results of comparing two integer constant expressions is also
+ * an integer constant expression.
+ * - The first literal "8" isn't important. It could be any literal value.
+ * - The second literal "8" is to avoid warnings about unaligned pointers;
+ * this could otherwise just be "1".
+ * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit
+ * architectures.
+ * - The C Standard defines "null pointer constant", "(void *)0", as
+ * distinct from other void pointers.
+ * - If (x) is an integer constant expression, then the "* 0l" resolves
+ * it into an integer constant expression of value 0. Since it is cast to
+ * "void *", this makes the second operand a null pointer constant.
+ * - If (x) is not an integer constant expression, then the second operand
+ * resolves to a void pointer (but not a null pointer constant: the value
+ * is not an integer constant 0).
+ * - The conditional operator's third operand, "(int *)8", is an object
+ * pointer (to type "int").
+ * - The behavior (including the return type) of the conditional operator
+ * ("operand1 ? operand2 : operand3") depends on the kind of expressions
+ * given for the second and third operands. This is the central mechanism
+ * of the macro:
+ * - When one operand is a null pointer constant (i.e. when x is an integer
+ * constant expression) and the other is an object pointer (i.e. our
+ * third operand), the conditional operator returns the type of the
+ * object pointer operand (i.e. "int *"). Here, within the sizeof(), we
+ * would then get:
+ * sizeof(*((int *)(...)) == sizeof(int) == 4
+ * - When one operand is a void pointer (i.e. when x is not an integer
+ * constant expression) and the other is an object pointer (i.e. our
+ * third operand), the conditional operator returns a "void *" type.
+ * Here, within the sizeof(), we would then get:
+ * sizeof(*((void *)(...)) == sizeof(void) == 1
+ * - The equality comparison to "sizeof(int)" therefore depends on (x):
+ * sizeof(int) == sizeof(int) (x) was a constant expression
+ * sizeof(int) != sizeof(void) (x) was not a constant expression
+ */
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
+/*
* Whether 'type' is a signed type or an unsigned type. Supports scalar types,
* bool and also pointer types.
*/
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 00efa35c350f..8bdf6e0918c1 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -95,6 +95,19 @@
#endif
/*
+ * Optional: only supported since gcc >= 15
+ * Optional: only supported since clang >= 18
+ *
+ * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ * clang: https://github.com/llvm/llvm-project/pull/76348
+ */
+#if __has_attribute(__counted_by__)
+# define __counted_by(member) __attribute__((__counted_by__(member)))
+#else
+# define __counted_by(member)
+#endif
+
+/*
* Optional: not supported by gcc
* Optional: only supported since clang >= 14.0
*
@@ -130,19 +143,6 @@
#endif
/*
- * Optional: only supported since gcc >= 14
- * Optional: only supported since clang >= 17
- *
- * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://reviews.llvm.org/D148381
- */
-#if __has_attribute(__element_count__)
-# define __counted_by(member) __attribute__((__element_count__(#member)))
-#else
-# define __counted_by(member)
-#endif
-
-/*
* Optional: only supported since clang >= 14.0
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
@@ -334,6 +334,18 @@
#define __section(section) __attribute__((__section__(section)))
/*
+ * Optional: only supported since gcc >= 12
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized
+ */
+#if __has_attribute(__uninitialized__)
+# define __uninitialized __attribute__((__uninitialized__))
+#else
+# define __uninitialized
+#endif
+
+/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 547ea1ff806e..8f8236317d5b 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -2,6 +2,15 @@
#ifndef __LINUX_COMPILER_TYPES_H
#define __LINUX_COMPILER_TYPES_H
+/*
+ * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21.
+ * In the meantime, to support gcc < 10, we implement __has_builtin
+ * by hand.
+ */
+#ifndef __has_builtin
+#define __has_builtin(x) (0)
+#endif
+
#ifndef __ASSEMBLY__
/*
@@ -90,31 +99,48 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
*
* When -falign-functions=N is in use, we must avoid the cold attribute as
- * contemporary versions of GCC drop the alignment for cold functions. Worse,
- * GCC can implicitly mark callees of cold functions as cold themselves, so
- * it's not sufficient to add __function_aligned here as that will not ensure
- * that callees are correctly aligned.
+ * GCC drops the alignment for cold functions. Worse, GCC can implicitly mark
+ * callees of cold functions as cold themselves, so it's not sufficient to add
+ * __function_aligned here as that will not ensure that callees are correctly
+ * aligned.
*
* See:
*
* https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9
*/
-#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0)
+#if defined(CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT) || (CONFIG_FUNCTION_ALIGNMENT == 0)
#define __cold __attribute__((__cold__))
#else
#define __cold
#endif
-/* Builtins */
-
/*
- * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21.
- * In the meantime, to support gcc < 10, we implement __has_builtin
- * by hand.
+ * On x86-64 and arm64 targets, __preserve_most changes the calling convention
+ * of a function to make the code in the caller as unintrusive as possible. This
+ * convention behaves identically to the C calling convention on how arguments
+ * and return values are passed, but uses a different set of caller- and callee-
+ * saved registers.
+ *
+ * The purpose is to alleviates the burden of saving and recovering a large
+ * register set before and after the call in the caller. This is beneficial for
+ * rarely taken slow paths, such as error-reporting functions that may be called
+ * from hot paths.
+ *
+ * Note: This may conflict with instrumentation inserted on function entry which
+ * does not use __preserve_most or equivalent convention (if in assembly). Since
+ * function tracing assumes the normal C calling convention, where the attribute
+ * is supported, __preserve_most implies notrace. It is recommended to restrict
+ * use of the attribute to functions that should or already disable tracing.
+ *
+ * Optional: not supported by gcc.
+ *
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most
*/
-#ifndef __has_builtin
-#define __has_builtin(x) (0)
+#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64))
+# define __preserve_most notrace __attribute__((__preserve_most__))
+#else
+# define __preserve_most
#endif
/* Compiler specific macros. */
@@ -252,15 +278,33 @@ struct ftrace_likely_data {
# define __no_kcsan
#endif
+#ifdef __SANITIZE_MEMORY__
+/*
+ * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined
+ * functions, therefore disabling KMSAN checks also requires disabling inlining.
+ *
+ * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors
+ * within the function and marks all its outputs as initialized.
+ */
+# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused
+#endif
+
#ifndef __no_sanitize_or_inline
#define __no_sanitize_or_inline __always_inline
#endif
+/* Do not trap wrapping arithmetic within an annotated function. */
+#ifdef CONFIG_UBSAN_SIGNED_WRAP
+# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
+#else
+# define __signed_wrap
+#endif
+
/* Section for code which can't be instrumented at all */
#define __noinstr_section(section) \
noinline notrace __attribute((__section__(section))) \
__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
- __no_sanitize_memory
+ __no_sanitize_memory __signed_wrap
#define noinstr __noinstr_section(".noinstr.text")
@@ -324,8 +368,27 @@ struct ftrace_likely_data {
# define __realloc_size(x, ...)
#endif
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+/*
+ * When the size of an allocated object is needed, use the best available
+ * mechanism to find it. (For cases where sizeof() cannot be used.)
+ */
+#if __has_builtin(__builtin_dynamic_object_size)
+#define __struct_size(p) __builtin_dynamic_object_size(p, 0)
+#define __member_size(p) __builtin_dynamic_object_size(p, 1)
+#else
+#define __struct_size(p) __builtin_object_size(p, 0)
+#define __member_size(p) __builtin_object_size(p, 1)
+#endif
+
+/*
+ * Some versions of gcc do not mark 'asm goto' volatile:
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979
+ *
+ * We do it here by hand, because it doesn't hurt.
+ */
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm volatile goto(x)
#endif
#ifdef CONFIG_CC_HAS_ASM_INLINE
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 62b32b19e0a8..fb2915676574 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);
extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
extern void complete_all(struct completion *);
#endif
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 487350bb19c3..70bc1160f3d8 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -90,13 +90,18 @@ void cn_del_callback(const struct cb_id *id);
* If @group is not zero, then message will be delivered
* to the specified group.
* @gfp_mask: GFP mask.
+ * @filter: Filter function to be used at netlink layer.
+ * @filter_data:Filter data to be supplied to the filter function
*
* It can be safely called from softirq context, but may silently
* fail under strong memory pressure.
*
* If there are no listeners for given group %-ESRCH can be returned.
*/
-int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask);
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid,
+ u32 group, gfp_t gfp_mask,
+ netlink_filter_fn filter,
+ void *filter_data);
/**
* cn_netlink_send - Sends message to the specified groups.
diff --git a/include/linux/console.h b/include/linux/console.h
index d3195664baa5..31a8f5b85f5d 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -18,6 +18,7 @@
#include <linux/bits.h>
#include <linux/rculist.h>
#include <linux/types.h>
+#include <linux/vesa.h>
struct vc_data;
struct console_font_op;
@@ -36,63 +37,91 @@ enum vc_intensity;
/**
* struct consw - callbacks for consoles
*
+ * @owner: the module to get references of when this console is used
+ * @con_startup: set up the console and return its name (like VGA, EGA, ...)
+ * @con_init: initialize the console on @vc. @init is true for the very first
+ * call on this @vc.
+ * @con_deinit: deinitialize the console from @vc.
+ * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1.
+ * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc.
+ * (optional -- @con_putcs would be called instead)
+ * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc.
+ * @con_cursor: enable/disable cursor depending on @enable
* @con_scroll: move lines from @top to @bottom in direction @dir by @lines.
* Return true if no generic handling should be done.
* Invoked by csi_M and printing to the console.
- * @con_set_palette: sets the palette of the console to @table (optional)
+ * @con_switch: notifier about the console switch; it is supposed to return
+ * true if a redraw is needed.
+ * @con_blank: blank/unblank the console. The target mode is passed in @blank.
+ * @mode_switch is set if changing from/to text/graphics. The hook
+ * is supposed to return true if a redraw is needed.
+ * @con_font_set: set console @vc font to @font with height @vpitch. @flags can
+ * be %KD_FONT_FLAG_DONT_RECALC. (optional)
+ * @con_font_get: fetch the current font on @vc of height @vpitch into @font.
+ * (optional)
+ * @con_font_default: set default font on @vc. @name can be %NULL or font name
+ * to search for. @font can be filled back. (optional)
+ * @con_resize: resize the @vc console to @width x @height. @from_user is true
+ * when this change comes from the user space.
+ * @con_set_palette: sets the palette of the console @vc to @table (optional)
* @con_scrolldelta: the contents of the console should be scrolled by @lines.
* Invoked by user. (optional)
+ * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not
+ * provided or returns false, the origin is set to
+ * @vc->vc_screenbuf. (optional)
+ * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g.
+ * upon entering graphics. (optional)
+ * @con_build_attr: build attributes based on @color, @intensity and other
+ * parameters. The result is used for both normal and erase
+ * characters. (optional)
+ * @con_invert_region: invert a region of length @count on @vc starting at @p.
+ * (optional)
+ * @con_debug_enter: prepare the console for the debugger. This includes, but
+ * is not limited to, unblanking the console, loading an
+ * appropriate palette, and allowing debugger generated output.
+ * (optional)
+ * @con_debug_leave: restore the console to its pre-debug state as closely as
+ * possible. (optional)
*/
struct consw {
struct module *owner;
const char *(*con_startup)(void);
- void (*con_init)(struct vc_data *vc, int init);
+ void (*con_init)(struct vc_data *vc, bool init);
void (*con_deinit)(struct vc_data *vc);
- void (*con_clear)(struct vc_data *vc, int sy, int sx, int height,
- int width);
- void (*con_putc)(struct vc_data *vc, int c, int ypos, int xpos);
- void (*con_putcs)(struct vc_data *vc, const unsigned short *s,
- int count, int ypos, int xpos);
- void (*con_cursor)(struct vc_data *vc, int mode);
+ void (*con_clear)(struct vc_data *vc, unsigned int y,
+ unsigned int x, unsigned int count);
+ void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y,
+ unsigned int x);
+ void (*con_putcs)(struct vc_data *vc, const u16 *s,
+ unsigned int count, unsigned int ypos,
+ unsigned int xpos);
+ void (*con_cursor)(struct vc_data *vc, bool enable);
bool (*con_scroll)(struct vc_data *vc, unsigned int top,
unsigned int bottom, enum con_scroll dir,
unsigned int lines);
- int (*con_switch)(struct vc_data *vc);
- int (*con_blank)(struct vc_data *vc, int blank, int mode_switch);
- int (*con_font_set)(struct vc_data *vc, struct console_font *font,
- unsigned int vpitch, unsigned int flags);
+ bool (*con_switch)(struct vc_data *vc);
+ bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank,
+ bool mode_switch);
+ int (*con_font_set)(struct vc_data *vc,
+ const struct console_font *font,
+ unsigned int vpitch, unsigned int flags);
int (*con_font_get)(struct vc_data *vc, struct console_font *font,
unsigned int vpitch);
int (*con_font_default)(struct vc_data *vc,
- struct console_font *font, char *name);
+ struct console_font *font, const char *name);
int (*con_resize)(struct vc_data *vc, unsigned int width,
- unsigned int height, unsigned int user);
+ unsigned int height, bool from_user);
void (*con_set_palette)(struct vc_data *vc,
const unsigned char *table);
void (*con_scrolldelta)(struct vc_data *vc, int lines);
- int (*con_set_origin)(struct vc_data *vc);
+ bool (*con_set_origin)(struct vc_data *vc);
void (*con_save_screen)(struct vc_data *vc);
u8 (*con_build_attr)(struct vc_data *vc, u8 color,
enum vc_intensity intensity,
bool blink, bool underline, bool reverse, bool italic);
void (*con_invert_region)(struct vc_data *vc, u16 *p, int count);
- u16 *(*con_screen_pos)(const struct vc_data *vc, int offset);
- unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position,
- int *px, int *py);
- /*
- * Flush the video console driver's scrollback buffer
- */
- void (*con_flush_scrollback)(struct vc_data *vc);
- /*
- * Prepare the console for the debugger. This includes, but is not
- * limited to, unblanking the console, loading an appropriate
- * palette, and allowing debugger generated output.
- */
- int (*con_debug_enter)(struct vc_data *vc);
- /*
- * Restore the console to its pre-debug state as closely as possible.
- */
- int (*con_debug_leave)(struct vc_data *vc);
+ void (*con_debug_enter)(struct vc_data *vc);
+ void (*con_debug_leave)(struct vc_data *vc);
};
extern const struct consw *conswitchp;
@@ -101,36 +130,32 @@ extern const struct consw dummy_con; /* dummy console buffer */
extern const struct consw vga_con; /* VGA text console */
extern const struct consw newport_con; /* SGI Newport console */
+struct screen_info;
+#ifdef CONFIG_VGA_CONSOLE
+void vgacon_register_screen(struct screen_info *si);
+#else
+static inline void vgacon_register_screen(struct screen_info *si) { }
+#endif
+
int con_is_bound(const struct consw *csw);
int do_unregister_con_driver(const struct consw *csw);
int do_take_over_console(const struct consw *sw, int first, int last, int deflt);
void give_up_console(const struct consw *sw);
-#ifdef CONFIG_HW_CONSOLE
-int con_debug_enter(struct vc_data *vc);
-int con_debug_leave(void);
+#ifdef CONFIG_VT
+void con_debug_enter(struct vc_data *vc);
+void con_debug_leave(void);
#else
-static inline int con_debug_enter(struct vc_data *vc)
-{
- return 0;
-}
-static inline int con_debug_leave(void)
-{
- return 0;
-}
+static inline void con_debug_enter(struct vc_data *vc) { }
+static inline void con_debug_leave(void) { }
#endif
-/* cursor */
-#define CM_DRAW (1)
-#define CM_ERASE (2)
-#define CM_MOVE (3)
-
/*
* The interface for a console, or any other device that wants to capture
* console messages (printer driver?)
*/
/**
- * cons_flags - General console flags
+ * enum cons_flags - General console flags
* @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate
* output of messages that were already shown by boot
* consoles or read by userspace via syslog() syscall.
@@ -154,6 +179,10 @@ static inline int con_debug_leave(void)
* receiving the printk spam for obvious reasons.
* @CON_EXTENDED: The console supports the extended output format of
* /dev/kmesg which requires a larger output buffer.
+ * @CON_SUSPENDED: Indicates if a console is suspended. If true, the
+ * printing callbacks must not be called.
+ * @CON_NBCON: Console can operate outside of the legacy style console_lock
+ * constraints.
*/
enum cons_flags {
CON_PRINTBUFFER = BIT(0),
@@ -163,6 +192,112 @@ enum cons_flags {
CON_ANYTIME = BIT(4),
CON_BRL = BIT(5),
CON_EXTENDED = BIT(6),
+ CON_SUSPENDED = BIT(7),
+ CON_NBCON = BIT(8),
+};
+
+/**
+ * struct nbcon_state - console state for nbcon consoles
+ * @atom: Compound of the state fields for atomic operations
+ *
+ * @req_prio: The priority of a handover request
+ * @prio: The priority of the current owner
+ * @unsafe: Console is busy in a non takeover region
+ * @unsafe_takeover: A hostile takeover in an unsafe state happened in the
+ * past. The console cannot be safe until re-initialized.
+ * @cpu: The CPU on which the owner runs
+ *
+ * To be used for reading and preparing of the value stored in the nbcon
+ * state variable @console::nbcon_state.
+ *
+ * The @prio and @req_prio fields are particularly important to allow
+ * spin-waiting to timeout and give up without the risk of a waiter being
+ * assigned the lock after giving up.
+ */
+struct nbcon_state {
+ union {
+ unsigned int atom;
+ struct {
+ unsigned int prio : 2;
+ unsigned int req_prio : 2;
+ unsigned int unsafe : 1;
+ unsigned int unsafe_takeover : 1;
+ unsigned int cpu : 24;
+ };
+ };
+};
+
+/*
+ * The nbcon_state struct is used to easily create and interpret values that
+ * are stored in the @console::nbcon_state variable. Ensure this struct stays
+ * within the size boundaries of the atomic variable's underlying type in
+ * order to avoid any accidental truncation.
+ */
+static_assert(sizeof(struct nbcon_state) <= sizeof(int));
+
+/**
+ * enum nbcon_prio - console owner priority for nbcon consoles
+ * @NBCON_PRIO_NONE: Unused
+ * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage
+ * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...)
+ * @NBCON_PRIO_PANIC: Panic output
+ * @NBCON_PRIO_MAX: The number of priority levels
+ *
+ * A higher priority context can takeover the console when it is
+ * in the safe state. The final attempt to flush consoles in panic()
+ * can be allowed to do so even in an unsafe state (Hope and pray).
+ */
+enum nbcon_prio {
+ NBCON_PRIO_NONE = 0,
+ NBCON_PRIO_NORMAL,
+ NBCON_PRIO_EMERGENCY,
+ NBCON_PRIO_PANIC,
+ NBCON_PRIO_MAX,
+};
+
+struct console;
+struct printk_buffers;
+
+/**
+ * struct nbcon_context - Context for console acquire/release
+ * @console: The associated console
+ * @spinwait_max_us: Limit for spin-wait acquire
+ * @prio: Priority of the context
+ * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can
+ * be used only with NBCON_PRIO_PANIC @prio. It
+ * might cause a system freeze when the console
+ * is used later.
+ * @backlog: Ringbuffer has pending records
+ * @pbufs: Pointer to the text buffer for this context
+ * @seq: The sequence number to print for this context
+ */
+struct nbcon_context {
+ /* members set by caller */
+ struct console *console;
+ unsigned int spinwait_max_us;
+ enum nbcon_prio prio;
+ unsigned int allow_unsafe_takeover : 1;
+
+ /* members set by emit */
+ unsigned int backlog : 1;
+
+ /* members set by acquire */
+ struct printk_buffers *pbufs;
+ u64 seq;
+};
+
+/**
+ * struct nbcon_write_context - Context handed to the nbcon write callbacks
+ * @ctxt: The core console context
+ * @outbuf: Pointer to the text buffer for output
+ * @len: Length to write
+ * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred
+ */
+struct nbcon_write_context {
+ struct nbcon_context __private ctxt;
+ char *outbuf;
+ unsigned int len;
+ bool unsafe_takeover;
};
/**
@@ -184,6 +319,11 @@ enum cons_flags {
* @dropped: Number of unreported dropped ringbuffer records
* @data: Driver private data
* @node: hlist node for the console list
+ *
+ * @write_atomic: Write callback for atomic context
+ * @nbcon_state: State for nbcon consoles
+ * @nbcon_seq: Sequence number of the next record for nbcon to print
+ * @pbufs: Pointer to nbcon private buffer
*/
struct console {
char name[16];
@@ -203,6 +343,13 @@ struct console {
unsigned long dropped;
void *data;
struct hlist_node node;
+
+ /* nbcon console specific members */
+ bool (*write_atomic)(struct console *con,
+ struct nbcon_write_context *wctxt);
+ atomic_t __private nbcon_state;
+ atomic_long_t __private nbcon_seq;
+ struct printk_buffers *pbufs;
};
#ifdef CONFIG_LOCKDEP
@@ -321,7 +468,7 @@ static inline bool console_is_registered(const struct console *con)
* for_each_console() - Iterator over registered consoles
* @con: struct console pointer used as loop cursor
*
- * The console list and the console->flags are immutable while iterating.
+ * The console list and the &console.flags are immutable while iterating.
*
* Requires console_list_lock to be held.
*/
@@ -329,6 +476,16 @@ static inline bool console_is_registered(const struct console *con)
lockdep_assert_console_list_lock_held(); \
hlist_for_each_entry(con, &console_list, node)
+#ifdef CONFIG_PRINTK
+extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
+extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
+extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
+#else
+static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
+static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
+static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
+#endif
+
extern int console_set_on_cmdline;
extern struct console *early_console;
@@ -337,7 +494,7 @@ enum con_flush_mode {
CONSOLE_REPLAY_ALL,
};
-extern int add_preferred_console(char *name, int idx, char *options);
+extern int add_preferred_console(const char *name, const short idx, char *options);
extern void console_force_preferred_locked(struct console *con);
extern void register_console(struct console *);
extern int unregister_console(struct console *);
@@ -381,12 +538,6 @@ void vcs_remove_sysfs(int index);
*/
extern atomic_t ignore_console_lock_warning;
-/* VESA Blanking Levels */
-#define VESA_NO_BLANKING 0
-#define VESA_VSYNC_SUSPEND 1
-#define VESA_HSYNC_SUSPEND 2
-#define VESA_POWERDOWN 3
-
extern void console_init(void);
/* For deferred console takeover */
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 539f1cd45309..20f564e98552 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -151,7 +151,6 @@ struct vc_data {
DECLARE_BITMAP(vc_tab_stop, VC_TABSTOPS_COUNT); /* Tab stops. 256 columns. */
unsigned char vc_palette[16*3]; /* Colour palette for VGA+ */
unsigned short * vc_translate;
- unsigned int vc_resize_user; /* resize request from user */
unsigned int vc_bell_pitch; /* Console bell pitch */
unsigned int vc_bell_duration; /* Console bell duration */
unsigned short vc_cur_blink_ms; /* Cursor blink duration */
diff --git a/include/linux/const.h b/include/linux/const.h
index 435ddd72d2c4..81b8aae5a855 100644
--- a/include/linux/const.h
+++ b/include/linux/const.h
@@ -3,12 +3,4 @@
#include <vdso/const.h>
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
- (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
#endif /* _LINUX_CONST_H */
diff --git a/include/linux/container.h b/include/linux/container.h
index 2566a1baa736..dd00cc918a92 100644
--- a/include/linux/container.h
+++ b/include/linux/container.h
@@ -12,7 +12,7 @@
#include <linux/device.h>
/* drivers/base/power/container.c */
-extern struct bus_type container_subsys;
+extern const struct bus_type container_subsys;
struct container_dev {
struct device dev;
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index bf70987240e4..5f288d475490 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -6,6 +6,8 @@
#ifndef _LINUX_CORESIGHT_H
#define _LINUX_CORESIGHT_H
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/perf_event.h>
@@ -33,7 +35,7 @@
#define CORESIGHT_UNLOCK 0xc5acce55
-extern struct bus_type coresight_bustype;
+extern const struct bus_type coresight_bustype;
enum coresight_dev_type {
CORESIGHT_DEV_TYPE_SINK,
@@ -62,6 +64,7 @@ enum coresight_dev_subtype_source {
CORESIGHT_DEV_SUBTYPE_SOURCE_PROC,
CORESIGHT_DEV_SUBTYPE_SOURCE_BUS,
CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE,
+ CORESIGHT_DEV_SUBTYPE_SOURCE_TPDM,
CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS,
};
@@ -223,13 +226,26 @@ struct coresight_sysfs_link {
* by @coresight_ops.
* @access: Device i/o access abstraction for this device.
* @dev: The device entity associated to this component.
- * @refcnt: keep track of what is in use.
+ * @mode: This tracer's mode, i.e sysFS, Perf or disabled. This is
+ * actually an 'enum cs_mode', but is stored in an atomic type.
+ * This is always accessed through local_read() and local_set(),
+ * but wherever it's done from within the Coresight device's lock,
+ * a non-atomic read would also work. This is the main point of
+ * synchronisation between code happening inside the sysfs mode's
+ * coresight_mutex and outside when running in Perf mode. A compare
+ * and exchange swap is done to atomically claim one mode or the
+ * other.
+ * @refcnt: keep track of what is in use. Only access this outside of the
+ * device's spinlock when the coresight_mutex held and mode ==
+ * CS_MODE_SYSFS. Otherwise it must be accessed from inside the
+ * spinlock.
* @orphan: true if the component has connections that haven't been linked.
- * @enable: 'true' if component is currently part of an active path.
- * @activated: 'true' only if a _sink_ has been activated. A sink can be
- * activated but not yet enabled. Enabling for a _sink_
- * happens when a source has been selected and a path is enabled
- * from source to that sink.
+ * @sysfs_sink_activated: 'true' when a sink has been selected for use via sysfs
+ * by writing a 1 to the 'enable_sink' file. A sink can be
+ * activated but not yet enabled. Enabling for a _sink_ happens
+ * when a source has been selected and a path is enabled from
+ * source to that sink. A sink can also become enabled but not
+ * activated if it's used via Perf.
* @ea: Device attribute for sink representation under PMU directory.
* @def_sink: cached reference to default sink found for this device.
* @nr_links: number of sysfs links created to other components from this
@@ -247,11 +263,11 @@ struct coresight_device {
const struct coresight_ops *ops;
struct csdev_access access;
struct device dev;
- atomic_t refcnt;
+ local_t mode;
+ int refcnt;
bool orphan;
- bool enable; /* true only if configured as part of a path */
/* sink specific fields */
- bool activated; /* true only if a sink is part of a path */
+ bool sysfs_sink_activated;
struct dev_ext_attribute *ea;
struct coresight_device *def_sink;
/* sysfs links between components */
@@ -375,8 +391,6 @@ struct coresight_ops {
const struct coresight_ops_helper *helper_ops;
};
-#if IS_ENABLED(CONFIG_CORESIGHT)
-
static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
u32 offset)
{
@@ -386,6 +400,63 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
return csa->read(offset, true, false);
}
+#define CORESIGHT_CIDRn(i) (0xFF0 + ((i) * 4))
+
+static inline u32 coresight_get_cid(void __iomem *base)
+{
+ u32 i, cid = 0;
+
+ for (i = 0; i < 4; i++)
+ cid |= readl(base + CORESIGHT_CIDRn(i)) << (i * 8);
+
+ return cid;
+}
+
+static inline bool is_coresight_device(void __iomem *base)
+{
+ u32 cid = coresight_get_cid(base);
+
+ return cid == CORESIGHT_CID;
+}
+
+/*
+ * Attempt to find and enable "APB clock" for the given device
+ *
+ * Returns:
+ *
+ * clk - Clock is found and enabled
+ * NULL - clock is not found
+ * ERROR - Clock is found but failed to enable
+ */
+static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev)
+{
+ struct clk *pclk;
+ int ret;
+
+ pclk = clk_get(dev, "apb_pclk");
+ if (IS_ERR(pclk))
+ return NULL;
+
+ ret = clk_prepare_enable(pclk);
+ if (ret) {
+ clk_put(pclk);
+ return ERR_PTR(ret);
+ }
+ return pclk;
+}
+
+#define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4))
+
+static inline u32 coresight_get_pid(struct csdev_access *csa)
+{
+ u32 i, pid = 0;
+
+ for (i = 0; i < 4; i++)
+ pid |= csdev_access_relaxed_read32(csa, CORESIGHT_PIDRn(i)) << (i * 8);
+
+ return pid;
+}
+
static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa,
u32 lo_offset, u32 hi_offset)
{
@@ -509,11 +580,43 @@ static inline bool coresight_is_percpu_sink(struct coresight_device *csdev)
(csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM);
}
+/*
+ * Atomically try to take the device and set a new mode. Returns true on
+ * success, false if the device is already taken by someone else.
+ */
+static inline bool coresight_take_mode(struct coresight_device *csdev,
+ enum cs_mode new_mode)
+{
+ return local_cmpxchg(&csdev->mode, CS_MODE_DISABLED, new_mode) ==
+ CS_MODE_DISABLED;
+}
+
+static inline enum cs_mode coresight_get_mode(struct coresight_device *csdev)
+{
+ return local_read(&csdev->mode);
+}
+
+static inline void coresight_set_mode(struct coresight_device *csdev,
+ enum cs_mode new_mode)
+{
+ enum cs_mode current_mode = coresight_get_mode(csdev);
+
+ /*
+ * Changing to a new mode must be done from an already disabled state
+ * unless it's synchronized with coresight_take_mode(). Otherwise the
+ * device is already in use and signifies a locking issue.
+ */
+ WARN(new_mode != CS_MODE_DISABLED && current_mode != CS_MODE_DISABLED &&
+ current_mode != new_mode, "Device already in use\n");
+
+ local_set(&csdev->mode, new_mode);
+}
+
extern struct coresight_device *
coresight_register(struct coresight_desc *desc);
extern void coresight_unregister(struct coresight_device *csdev);
-extern int coresight_enable(struct coresight_device *csdev);
-extern void coresight_disable(struct coresight_device *csdev);
+extern int coresight_enable_sysfs(struct coresight_device *csdev);
+extern void coresight_disable_sysfs(struct coresight_device *csdev);
extern int coresight_timeout(struct csdev_access *csa, u32 offset,
int position, int value);
@@ -538,83 +641,6 @@ void coresight_relaxed_write64(struct coresight_device *csdev,
u64 val, u32 offset);
void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset);
-#else
-static inline struct coresight_device *
-coresight_register(struct coresight_desc *desc) { return NULL; }
-static inline void coresight_unregister(struct coresight_device *csdev) {}
-static inline int
-coresight_enable(struct coresight_device *csdev) { return -ENOSYS; }
-static inline void coresight_disable(struct coresight_device *csdev) {}
-
-static inline int coresight_timeout(struct csdev_access *csa, u32 offset,
- int position, int value)
-{
- return 1;
-}
-
-static inline int coresight_claim_device_unlocked(struct coresight_device *csdev)
-{
- return -EINVAL;
-}
-
-static inline int coresight_claim_device(struct coresight_device *csdev)
-{
- return -EINVAL;
-}
-
-static inline void coresight_disclaim_device(struct coresight_device *csdev) {}
-static inline void coresight_disclaim_device_unlocked(struct coresight_device *csdev) {}
-
-static inline bool coresight_loses_context_with_cpu(struct device *dev)
-{
- return false;
-}
-
-static inline u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
-{
- WARN_ON_ONCE(1);
- return 0;
-}
-
-static inline u32 coresight_read32(struct coresight_device *csdev, u32 offset)
-{
- WARN_ON_ONCE(1);
- return 0;
-}
-
-static inline void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
-{
-}
-
-static inline void coresight_relaxed_write32(struct coresight_device *csdev,
- u32 val, u32 offset)
-{
-}
-
-static inline u64 coresight_relaxed_read64(struct coresight_device *csdev,
- u32 offset)
-{
- WARN_ON_ONCE(1);
- return 0;
-}
-
-static inline u64 coresight_read64(struct coresight_device *csdev, u32 offset)
-{
- WARN_ON_ONCE(1);
- return 0;
-}
-
-static inline void coresight_relaxed_write64(struct coresight_device *csdev,
- u64 val, u32 offset)
-{
-}
-
-static inline void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
-{
-}
-
-#endif /* IS_ENABLED(CONFIG_CORESIGHT) */
-
extern int coresight_get_cpu(struct device *dev);
struct coresight_platform_data *coresight_get_platform_data(struct device *dev);
diff --git a/include/linux/counter.h b/include/linux/counter.h
index b63746637de2..702e9108bbb4 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -399,7 +399,7 @@ struct counter_device {
struct mutex ops_exist_lock;
};
-void *counter_priv(const struct counter_device *const counter);
+void *counter_priv(const struct counter_device *const counter) __attribute_const__;
struct counter_device *counter_alloc(size_t sizeof_priv);
void counter_put(struct counter_device *const counter);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index c1a7dc325121..265b0f8fc0b3 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -90,6 +90,29 @@ enum {
GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \
0x72, 0x2D, 0xEB, 0x41)
+/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CPER_SEC_CXL_GEN_MEDIA_GUID \
+ GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CPER_SEC_CXL_DRAM_GUID \
+ GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CPER_SEC_CXL_MEM_MODULE_GUID \
+ GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+
/*
* Flags bits definitions for flags in struct cper_record_header
* If set, the error has been recovered
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e006c719182b..861c3bfc5f17 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -18,6 +18,7 @@
#include <linux/compiler.h>
#include <linux/cpumask.h>
#include <linux/cpuhotplug.h>
+#include <linux/cpu_smt.h>
struct device;
struct device_node;
@@ -74,17 +75,26 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_gds(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
const struct attribute_group **groups,
const char *fmt, ...);
+extern bool arch_cpu_is_hotpluggable(int cpu);
+extern int arch_register_cpu(int cpu);
+extern void arch_unregister_cpu(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
extern void unregister_cpu(struct cpu *cpu);
extern ssize_t arch_cpu_probe(const char *, size_t);
extern ssize_t arch_cpu_release(const char *, size_t);
#endif
+#ifdef CONFIG_GENERIC_CPU_DEVICES
+DECLARE_PER_CPU(struct cpu, cpu_devices);
+#endif
+
/*
* These states are not related to the core CPU hotplug mechanism. They are
* used by various (sub)architectures to track internal state
@@ -104,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu);
extern void cpu_maps_update_begin(void);
extern void cpu_maps_update_done(void);
int bringup_hibernate_cpu(unsigned int sleep_cpu);
-void bringup_nonboot_cpus(unsigned int setup_max_cpus);
+void bringup_nonboot_cpus(unsigned int max_cpus);
#else /* CONFIG_SMP */
#define cpuhp_tasks_frozen 0
@@ -120,7 +130,7 @@ static inline void cpu_maps_update_done(void)
static inline int add_cpu(unsigned int cpu) { return 0;}
#endif /* CONFIG_SMP */
-extern struct bus_type cpu_subsys;
+extern const struct bus_type cpu_subsys;
extern int lockdep_is_cpus_held(void);
@@ -152,6 +162,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
#endif /* !CONFIG_HOTPLUG_CPU */
+DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock())
+
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
extern void thaw_secondary_cpus(void);
@@ -186,6 +198,8 @@ void arch_cpu_idle(void);
void arch_cpu_idle_prepare(void);
void arch_cpu_idle_enter(void);
void arch_cpu_idle_exit(void);
+void arch_tick_broadcast_enter(void);
+void arch_tick_broadcast_exit(void);
void __noreturn arch_cpu_idle_dead(void);
#ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT
@@ -194,7 +208,6 @@ void arch_cpu_finalize_init(void);
static inline void arch_cpu_finalize_init(void) { }
#endif
-void cpu_set_state_online(int cpu);
void play_idle_precise(u64 duration_ns, u64 latency_ns);
static inline void play_idle(unsigned long duration_us)
@@ -208,31 +221,18 @@ void cpuhp_report_idle_dead(void);
static inline void cpuhp_report_idle_dead(void) { }
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-enum cpuhp_smt_control {
- CPU_SMT_ENABLED,
- CPU_SMT_DISABLED,
- CPU_SMT_FORCE_DISABLED,
- CPU_SMT_NOT_SUPPORTED,
- CPU_SMT_NOT_IMPLEMENTED,
-};
-
-#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
-extern enum cpuhp_smt_control cpu_smt_control;
-extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology(void);
-extern bool cpu_smt_possible(void);
-extern int cpuhp_smt_enable(void);
-extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
-#else
-# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED)
-static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology(void) { }
-static inline bool cpu_smt_possible(void) { return false; }
-static inline int cpuhp_smt_enable(void) { return 0; }
-static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
-#endif
-
+#ifdef CONFIG_CPU_MITIGATIONS
extern bool cpu_mitigations_off(void);
extern bool cpu_mitigations_auto_nosmt(void);
+#else
+static inline bool cpu_mitigations_off(void)
+{
+ return true;
+}
+static inline bool cpu_mitigations_auto_nosmt(void)
+{
+ return false;
+}
+#endif
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h
new file mode 100644
index 000000000000..0c1664294b57
--- /dev/null
+++ b/include/linux/cpu_smt.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CPU_SMT_H_
+#define _LINUX_CPU_SMT_H_
+
+enum cpuhp_smt_control {
+ CPU_SMT_ENABLED,
+ CPU_SMT_DISABLED,
+ CPU_SMT_FORCE_DISABLED,
+ CPU_SMT_NOT_SUPPORTED,
+ CPU_SMT_NOT_IMPLEMENTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+extern unsigned int cpu_smt_num_threads;
+extern void cpu_smt_disable(bool force);
+extern void cpu_smt_set_num_threads(unsigned int num_threads,
+ unsigned int max_threads);
+extern bool cpu_smt_possible(void);
+extern int cpuhp_smt_enable(void);
+extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
+#else
+# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED)
+# define cpu_smt_num_threads 1
+static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_set_num_threads(unsigned int num_threads,
+ unsigned int max_threads) { }
+static inline bool cpu_smt_possible(void) { return false; }
+static inline int cpuhp_smt_enable(void) { return 0; }
+static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
+#endif
+
+#endif /* _LINUX_CPU_SMT_H_ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 172ff51c1b2a..9956afb9acc2 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -19,6 +19,7 @@
#include <linux/pm_qos.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
+#include <linux/minmax.h>
/*********************************************************************
* CPUFREQ INTERFACE *
@@ -140,6 +141,9 @@ struct cpufreq_policy {
*/
bool dvfs_possible_from_any_cpu;
+ /* Per policy boost enabled flag. */
+ bool boost_enabled;
+
/* Cached frequency lookup from cpufreq_driver_resolve_freq. */
unsigned int cached_target_freq;
unsigned int cached_resolved_idx;
@@ -259,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void)
return false;
}
static inline void disable_cpufreq(void) { }
+static inline void cpufreq_update_limits(unsigned int cpu) { }
#endif
#ifdef CONFIG_CPU_FREQ_STAT
@@ -370,7 +375,7 @@ struct cpufreq_driver {
int (*target_intermediate)(struct cpufreq_policy *policy,
unsigned int index);
- /* should be defined, if possible */
+ /* should be defined, if possible, return 0 on error */
unsigned int (*get)(unsigned int cpu);
/* Called to update policy limits on firmware notifications. */
@@ -467,17 +472,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli
unsigned int min,
unsigned int max)
{
- if (policy->min < min)
- policy->min = min;
- if (policy->max < min)
- policy->max = min;
- if (policy->min > max)
- policy->min = max;
- if (policy->max > max)
- policy->max = max;
- if (policy->min > policy->max)
- policy->min = policy->max;
- return;
+ policy->max = clamp(policy->max, min, max);
+ policy->min = clamp(policy->min, min, policy->max);
}
static inline void
@@ -573,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
/*
* The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * ondemand governor will work on any processor with transition latency <= 10ms,
- * using appropriate sampling rate.
+ * polling frequency is 1000 times the transition latency of the processor.
*/
#define LATENCY_MULTIPLIER (1000)
@@ -699,26 +693,6 @@ struct cpufreq_frequency_table {
* order */
};
-#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
-int dev_pm_opp_init_cpufreq_table(struct device *dev,
- struct cpufreq_frequency_table **table);
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
- struct cpufreq_frequency_table **table);
-#else
-static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
- struct cpufreq_frequency_table
- **table)
-{
- return -EINVAL;
-}
-
-static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
- struct cpufreq_frequency_table
- **table)
-{
-}
-#endif
-
/*
* cpufreq_for_each_entry - iterate over a cpufreq_frequency_table
* @pos: the cpufreq_frequency_table * to use as a loop cursor.
@@ -1026,6 +1000,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return freq == clamp_val(freq, policy->min, policy->max);
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1059,7 +1045,8 @@ retry:
return 0;
}
- if (idx < 0 && efficiencies) {
+ /* Limit frequency index to honor policy->min/max */
+ if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
@@ -1154,8 +1141,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
if (ret < 0)
continue;
- if (pargs->np == args.np && pargs->args_count == args.args_count &&
- !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count))
+ if (of_phandle_args_equal(pargs, &args))
cpumask_set_cpu(cpu, cpumask);
of_node_put(args.np);
@@ -1198,14 +1184,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
}
#endif
-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
-void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
- struct cpufreq_governor *old_gov);
-#else
-static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
- struct cpufreq_governor *old_gov) { }
-#endif
-
extern unsigned int arch_freq_get_on_cpu(int cpu);
#ifndef arch_set_freq_scale
@@ -1216,6 +1194,7 @@ void arch_set_freq_scale(const struct cpumask *cpus,
{
}
#endif
+
/* the following are really really optional */
extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 25b6e6e6ba6b..35e78ddb2b37 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -48,7 +48,7 @@
* same section.
*
* If neither #1 nor #2 apply, please use the dynamic state space when
- * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * setting up a state by using CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN
* for the @state argument of the setup function.
*
* See Documentation/core-api/cpu_hotplug.rst for further information and
@@ -66,15 +66,12 @@ enum cpuhp_state {
CPUHP_PERF_POWER,
CPUHP_PERF_SUPERH,
CPUHP_X86_HPET_DEAD,
- CPUHP_X86_APB_DEAD,
CPUHP_X86_MCE_DEAD,
CPUHP_VIRT_NET_DEAD,
CPUHP_IBMVNIC_DEAD,
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
- /* Must be after CPUHP_MM_VMSTAT_DEAD */
- CPUHP_MM_DEMOTION_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
@@ -90,14 +87,12 @@ enum cpuhp_state {
CPUHP_FS_BUFF_DEAD,
CPUHP_PRINTK_DEAD,
CPUHP_MM_MEMCQ_DEAD,
- CPUHP_XFS_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
CPUHP_PAGE_ALLOC,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_IOVA_DEAD,
- CPUHP_LUSTRE_CFS_DEAD,
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD,
CPUHP_AP_DTPM_CPU_DEAD,
@@ -109,7 +104,6 @@ enum cpuhp_state {
CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE,
CPUHP_RELAY_PREPARE,
- CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
CPUHP_CPUIDLE_COUPLED_PREPARE,
@@ -119,13 +113,11 @@ enum cpuhp_state {
CPUHP_XEN_EVTCHN_PREPARE,
CPUHP_ARM_SHMOBILE_SCU_PREPARE,
CPUHP_SH_SH3X_PREPARE,
- CPUHP_NET_FLOW_PREPARE,
CPUHP_TOPOLOGY_PREPARE,
CPUHP_NET_IUCV_PREPARE,
CPUHP_ARM_BL_PREPARE,
CPUHP_TRACE_RB_PREPARE,
CPUHP_MM_ZS_PREPARE,
- CPUHP_MM_ZSWP_MEM_PREPARE,
CPUHP_MM_ZSWP_POOL_PREPARE,
CPUHP_KVM_PPC_BOOK3S_PREPARE,
CPUHP_ZCOMP_PREPARE,
@@ -152,18 +144,14 @@ enum cpuhp_state {
CPUHP_AP_IRQ_ARMADA_XP_STARTING,
CPUHP_AP_IRQ_BCM2836_STARTING,
CPUHP_AP_IRQ_MIPS_GIC_STARTING,
- CPUHP_AP_IRQ_RISCV_STARTING,
CPUHP_AP_IRQ_LOONGARCH_STARTING,
CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
- CPUHP_AP_MICROCODE_LOADER,
CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
CPUHP_AP_PERF_X86_STARTING,
CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
- CPUHP_AP_PERF_X86_CQM_STARTING,
CPUHP_AP_PERF_X86_CSTATE_STARTING,
CPUHP_AP_PERF_XTENSA_STARTING,
- CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
@@ -173,13 +161,13 @@ enum cpuhp_state {
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
+ CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
CPUHP_AP_QCOM_TIMER_STARTING,
CPUHP_AP_TEGRA_TIMER_STARTING,
CPUHP_AP_ARMADA_TIMER_STARTING,
- CPUHP_AP_MARCO_TIMER_STARTING,
CPUHP_AP_MIPS_GIC_TIMER_STARTING,
CPUHP_AP_ARC_TIMER_STARTING,
CPUHP_AP_RISCV_TIMER_STARTING,
@@ -190,10 +178,13 @@ enum cpuhp_state {
/* Must be the last timer callback */
CPUHP_AP_DUMMY_TIMER_STARTING,
CPUHP_AP_ARM_XEN_STARTING,
+ CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
CPUHP_AP_ARM_CORESIGHT_STARTING,
CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
+ CPUHP_AP_HRTIMERS_DYING,
+ CPUHP_AP_TICK_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
@@ -205,7 +196,6 @@ enum cpuhp_state {
CPUHP_AP_KVM_ONLINE,
CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
- CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_BLK_MQ_ONLINE,
CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
@@ -216,9 +206,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
- CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
- CPUHP_AP_PERF_X86_IDXD_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
@@ -244,15 +232,14 @@ enum cpuhp_state {
CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
CPUHP_AP_PERF_CSKY_ONLINE,
+ CPUHP_AP_TMIGR_ONLINE,
CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RANDOM_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
- CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
- /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
- CPUHP_AP_MM_DEMOTION_ONLINE,
+ CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_ACTIVE,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index f10fb87d49db..1c29947db848 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -4,9 +4,10 @@
/*
* Cpumasks provide a bitmap suitable for representing the
- * set of CPU's in a system, one bit position per CPU number. In general,
+ * set of CPUs in a system, one bit position per CPU number. In general,
* only nr_cpu_ids (<= NR_CPUS) bits are valid.
*/
+#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/bitmap.h>
@@ -97,7 +98,7 @@ static inline void set_nr_cpu_ids(unsigned int nr)
*
* If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
*
- * The cpu_possible_mask is fixed at boot time, as the set of CPU id's
+ * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs
* that it is possible might ever be plugged in at anytime during the
* life of that system boot. The cpu_present_mask is dynamic(*),
* representing which CPUs are currently plugged in. And
@@ -112,7 +113,7 @@ static inline void set_nr_cpu_ids(unsigned int nr)
* hotplug, it's a copy of cpu_possible_mask, hence fixed at boot.
*
* Subtleties:
- * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
+ * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
* assumption that their single CPU is online. The UP
* cpu_{online,possible,present}_masks are placebos. Changing them
* will have no useful affect on the following num_*_cpus()
@@ -155,7 +156,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
* cpumask_first - get the first cpu in a cpumask
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Return: >= nr_cpu_ids if no cpus set.
*/
static inline unsigned int cpumask_first(const struct cpumask *srcp)
{
@@ -166,7 +167,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
* cpumask_first_zero - get the first unset cpu in a cpumask
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if all cpus are set.
+ * Return: >= nr_cpu_ids if all cpus are set.
*/
static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
{
@@ -178,7 +179,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
* @srcp1: the first input
* @srcp2: the second input
*
- * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
+ * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
static inline
unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
@@ -190,7 +191,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
* cpumask_last - get the last CPU in a cpumask
* @srcp: - the cpumask pointer
*
- * Returns >= nr_cpumask_bits if no CPUs set.
+ * Return: >= nr_cpumask_bits if no CPUs set.
*/
static inline unsigned int cpumask_last(const struct cpumask *srcp)
{
@@ -199,10 +200,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
/**
* cpumask_next - get the next cpu in a cpumask
- * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @n: the cpu prior to the place to search (i.e. return will be > @n)
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus set.
+ * Return: >= nr_cpu_ids if no further cpus set.
*/
static inline
unsigned int cpumask_next(int n, const struct cpumask *srcp)
@@ -215,10 +216,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
/**
* cpumask_next_zero - get the next unset cpu in a cpumask
- * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @n: the cpu prior to the place to search (i.e. return will be > @n)
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus unset.
+ * Return: >= nr_cpu_ids if no further cpus unset.
*/
static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
@@ -254,11 +255,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp);
/**
* cpumask_next_and - get the next cpu in *src1p & *src2p
- * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @n: the cpu prior to the place to search (i.e. return will be > @n)
* @src1p: the first cpumask pointer
* @src2p: the second cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus set in both.
+ * Return: >= nr_cpu_ids if no further cpus set in both.
*/
static inline
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
@@ -373,7 +374,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* @cpu: the cpu to ignore.
*
* Often used to find any cpu but smp_processor_id() in a mask.
- * Returns >= nr_cpu_ids if no cpus set.
+ * Return: >= nr_cpu_ids if no cpus set.
*/
static inline
unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
@@ -388,11 +389,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
}
/**
- * cpumask_nth - get the first cpu in a cpumask
+ * cpumask_nth - get the Nth cpu in a cpumask
* @srcp: the cpumask pointer
- * @cpu: the N'th cpu to find, starting from 0
+ * @cpu: the Nth cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
@@ -400,12 +401,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s
}
/**
- * cpumask_nth_and - get the first cpu in 2 cpumasks
+ * cpumask_nth_and - get the Nth cpu in 2 cpumasks
* @srcp1: the cpumask pointer
* @srcp2: the cpumask pointer
- * @cpu: the N'th cpu to find, starting from 0
+ * @cpu: the Nth cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
@@ -416,12 +417,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
}
/**
- * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd.
+ * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd.
* @srcp1: the cpumask pointer
* @srcp2: the cpumask pointer
- * @cpu: the N'th cpu to find, starting from 0
+ * @cpu: the Nth cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
@@ -436,9 +437,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
* @srcp1: the cpumask pointer
* @srcp2: the cpumask pointer
* @srcp3: the cpumask pointer
- * @cpu: the N'th cpu to find, starting from 0
+ * @cpu: the Nth cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
static __always_inline
unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1,
@@ -497,7 +498,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
* @cpu: cpu number (< nr_cpu_ids)
* @cpumask: the cpumask pointer
*
- * Returns true if @cpu is set in @cpumask, else returns false
+ * Return: true if @cpu is set in @cpumask, else returns false
*/
static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
{
@@ -509,9 +510,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum
* @cpu: cpu number (< nr_cpu_ids)
* @cpumask: the cpumask pointer
*
- * Returns true if @cpu is set in old bitmap of @cpumask, else returns false
- *
* test_and_set_bit wrapper for cpumasks.
+ *
+ * Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
{
@@ -523,9 +524,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp
* @cpu: cpu number (< nr_cpu_ids)
* @cpumask: the cpumask pointer
*
- * Returns true if @cpu is set in old bitmap of @cpumask, else returns false
- *
* test_and_clear_bit wrapper for cpumasks.
+ *
+ * Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
{
@@ -560,7 +561,7 @@ static inline void cpumask_clear(struct cpumask *dstp)
* @src1p: the first input
* @src2p: the second input
*
- * If *@dstp is empty, returns false, else returns true
+ * Return: false if *@dstp is empty, else returns true
*/
static inline bool cpumask_and(struct cpumask *dstp,
const struct cpumask *src1p,
@@ -603,7 +604,7 @@ static inline void cpumask_xor(struct cpumask *dstp,
* @src1p: the first input
* @src2p: the second input
*
- * If *@dstp is empty, returns false, else returns true
+ * Return: false if *@dstp is empty, else returns true
*/
static inline bool cpumask_andnot(struct cpumask *dstp,
const struct cpumask *src1p,
@@ -617,6 +618,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
* cpumask_equal - *src1p == *src2p
* @src1p: the first input
* @src2p: the second input
+ *
+ * Return: true if the cpumasks are equal, false if not
*/
static inline bool cpumask_equal(const struct cpumask *src1p,
const struct cpumask *src2p)
@@ -630,6 +633,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
* @src1p: the first input
* @src2p: the second input
* @src3p: the third input
+ *
+ * Return: true if first cpumask ORed with second cpumask == third cpumask,
+ * otherwise false
*/
static inline bool cpumask_or_equal(const struct cpumask *src1p,
const struct cpumask *src2p,
@@ -643,6 +649,9 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
* cpumask_intersects - (*src1p & *src2p) != 0
* @src1p: the first input
* @src2p: the second input
+ *
+ * Return: true if first cpumask ANDed with second cpumask is non-empty,
+ * otherwise false
*/
static inline bool cpumask_intersects(const struct cpumask *src1p,
const struct cpumask *src2p)
@@ -656,7 +665,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
* @src1p: the first input
* @src2p: the second input
*
- * Returns true if *@src1p is a subset of *@src2p, else returns false
+ * Return: true if *@src1p is a subset of *@src2p, else returns false
*/
static inline bool cpumask_subset(const struct cpumask *src1p,
const struct cpumask *src2p)
@@ -668,6 +677,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
/**
* cpumask_empty - *srcp == 0
* @srcp: the cpumask to that all cpus < nr_cpu_ids are clear.
+ *
+ * Return: true if srcp is empty (has no bits set), else false
*/
static inline bool cpumask_empty(const struct cpumask *srcp)
{
@@ -677,6 +688,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp)
/**
* cpumask_full - *srcp == 0xFFFFFFFF...
* @srcp: the cpumask to that all cpus < nr_cpu_ids are set.
+ *
+ * Return: true if srcp is full (has all bits set), else false
*/
static inline bool cpumask_full(const struct cpumask *srcp)
{
@@ -686,6 +699,8 @@ static inline bool cpumask_full(const struct cpumask *srcp)
/**
* cpumask_weight - Count of bits in *srcp
* @srcp: the cpumask to count bits (< nr_cpu_ids) in.
+ *
+ * Return: count of bits set in *srcp
*/
static inline unsigned int cpumask_weight(const struct cpumask *srcp)
{
@@ -696,6 +711,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
* cpumask_weight_and - Count of bits in (*srcp1 & *srcp2)
* @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
* @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
+ *
+ * Return: count of bits set in both *srcp1 and *srcp2
*/
static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
const struct cpumask *srcp2)
@@ -704,6 +721,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
}
/**
+ * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2)
+ * @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
+ * @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
+ *
+ * Return: count of bits set in both *srcp1 and *srcp2
+ */
+static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
+{
+ return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
+}
+
+/**
* cpumask_shift_right - *dstp = *srcp >> n
* @dstp: the cpumask result
* @srcp: the input to shift
@@ -744,7 +774,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
* cpumask_any - pick a "random" cpu from *srcp
* @srcp: the input cpumask
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Return: >= nr_cpu_ids if no cpus set.
*/
#define cpumask_any(srcp) cpumask_first(srcp)
@@ -753,7 +783,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
* @mask1: the first input cpumask
* @mask2: the second input cpumask
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Return: >= nr_cpu_ids if no cpus set.
*/
#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2))
@@ -769,7 +799,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
* @len: the length of the buffer
* @dstp: the cpumask to set.
*
- * Returns -errno, or 0 for success.
+ * Return: -errno, or 0 for success.
*/
static inline int cpumask_parse_user(const char __user *buf, int len,
struct cpumask *dstp)
@@ -783,7 +813,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
* @len: the length of the buffer
* @dstp: the cpumask to set.
*
- * Returns -errno, or 0 for success.
+ * Return: -errno, or 0 for success.
*/
static inline int cpumask_parselist_user(const char __user *buf, int len,
struct cpumask *dstp)
@@ -797,7 +827,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
* @buf: the buffer to extract from
* @dstp: the cpumask to set.
*
- * Returns -errno, or 0 for success.
+ * Return: -errno, or 0 for success.
*/
static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
{
@@ -809,7 +839,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
* @buf: the buffer to extract from
* @dstp: the cpumask to set.
*
- * Returns -errno, or 0 for success.
+ * Return: -errno, or 0 for success.
*/
static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
{
@@ -817,7 +847,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
}
/**
- * cpumask_size - size to allocate for a 'struct cpumask' in bytes
+ * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes
+ *
+ * Return: size to allocate for a &struct cpumask in bytes
*/
static inline unsigned int cpumask_size(void)
{
@@ -831,7 +863,7 @@ static inline unsigned int cpumask_size(void)
* little more difficult, we typedef cpumask_var_t to an array or a
* pointer: doing &mask on an array is a noop, so it still works.
*
- * ie.
+ * i.e.
* cpumask_var_t tmpmask;
* if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
* return -ENOMEM;
@@ -887,6 +919,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
* a nop returning a constant 1 (in <linux/cpumask.h>).
*
* See alloc_cpumask_var_node.
+ *
+ * Return: %true if allocation succeeded, %false if not
*/
static inline
bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
@@ -957,6 +991,8 @@ static inline bool cpumask_available(cpumask_var_t mask)
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
+DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T));
+
/* It's common to want to use cpu_all_mask in struct member initializers,
* so it has to refer to an address rather than a pointer. */
extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
@@ -1025,7 +1061,7 @@ set_cpu_dying(unsigned int cpu, bool dying)
}
/**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask *
* @bitmap: the bitmap
*
* There are a few places where cpumask_var_t isn't appropriate and
@@ -1068,6 +1104,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
* interface gives only a momentary snapshot and is not protected against
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
* region.
+ *
+ * Return: momentary snapshot of the number of online CPUs
*/
static __always_inline unsigned int num_online_cpus(void)
{
@@ -1160,7 +1198,7 @@ static inline bool cpu_dying(unsigned int cpu)
* @mask: the cpumask to copy
* @buf: the buffer to copy into
*
- * Returns the length of the (null-terminated) @buf string, zero if
+ * Return: the length of the (null-terminated) @buf string, zero if
* nothing is copied.
*/
static inline ssize_t
@@ -1183,7 +1221,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
* cpumask; Typically used by bin_attribute to export cpumask bitmask
* ABI.
*
- * Returns the length of how many bytes have been copied, excluding
+ * Return: the length of how many bytes have been copied, excluding
* terminating '\0'.
*/
static inline ssize_t
@@ -1204,6 +1242,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
*
* Everything is same with the above cpumap_print_bitmask_to_buf()
* except the print format.
+ *
+ * Return: the length of how many bytes have been copied, excluding
+ * terminating '\0'.
*/
static inline ssize_t
cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d629094fac6e..0ce6ff0d9c9a 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -77,6 +77,7 @@ extern void cpuset_lock(void);
extern void cpuset_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern bool cpuset_cpu_is_isolated(int cpu);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -120,11 +121,6 @@ static inline int cpuset_do_page_mem_spread(void)
return task_spread_page(current);
}
-static inline int cpuset_do_slab_mem_spread(void)
-{
- return task_spread_slab(current);
-}
-
extern bool current_cpuset_is_being_rebound(void);
extern void rebuild_sched_domains(void);
@@ -207,6 +203,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
return false;
}
+static inline bool cpuset_cpu_is_isolated(int cpu)
+{
+ return false;
+}
+
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
{
return node_possible_map;
@@ -258,11 +259,6 @@ static inline int cpuset_do_page_mem_spread(void)
return 0;
}
-static inline int cpuset_do_slab_mem_spread(void)
-{
- return 0;
-}
-
static inline bool current_cpuset_is_being_rebound(void)
{
return false;
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..d33352c2e386 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -6,82 +6,90 @@
#include <linux/elfcore.h>
#include <linux/elf.h>
-#define CRASH_CORE_NOTE_NAME "CORE"
-#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
-#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
-#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+struct kimage;
+struct crash_mem {
+ unsigned int max_nr_ranges;
+ unsigned int nr_ranges;
+ struct range ranges[] __counted_by(max_nr_ranges);
+};
+
+#ifdef CONFIG_CRASH_DUMP
+
+int crash_shrink_memory(unsigned long new_size);
+ssize_t crash_get_memory_size(void);
+
+#ifndef arch_kexec_protect_crashkres
/*
- * The per-cpu notes area is a list of notes terminated by a "NULL"
- * note header. For kdump, the code in vmcore.c runs in the context
- * of the second kernel to combine them into one note.
+ * Protection mechanism for crashkernel reserved memory after
+ * the kdump kernel is loaded.
+ *
+ * Provide an empty default implementation here -- architecture
+ * code may override this
*/
-#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
- CRASH_CORE_NOTE_NAME_BYTES + \
- CRASH_CORE_NOTE_DESC_BYTES)
-
-#define VMCOREINFO_BYTES PAGE_SIZE
-#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
-#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
-#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
- VMCOREINFO_NOTE_NAME_BYTES + \
- VMCOREINFO_BYTES)
-
-typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
-
-void crash_update_vmcoreinfo_safecopy(void *ptr);
-void crash_save_vmcoreinfo(void);
-void arch_crash_save_vmcoreinfo(void);
-__printf(1, 2)
-void vmcoreinfo_append_str(const char *fmt, ...);
-phys_addr_t paddr_vmcoreinfo_note(void);
-
-#define VMCOREINFO_OSRELEASE(value) \
- vmcoreinfo_append_str("OSRELEASE=%s\n", value)
-#define VMCOREINFO_BUILD_ID() \
- ({ \
- static_assert(sizeof(vmlinux_build_id) == 20); \
- vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \
- })
-
-#define VMCOREINFO_PAGESIZE(value) \
- vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
-#define VMCOREINFO_SYMBOL(name) \
- vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
-#define VMCOREINFO_SYMBOL_ARRAY(name) \
- vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
-#define VMCOREINFO_SIZE(name) \
- vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
- (unsigned long)sizeof(name))
-#define VMCOREINFO_STRUCT_SIZE(name) \
- vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
- (unsigned long)sizeof(struct name))
-#define VMCOREINFO_OFFSET(name, field) \
- vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
- (unsigned long)offsetof(struct name, field))
-#define VMCOREINFO_TYPE_OFFSET(name, field) \
- vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
- (unsigned long)offsetof(name, field))
-#define VMCOREINFO_LENGTH(name, value) \
- vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
-#define VMCOREINFO_NUMBER(name) \
- vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
-#define VMCOREINFO_CONFIG(name) \
- vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-
-extern unsigned char *vmcoreinfo_data;
-extern size_t vmcoreinfo_size;
-extern u32 *vmcoreinfo_note;
-
-Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
- void *data, size_t data_len);
-void final_note(Elf_Word *buf);
-
-int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
- unsigned long long *crash_size, unsigned long long *crash_base);
-int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
- unsigned long long *crash_size, unsigned long long *crash_base);
-int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
- unsigned long long *crash_size, unsigned long long *crash_base);
+static inline void arch_kexec_protect_crashkres(void) { }
+#endif
+
+#ifndef arch_kexec_unprotect_crashkres
+static inline void arch_kexec_unprotect_crashkres(void) { }
+#endif
+
+
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
+#endif
+
+int crash_check_update_elfcorehdr(void);
+
+#ifndef crash_hotplug_cpu_support
+static inline int crash_hotplug_cpu_support(void) { return 0; }
+#endif
+
+#ifndef crash_hotplug_memory_support
+static inline int crash_hotplug_memory_support(void) { return 0; }
+#endif
+
+#ifndef crash_get_elfcorehdr_size
+static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
+#endif
+
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN 4096
+
+extern int crash_exclude_mem_range(struct crash_mem *mem,
+ unsigned long long mstart,
+ unsigned long long mend);
+extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
+ void **addr, unsigned long *sz);
+
+struct kimage;
+struct kexec_segment;
+
+#define KEXEC_CRASH_HP_NONE 0
+#define KEXEC_CRASH_HP_ADD_CPU 1
+#define KEXEC_CRASH_HP_REMOVE_CPU 2
+#define KEXEC_CRASH_HP_ADD_MEMORY 3
+#define KEXEC_CRASH_HP_REMOVE_MEMORY 4
+#define KEXEC_CRASH_HP_INVALID_CPU -1U
+
+extern void __crash_kexec(struct pt_regs *regs);
+extern void crash_kexec(struct pt_regs *regs);
+int kexec_should_crash(struct task_struct *p);
+int kexec_crash_loaded(void);
+void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
+
+#else /* !CONFIG_CRASH_DUMP*/
+struct pt_regs;
+struct task_struct;
+struct kimage;
+static inline void __crash_kexec(struct pt_regs *regs) { }
+static inline void crash_kexec(struct pt_regs *regs) { }
+static inline int kexec_should_crash(struct task_struct *p) { return 0; }
+static inline int kexec_crash_loaded(void) { return 0; }
+static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {};
+static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; };
+#endif /* CONFIG_CRASH_DUMP*/
#endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0f3a656293b0..acc55626afdc 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -50,6 +50,7 @@ void vmcore_cleanup(void);
#define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
#endif
+#ifndef is_kdump_kernel
/*
* is_kdump_kernel() checks whether this kernel is booting after a panic of
* previous kernel or not. This is determined by checking if previous kernel
@@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void)
{
return elfcorehdr_addr != ELFCORE_ADDR_MAX;
}
+#endif
/* is_vmcore_usable() checks if the kernel is booting after a panic and
* the vmcore region is usable.
@@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void)
static inline int is_vmcore_usable(void)
{
- return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+ return elfcorehdr_addr != ELFCORE_ADDR_ERR &&
+ elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0;
}
/* vmcore_unusable() marks the vmcore as unusable,
@@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void)
static inline void vmcore_unusable(void)
{
- if (is_kdump_kernel())
- elfcorehdr_addr = ELFCORE_ADDR_ERR;
+ elfcorehdr_addr = ELFCORE_ADDR_ERR;
}
/**
diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
new file mode 100644
index 000000000000..5a9df944fb80
--- /dev/null
+++ b/include/linux/crash_reserve.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_CRASH_RESERVE_H
+#define LINUX_CRASH_RESERVE_H
+
+#include <linux/linkage.h>
+#include <linux/elfcore.h>
+#include <linux/elf.h>
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#include <asm/crash_reserve.h>
+#endif
+
+/* Location of a reserved region to hold the crash kernel.
+ */
+extern struct resource crashk_res;
+extern struct resource crashk_low_res;
+
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+ unsigned long long *crash_size, unsigned long long *crash_base,
+ unsigned long long *low_size, bool *high);
+
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+#endif
+#ifndef CRASH_ALIGN
+#define CRASH_ALIGN SZ_2M
+#endif
+#ifndef CRASH_ADDR_LOW_MAX
+#define CRASH_ADDR_LOW_MAX SZ_4G
+#endif
+#ifndef CRASH_ADDR_HIGH_MAX
+#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM()
+#endif
+
+void __init reserve_crashkernel_generic(char *cmdline,
+ unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high);
+#else
+static inline void __init reserve_crashkernel_generic(char *cmdline,
+ unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high)
+{}
+#endif
+#endif /* LINUX_CRASH_RESERVE_H */
diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h
index 72c92c396bb8..cd4f420231ba 100644
--- a/include/linux/crc-ccitt.h
+++ b/include/linux/crc-ccitt.h
@@ -5,19 +5,12 @@
#include <linux/types.h>
extern u16 const crc_ccitt_table[256];
-extern u16 const crc_ccitt_false_table[256];
extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len);
-extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len);
static inline u16 crc_ccitt_byte(u16 crc, const u8 c)
{
return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff];
}
-static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c)
-{
- return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c];
-}
-
#endif /* _LINUX_CRC_CCITT_H */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 9ed9232af934..2976f534a7a3 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/key.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/uidgid.h>
#include <linux/sched.h>
#include <linux/sched/user.h>
@@ -23,7 +24,7 @@ struct inode;
* COW Supplementary groups list
*/
struct group_info {
- atomic_t usage;
+ refcount_t usage;
int ngroups;
kgid_t gid[];
} __randomize_layout;
@@ -39,7 +40,7 @@ struct group_info {
*/
static inline struct group_info *get_group_info(struct group_info *gi)
{
- atomic_inc(&gi->usage);
+ refcount_inc(&gi->usage);
return gi;
}
@@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi)
*/
#define put_group_info(group_info) \
do { \
- if (atomic_dec_and_test(&(group_info)->usage)) \
+ if (refcount_dec_and_test(&(group_info)->usage)) \
groups_free(group_info); \
} while (0)
@@ -108,14 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp)
* same context as task->real_cred.
*/
struct cred {
- atomic_t usage;
-#ifdef CONFIG_DEBUG_CREDENTIALS
- atomic_t subscribers; /* number of processes subscribed */
- void *put_addr;
- unsigned magic;
-#define CRED_MAGIC 0x43736564
-#define CRED_MAGIC_DEAD 0x44656144
-#endif
+ atomic_long_t usage;
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
@@ -164,7 +158,6 @@ extern void abort_creds(struct cred *);
extern const struct cred *override_creds(const struct cred *);
extern void revert_creds(const struct cred *);
extern struct cred *prepare_kernel_cred(struct task_struct *);
-extern int change_create_files_as(struct cred *, struct inode *);
extern int set_security_override(struct cred *, u32);
extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
@@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *);
extern void __init cred_init(void);
extern int set_cred_ucounts(struct cred *);
-/*
- * check for validity of credentials
- */
-#ifdef CONFIG_DEBUG_CREDENTIALS
-extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned);
-extern void __validate_process_creds(struct task_struct *,
- const char *, unsigned);
-
-extern bool creds_are_invalid(const struct cred *cred);
-
-static inline void __validate_creds(const struct cred *cred,
- const char *file, unsigned line)
-{
- if (unlikely(creds_are_invalid(cred)))
- __invalid_creds(cred, file, line);
-}
-
-#define validate_creds(cred) \
-do { \
- __validate_creds((cred), __FILE__, __LINE__); \
-} while(0)
-
-#define validate_process_creds() \
-do { \
- __validate_process_creds(current, __FILE__, __LINE__); \
-} while(0)
-
-extern void validate_creds_for_do_exit(struct task_struct *);
-#else
-static inline void validate_creds(const struct cred *cred)
-{
-}
-static inline void validate_creds_for_do_exit(struct task_struct *tsk)
-{
-}
-static inline void validate_process_creds(void)
-{
-}
-#endif
-
static inline bool cap_ambient_invariant_ok(const struct cred *cred)
{
return cap_issubset(cred->cap_ambient,
@@ -220,6 +173,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
}
/**
+ * get_new_cred_many - Get references on a new set of credentials
+ * @cred: The new credentials to reference
+ * @nr: Number of references to acquire
+ *
+ * Get references on the specified set of new credentials. The caller must
+ * release all acquired references.
+ */
+static inline struct cred *get_new_cred_many(struct cred *cred, int nr)
+{
+ atomic_long_add(nr, &cred->usage);
+ return cred;
+}
+
+/**
* get_new_cred - Get a reference on a new set of credentials
* @cred: The new credentials to reference
*
@@ -228,16 +195,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
*/
static inline struct cred *get_new_cred(struct cred *cred)
{
- atomic_inc(&cred->usage);
- return cred;
+ return get_new_cred_many(cred, 1);
}
/**
- * get_cred - Get a reference on a set of credentials
+ * get_cred_many - Get references on a set of credentials
* @cred: The credentials to reference
+ * @nr: Number of references to acquire
*
- * Get a reference on the specified set of credentials. The caller must
- * release the reference. If %NULL is passed, it is returned with no action.
+ * Get references on the specified set of credentials. The caller must release
+ * all acquired reference. If %NULL is passed, it is returned with no action.
*
* This is used to deal with a committed set of credentials. Although the
* pointer is const, this will temporarily discard the const and increment the
@@ -245,14 +212,27 @@ static inline struct cred *get_new_cred(struct cred *cred)
* accidental alteration of a set of credentials that should be considered
* immutable.
*/
-static inline const struct cred *get_cred(const struct cred *cred)
+static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
{
struct cred *nonconst_cred = (struct cred *) cred;
if (!cred)
return cred;
- validate_creds(cred);
nonconst_cred->non_rcu = 0;
- return get_new_cred(nonconst_cred);
+ return get_new_cred_many(nonconst_cred, nr);
+}
+
+/*
+ * get_cred - Get a reference on a set of credentials
+ * @cred: The credentials to reference
+ *
+ * Get a reference on the specified set of credentials. The caller must
+ * release the reference. If %NULL is passed, it is returned with no action.
+ *
+ * This is used to deal with a committed set of credentials.
+ */
+static inline const struct cred *get_cred(const struct cred *cred)
+{
+ return get_cred_many(cred, 1);
}
static inline const struct cred *get_cred_rcu(const struct cred *cred)
@@ -260,9 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
struct cred *nonconst_cred = (struct cred *) cred;
if (!cred)
return NULL;
- if (!atomic_inc_not_zero(&nonconst_cred->usage))
+ if (!atomic_long_inc_not_zero(&nonconst_cred->usage))
return NULL;
- validate_creds(cred);
nonconst_cred->non_rcu = 0;
return cred;
}
@@ -270,6 +249,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
/**
* put_cred - Release a reference to a set of credentials
* @cred: The credentials to release
+ * @nr: Number of references to release
*
* Release a reference to a set of credentials, deleting them when the last ref
* is released. If %NULL is passed, nothing is done.
@@ -278,17 +258,28 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
* on task_struct are attached by const pointers to prevent accidental
* alteration of otherwise immutable credential sets.
*/
-static inline void put_cred(const struct cred *_cred)
+static inline void put_cred_many(const struct cred *_cred, int nr)
{
struct cred *cred = (struct cred *) _cred;
if (cred) {
- validate_creds(cred);
- if (atomic_dec_and_test(&(cred)->usage))
+ if (atomic_long_sub_and_test(nr, &cred->usage))
__put_cred(cred);
}
}
+/*
+ * put_cred - Release a reference to a set of credentials
+ * @cred: The credentials to release
+ *
+ * Release a reference to a set of credentials, deleting them when the last ref
+ * is released. If %NULL is passed, nothing is done.
+ */
+static inline void put_cred(const struct cred *cred)
+{
+ put_cred_many(cred, 1);
+}
+
/**
* current_cred - Access the current task's subjective credentials
*
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 31f6fee0c36c..b164da5e129e 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -24,6 +24,7 @@
#define CRYPTO_ALG_TYPE_CIPHER 0x00000001
#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002
#define CRYPTO_ALG_TYPE_AEAD 0x00000003
+#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004
#define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005
#define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006
#define CRYPTO_ALG_TYPE_SIG 0x00000007
@@ -35,8 +36,6 @@
#define CRYPTO_ALG_TYPE_SHASH 0x0000000e
#define CRYPTO_ALG_TYPE_AHASH 0x0000000f
-#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e
-#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e
#define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e
#define CRYPTO_ALG_LARVAL 0x00000010
@@ -111,7 +110,6 @@
* crypto_aead_walksize() (with the remainder going at the end), no chunk
* can cross a page boundary or a scatterlist element boundary.
* ahash:
- * - The result buffer must be aligned to the algorithm's alignmask.
* - crypto_ahash_finup() must not be used unless the algorithm implements
* ->finup() natively.
*/
@@ -279,18 +277,20 @@ struct compress_alg {
* @cra_ctxsize: Size of the operational context of the transformation. This
* value informs the kernel crypto API about the memory size
* needed to be allocated for the transformation context.
- * @cra_alignmask: Alignment mask for the input and output data buffer. The data
- * buffer containing the input data for the algorithm must be
- * aligned to this alignment mask. The data buffer for the
- * output data must be aligned to this alignment mask. Note that
- * the Crypto API will do the re-alignment in software, but
- * only under special conditions and there is a performance hit.
- * The re-alignment happens at these occasions for different
- * @cra_u types: cipher -- For both input data and output data
- * buffer; ahash -- For output hash destination buf; shash --
- * For output hash destination buf.
- * This is needed on hardware which is flawed by design and
- * cannot pick data from arbitrary addresses.
+ * @cra_alignmask: For cipher, skcipher, lskcipher, and aead algorithms this is
+ * 1 less than the alignment, in bytes, that the algorithm
+ * implementation requires for input and output buffers. When
+ * the crypto API is invoked with buffers that are not aligned
+ * to this alignment, the crypto API automatically utilizes
+ * appropriately aligned temporary buffers to comply with what
+ * the algorithm needs. (For scatterlists this happens only if
+ * the algorithm uses the skcipher_walk helper functions.) This
+ * misalignment handling carries a performance penalty, so it is
+ * preferred that algorithms do not set a nonzero alignmask.
+ * Also, crypto API users may wish to allocate buffers aligned
+ * to the alignmask of the algorithm being used, in order to
+ * avoid the API having to realign them. Note: the alignmask is
+ * not supported for hash algorithms and is always 0 for them.
* @cra_priority: Priority of this transformation implementation. In case
* multiple transformations with same @cra_name are available to
* the Crypto API, the kernel will use the one with highest
diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
new file mode 100644
index 000000000000..03fa6d50d46f
--- /dev/null
+++ b/include/linux/cxl-event.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Intel Corporation. */
+#ifndef _LINUX_CXL_EVENT_H
+#define _LINUX_CXL_EVENT_H
+
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct cxl_event_record_hdr {
+ u8 length;
+ u8 flags[3];
+ __le16 handle;
+ __le16 related_handle;
+ __le64 timestamp;
+ u8 maint_op_class;
+ u8 reserved[15];
+} __packed;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+struct cxl_event_generic {
+ struct cxl_event_record_hdr hdr;
+ u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
+} __packed;
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
+struct cxl_event_gen_media {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 device[3];
+ u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ u8 reserved[46];
+} __packed;
+
+/*
+ * DRAM Event Record - DER
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44
+ */
+#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20
+struct cxl_event_dram {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 nibble_mask[3];
+ u8 bank_group;
+ u8 bank;
+ u8 row[3];
+ u8 column[2];
+ u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
+ u8 reserved[0x17];
+} __packed;
+
+/*
+ * Get Health Info Record
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 add_status;
+ u8 life_used;
+ u8 device_temp[2];
+ u8 dirty_shutdown_cnt[4];
+ u8 cor_vol_err_cnt[4];
+ u8 cor_per_err_cnt[4];
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_event_mem_module {
+ struct cxl_event_record_hdr hdr;
+ u8 event_type;
+ struct cxl_get_health_info info;
+ u8 reserved[0x3d];
+} __packed;
+
+union cxl_event {
+ struct cxl_event_generic generic;
+ struct cxl_event_gen_media gen_media;
+ struct cxl_event_dram dram;
+ struct cxl_event_mem_module mem_module;
+} __packed;
+
+/*
+ * Common Event Record Format; in event logs
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct cxl_event_record_raw {
+ uuid_t id;
+ union cxl_event event;
+} __packed;
+
+enum cxl_event_type {
+ CXL_CPER_EVENT_GENERIC,
+ CXL_CPER_EVENT_GEN_MEDIA,
+ CXL_CPER_EVENT_DRAM,
+ CXL_CPER_EVENT_MEM_MODULE,
+};
+
+#define CPER_CXL_DEVICE_ID_VALID BIT(0)
+#define CPER_CXL_DEVICE_SN_VALID BIT(1)
+#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2)
+struct cxl_cper_event_rec {
+ struct {
+ u32 length;
+ u64 validation_bits;
+ struct cper_cxl_event_devid {
+ u16 vendor_id;
+ u16 device_id;
+ u8 func_num;
+ u8 device_num;
+ u8 bus_num;
+ u16 segment_num;
+ u16 slot_num; /* bits 2:0 reserved */
+ u8 reserved;
+ } __packed device_id;
+ struct cper_cxl_event_sn {
+ u32 lower_dw;
+ u32 upper_dw;
+ } __packed dev_serial_num;
+ } __packed hdr;
+
+ union cxl_event event;
+} __packed;
+
+#endif /* _LINUX_CXL_EVENT_H */
diff --git a/include/linux/damon.h b/include/linux/damon.h
index d5d4d19928e0..886d07294f4e 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -2,7 +2,7 @@
/*
* DAMON api
*
- * Author: SeongJae Park <sjpark@amazon.de>
+ * Author: SeongJae Park <sj@kernel.org>
*/
#ifndef _DAMON_H_
@@ -40,9 +40,24 @@ struct damon_addr_range {
* @ar: The address range of the region.
* @sampling_addr: Address of the sample for the next access check.
* @nr_accesses: Access frequency of this region.
+ * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for
+ * each sampling interval.
* @list: List head for siblings.
* @age: Age of this region.
*
+ * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be
+ * increased for every &damon_attrs->sample_interval if an access to the region
+ * during the last sampling interval is found. The update of this field should
+ * not be done with direct access but with the helper function,
+ * damon_update_region_access_rate().
+ *
+ * @nr_accesses_bp is another representation of @nr_accesses in basis point
+ * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a
+ * manner similar to moving sum. By the algorithm, this value becomes
+ * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can
+ * be used when the aggregation interval is too huge and therefore cannot wait
+ * for it before getting the access monitoring results.
+ *
* @age is initially zero, increased for each aggregation interval, and reset
* to zero again if the access frequency is significantly changed. If two
* regions are merged into a new region, both @nr_accesses and @age of the new
@@ -52,6 +67,7 @@ struct damon_region {
struct damon_addr_range ar;
unsigned long sampling_addr;
unsigned int nr_accesses;
+ unsigned int nr_accesses_bp;
struct list_head list;
unsigned int age;
@@ -111,10 +127,55 @@ enum damos_action {
};
/**
+ * enum damos_quota_goal_metric - Represents the metric to be used as the goal
+ *
+ * @DAMOS_QUOTA_USER_INPUT: User-input value.
+ * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us.
+ * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics.
+ *
+ * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
+ */
+enum damos_quota_goal_metric {
+ DAMOS_QUOTA_USER_INPUT,
+ DAMOS_QUOTA_SOME_MEM_PSI_US,
+ NR_DAMOS_QUOTA_GOAL_METRICS,
+};
+
+/**
+ * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal.
+ * @metric: Metric to be used for representing the goal.
+ * @target_value: Target value of @metric to achieve with the tuning.
+ * @current_value: Current value of @metric.
+ * @last_psi_total: Last measured total PSI
+ * @list: List head for siblings.
+ *
+ * Data structure for getting the current score of the quota tuning goal. The
+ * score is calculated by how close @current_value and @target_value are. Then
+ * the score is entered to DAMON's internal feedback loop mechanism to get the
+ * auto-tuned quota.
+ *
+ * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually
+ * entered by the user, probably inside the kdamond callbacks. Otherwise,
+ * DAMON sets @current_value with self-measured value of @metric.
+ */
+struct damos_quota_goal {
+ enum damos_quota_goal_metric metric;
+ unsigned long target_value;
+ unsigned long current_value;
+ /* metric-dependent fields */
+ union {
+ u64 last_psi_total;
+ };
+ struct list_head list;
+};
+
+/**
* struct damos_quota - Controls the aggressiveness of the given scheme.
+ * @reset_interval: Charge reset interval in milliseconds.
* @ms: Maximum milliseconds that the scheme can use.
* @sz: Maximum bytes of memory that the action can be applied.
- * @reset_interval: Charge reset interval in milliseconds.
+ * @goals: Head of quota tuning goals (&damos_quota_goal) list.
+ * @esz: Effective size quota in bytes.
*
* @weight_sz: Weight of the region's size for prioritization.
* @weight_nr_accesses: Weight of the region's nr_accesses for prioritization.
@@ -132,6 +193,13 @@ enum damos_action {
* throughput of the scheme's action. DAMON then compares it against &sz and
* uses smaller one as the effective quota.
*
+ * If @goals is not empt, DAMON calculates yet another size quota based on the
+ * goals using its internal feedback loop algorithm, for every @reset_interval.
+ * Then, if the new size quota is smaller than the effective quota, it uses the
+ * new size quota as the effective quota.
+ *
+ * The resulting effective size quota in bytes is set to @esz.
+ *
* For selecting regions within the quota, DAMON prioritizes current scheme's
* target memory regions using the &struct damon_operations->get_scheme_score.
* You could customize the prioritization logic by setting &weight_sz,
@@ -139,9 +207,11 @@ enum damos_action {
* encouraged to respect those.
*/
struct damos_quota {
+ unsigned long reset_interval;
unsigned long ms;
unsigned long sz;
- unsigned long reset_interval;
+ struct list_head goals;
+ unsigned long esz;
unsigned int weight_sz;
unsigned int weight_nr_accesses;
@@ -152,8 +222,6 @@ struct damos_quota {
unsigned long total_charged_sz;
unsigned long total_charged_ns;
- unsigned long esz; /* Effective size quota in bytes */
-
/* For charging the quota */
unsigned long charged_sz;
unsigned long charged_from;
@@ -163,6 +231,9 @@ struct damos_quota {
/* For prioritization */
unsigned long histogram[DAMOS_MAX_SCORE + 1];
unsigned int min_score;
+
+ /* For feedback loop */
+ unsigned long esz_bp;
};
/**
@@ -226,16 +297,26 @@ struct damos_stat {
* enum damos_filter_type - Type of memory for &struct damos_filter
* @DAMOS_FILTER_TYPE_ANON: Anonymous pages.
* @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages.
+ * @DAMOS_FILTER_TYPE_ADDR: Address range.
+ * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target.
* @NR_DAMOS_FILTER_TYPES: Number of filter types.
*
- * The support of each filter type is up to running &struct damon_operations.
- * &enum DAMON_OPS_PADDR is supporting all filter types, while
- * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR are not supporting any
- * filter types.
+ * The anon pages type and memcg type filters are handled by underlying
+ * &struct damon_operations as a part of scheme action trying, and therefore
+ * accounted as 'tried'. In contrast, other types are handled by core layer
+ * before trying of the action and therefore not accounted as 'tried'.
+ *
+ * The support of the filters that handled by &struct damon_operations depend
+ * on the running &struct damon_operations.
+ * &enum DAMON_OPS_PADDR supports both anon pages type and memcg type filters,
+ * while &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR don't support any of
+ * the two types.
*/
enum damos_filter_type {
DAMOS_FILTER_TYPE_ANON,
DAMOS_FILTER_TYPE_MEMCG,
+ DAMOS_FILTER_TYPE_ADDR,
+ DAMOS_FILTER_TYPE_TARGET,
NR_DAMOS_FILTER_TYPES,
};
@@ -244,18 +325,24 @@ enum damos_filter_type {
* @type: Type of the page.
* @matching: If the matching page should filtered out or in.
* @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG.
+ * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR.
+ * @target_idx: Index of the &struct damon_target of
+ * &damon_ctx->adaptive_targets if @type is
+ * DAMOS_FILTER_TYPE_TARGET.
* @list: List head for siblings.
*
* Before applying the &damos->action to a memory region, DAMOS checks if each
* page of the region matches to this and avoid applying the action if so.
- * Note that the check support is up to &struct damon_operations
- * implementation.
+ * Support of each filter type depends on the running &struct damon_operations
+ * and the type. Refer to &enum damos_filter_type for more detai.
*/
struct damos_filter {
enum damos_filter_type type;
bool matching;
union {
unsigned short memcg_id;
+ struct damon_addr_range addr_range;
+ int target_idx;
};
struct list_head list;
};
@@ -282,24 +369,24 @@ struct damos_access_pattern {
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
* @pattern: Access pattern of target regions.
* @action: &damo_action to be applied to the target regions.
+ * @apply_interval_us: The time between applying the @action.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @filters: Additional set of &struct damos_filter for &action.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
- * For each aggregation interval, DAMON finds regions which fit in the
+ * For each @apply_interval_us, DAMON finds regions which fit in the
* &pattern and applies &action to those. To avoid consuming too much
* CPU time or IO resources for the &action, &quota is used.
*
+ * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead.
+ *
* To do the work only when needed, schemes can be activated for specific
* system situations using &wmarks. If all schemes that registered to the
* monitoring context are inactive, DAMON stops monitoring either, and just
* repeatedly checks the watermarks.
*
- * If all schemes that registered to a &struct damon_ctx are inactive, DAMON
- * stops monitoring and just repeatedly checks the watermarks.
- *
* Before applying the &action to a memory region, &struct damon_operations
* implementation could check pages of the region and skip &action to respect
* &filters
@@ -311,6 +398,14 @@ struct damos_access_pattern {
struct damos {
struct damos_access_pattern pattern;
enum damos_action action;
+ unsigned long apply_interval_us;
+/* private: internal use only */
+ /*
+ * number of sample intervals that should be passed before applying
+ * @action
+ */
+ unsigned long next_apply_sis;
+/* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
struct list_head filters;
@@ -456,13 +551,14 @@ struct damon_callback {
* regions.
*
* For each @sample_interval, DAMON checks whether each region is accessed or
- * not. It aggregates and keeps the access information (number of accesses to
- * each region) for @aggr_interval time. DAMON also checks whether the target
- * memory regions need update (e.g., by ``mmap()`` calls from the application,
- * in case of virtual memory monitoring) and applies the changes for each
- * @ops_update_interval. All time intervals are in micro-seconds.
- * Please refer to &struct damon_operations and &struct damon_callback for more
- * detail.
+ * not during the last @sample_interval. If such access is found, DAMON
+ * aggregates the information by increasing &damon_region->nr_accesses for
+ * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON
+ * also checks whether the target memory regions need update (e.g., by
+ * ``mmap()`` calls from the application, in case of virtual memory monitoring)
+ * and applies the changes for each @ops_update_interval. All time intervals
+ * are in micro-seconds. Please refer to &struct damon_operations and &struct
+ * damon_callback for more detail.
*/
struct damon_attrs {
unsigned long sample_interval;
@@ -506,8 +602,20 @@ struct damon_ctx {
struct damon_attrs attrs;
/* private: internal use only */
- struct timespec64 last_aggregation;
- struct timespec64 last_ops_update;
+ /* number of sample intervals that passed since this context started */
+ unsigned long passed_sample_intervals;
+ /*
+ * number of sample intervals that should be passed before next
+ * aggregation
+ */
+ unsigned long next_aggregation_sis;
+ /*
+ * number of sample intervals that should be passed before next ops
+ * update
+ */
+ unsigned long next_ops_update_sis;
+ /* for waiting until the execution of the kdamond_fn is started */
+ struct completion kdamond_started;
/* public: */
struct task_struct *kdamond;
@@ -567,6 +675,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
#define damon_for_each_scheme_safe(s, next, ctx) \
list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
+#define damos_for_each_quota_goal(goal, quota) \
+ list_for_each_entry(goal, &quota->goals, list)
+
+#define damos_for_each_quota_goal_safe(goal, next, quota) \
+ list_for_each_entry_safe(goal, next, &(quota)->goals, list)
+
#define damos_for_each_filter(f, scheme) \
list_for_each_entry(f, &(scheme)->filters, list)
@@ -592,14 +706,24 @@ void damon_add_region(struct damon_region *r, struct damon_target *t);
void damon_destroy_region(struct damon_region *r, struct damon_target *t);
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges);
+void damon_update_region_access_rate(struct damon_region *r, bool accessed,
+ struct damon_attrs *attrs);
struct damos_filter *damos_new_filter(enum damos_filter_type type,
bool matching);
void damos_add_filter(struct damos *s, struct damos_filter *f);
void damos_destroy_filter(struct damos_filter *f);
+struct damos_quota_goal *damos_new_quota_goal(
+ enum damos_quota_goal_metric metric,
+ unsigned long target_value);
+void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g);
+void damos_destroy_quota_goal(struct damos_quota_goal *goal);
+
struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
- enum damos_action action, struct damos_quota *quota,
+ enum damos_action action,
+ unsigned long apply_interval_us,
+ struct damos_quota *quota,
struct damos_watermarks *wmarks);
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
@@ -626,6 +750,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
}
+static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs)
+{
+ /* {aggr,sample}_interval are unsigned long, hence could overflow */
+ return min(attrs->aggr_interval / attrs->sample_interval,
+ (unsigned long)UINT_MAX);
+}
+
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 261944ec0887..9d3e3327af4c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -63,6 +63,8 @@ void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
bool dax_write_cache_enabled(struct dax_device *dax_dev);
bool dax_synchronous(struct dax_device *dax_dev);
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
void set_dax_synchronous(struct dax_device *dax_dev);
size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
@@ -86,11 +88,7 @@ static inline void *dax_holder(struct dax_device *dax_dev)
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
- /*
- * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
- * NULL is an error or expected.
- */
- return NULL;
+ return ERR_PTR(-EOPNOTSUPP);
}
static inline void put_dax(struct dax_device *dax_dev)
{
@@ -109,6 +107,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev)
{
return true;
}
+static inline void set_dax_nocache(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_nomc(struct dax_device *dax_dev)
+{
+}
static inline void set_dax_synchronous(struct dax_device *dax_dev)
{
}
@@ -124,9 +128,6 @@ static inline size_t dax_recovery_write(struct dax_device *dax_dev,
}
#endif
-void set_dax_nocache(struct dax_device *dax_dev);
-void set_dax_nomc(struct dax_device *dax_dev);
-
struct writeback_control;
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
@@ -159,8 +160,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
-dax_entry_t dax_lock_page(struct page *page);
-void dax_unlock_page(struct page *page, dax_entry_t cookie);
+dax_entry_t dax_lock_folio(struct folio *folio);
+void dax_unlock_folio(struct folio *folio, dax_entry_t cookie);
dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
unsigned long index, struct page **page);
void dax_unlock_mapping_entry(struct address_space *mapping,
@@ -182,14 +183,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
return -EOPNOTSUPP;
}
-static inline dax_entry_t dax_lock_page(struct page *page)
+static inline dax_entry_t dax_lock_folio(struct folio *folio)
{
- if (IS_DAX(page->mapping->host))
+ if (IS_DAX(folio->mapping->host))
return ~0UL;
return 0;
}
-static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
+static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie)
{
}
@@ -241,10 +242,10 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops);
-vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
pfn_t *pfnp, int *errp, const struct iomap_ops *ops);
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
- enum page_entry_size pe_size, pfn_t pfn);
+ unsigned int order, pfn_t pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6b351e009f59..bf53e3894aae 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -68,12 +68,12 @@ extern const struct qstr dotdot_name;
* large memory footprint increase).
*/
#ifdef CONFIG_64BIT
-# define DNAME_INLINE_LEN 32 /* 192 bytes */
+# define DNAME_INLINE_LEN 40 /* 192 bytes */
#else
# ifdef CONFIG_SMP
-# define DNAME_INLINE_LEN 36 /* 128 bytes */
-# else
# define DNAME_INLINE_LEN 40 /* 128 bytes */
+# else
+# define DNAME_INLINE_LEN 44 /* 128 bytes */
# endif
#endif
@@ -101,8 +101,8 @@ struct dentry {
struct list_head d_lru; /* LRU list */
wait_queue_head_t *d_wait; /* in-lookup ones only */
};
- struct list_head d_child; /* child of parent list */
- struct list_head d_subdirs; /* our children */
+ struct hlist_node d_sib; /* child of parent list */
+ struct hlist_head d_children; /* our children */
/*
* d_alias and d_rcu can share memory
*/
@@ -111,7 +111,7 @@ struct dentry {
struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */
struct rcu_head d_rcu;
} d_u;
-} __randomize_layout;
+};
/*
* dentry->d_lock spinlock nesting subclasses:
@@ -125,6 +125,11 @@ enum dentry_d_lock_class
DENTRY_D_LOCK_NESTED
};
+enum d_real_type {
+ D_REAL_DATA,
+ D_REAL_METADATA,
+};
+
struct dentry_operations {
int (*d_revalidate)(struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
@@ -139,7 +144,7 @@ struct dentry_operations {
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
- struct dentry *(*d_real)(struct dentry *, const struct inode *);
+ struct dentry *(*d_real)(struct dentry *, enum d_real_type type);
} ____cacheline_aligned;
/*
@@ -151,13 +156,13 @@ struct dentry_operations {
*/
/* d_flags entries */
-#define DCACHE_OP_HASH 0x00000001
-#define DCACHE_OP_COMPARE 0x00000002
-#define DCACHE_OP_REVALIDATE 0x00000004
-#define DCACHE_OP_DELETE 0x00000008
-#define DCACHE_OP_PRUNE 0x00000010
+#define DCACHE_OP_HASH BIT(0)
+#define DCACHE_OP_COMPARE BIT(1)
+#define DCACHE_OP_REVALIDATE BIT(2)
+#define DCACHE_OP_DELETE BIT(3)
+#define DCACHE_OP_PRUNE BIT(4)
-#define DCACHE_DISCONNECTED 0x00000020
+#define DCACHE_DISCONNECTED BIT(5)
/* This dentry is possibly not currently connected to the dcache tree, in
* which case its parent will either be itself, or will have this flag as
* well. nfsd will not use a dentry with this bit set, but will first
@@ -168,50 +173,47 @@ struct dentry_operations {
* dentry into place and return that dentry rather than the passed one,
* typically using d_splice_alias. */
-#define DCACHE_REFERENCED 0x00000040 /* Recently used, don't discard. */
+#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */
-#define DCACHE_DONTCACHE 0x00000080 /* Purge from memory on final dput() */
+#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */
-#define DCACHE_CANT_MOUNT 0x00000100
-#define DCACHE_GENOCIDE 0x00000200
-#define DCACHE_SHRINK_LIST 0x00000400
+#define DCACHE_CANT_MOUNT BIT(8)
+#define DCACHE_GENOCIDE BIT(9)
+#define DCACHE_SHRINK_LIST BIT(10)
-#define DCACHE_OP_WEAK_REVALIDATE 0x00000800
+#define DCACHE_OP_WEAK_REVALIDATE BIT(11)
-#define DCACHE_NFSFS_RENAMED 0x00001000
+#define DCACHE_NFSFS_RENAMED BIT(12)
/* this dentry has been "silly renamed" and has to be deleted on the last
* dput() */
-#define DCACHE_COOKIE 0x00002000 /* For use by dcookie subsystem */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x00004000
+#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14)
/* Parent inode is watched by some fsnotify listener */
-#define DCACHE_DENTRY_KILLED 0x00008000
+#define DCACHE_DENTRY_KILLED BIT(15)
-#define DCACHE_MOUNTED 0x00010000 /* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT 0x00020000 /* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT 0x00040000 /* manage transit from this dirent */
+#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */
#define DCACHE_MANAGED_DENTRY \
(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
-#define DCACHE_LRU_LIST 0x00080000
+#define DCACHE_LRU_LIST BIT(19)
-#define DCACHE_ENTRY_TYPE 0x00700000
-#define DCACHE_MISS_TYPE 0x00000000 /* Negative dentry (maybe fallthru to nowhere) */
-#define DCACHE_WHITEOUT_TYPE 0x00100000 /* Whiteout dentry (stop pathwalk) */
-#define DCACHE_DIRECTORY_TYPE 0x00200000 /* Normal directory */
-#define DCACHE_AUTODIR_TYPE 0x00300000 /* Lookupless directory (presumed automount) */
-#define DCACHE_REGULAR_TYPE 0x00400000 /* Regular file type (or fallthru to such) */
-#define DCACHE_SPECIAL_TYPE 0x00500000 /* Other file type (or fallthru to such) */
-#define DCACHE_SYMLINK_TYPE 0x00600000 /* Symlink (or fallthru to such) */
+#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */
+#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */
+#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */
+#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */
+#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */
+#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */
+#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */
+#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */
-#define DCACHE_MAY_FREE 0x00800000
-#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */
-#define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */
-#define DCACHE_OP_REAL 0x04000000
+#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */
+#define DCACHE_OP_REAL BIT(26)
-#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */
-#define DCACHE_DENTRY_CURSOR 0x20000000
-#define DCACHE_NORCU 0x40000000 /* No RCU delay for freeing */
+#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */
+#define DCACHE_DENTRY_CURSOR BIT(29)
+#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */
extern seqlock_t rename_lock;
@@ -220,8 +222,6 @@ extern seqlock_t rename_lock;
*/
extern void d_instantiate(struct dentry *, struct inode *);
extern void d_instantiate_new(struct dentry *, struct inode *);
-extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
-extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
extern void __d_drop(struct dentry *dentry);
extern void d_drop(struct dentry *dentry);
extern void d_delete(struct dentry *);
@@ -242,15 +242,12 @@ extern struct dentry * d_obtain_alias(struct inode *);
extern struct dentry * d_obtain_root(struct inode *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_for_umount(struct super_block *);
extern void d_invalidate(struct dentry *);
/* only used at mount-time */
extern struct dentry * d_make_root(struct inode *);
-/* <clickety>-<click> the ramfs-type tree */
-extern void d_genocide(struct dentry *);
-
+extern void d_mark_tmpfile(struct file *, struct inode *);
extern void d_tmpfile(struct file *, struct inode *);
extern struct dentry *d_find_alias(struct inode *);
@@ -273,12 +270,8 @@ extern void d_move(struct dentry *, struct dentry *);
extern void d_exchange(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
-/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry *d_lookup(const struct dentry *, const struct qstr *);
extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
-extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
- const struct qstr *name, unsigned *seq);
static inline unsigned d_count(const struct dentry *dentry)
{
@@ -300,20 +293,40 @@ extern char *dentry_path(const struct dentry *, char *, int);
/* Allocation counts.. */
/**
- * dget, dget_dlock - get a reference to a dentry
- * @dentry: dentry to get a reference to
+ * dget_dlock - get a reference to a dentry
+ * @dentry: dentry to get a reference to
*
- * Given a dentry or %NULL pointer increment the reference count
- * if appropriate and return the dentry. A dentry will not be
- * destroyed when it has references.
+ * Given a live dentry, increment the reference count and return the dentry.
+ * Caller must hold @dentry->d_lock. Making sure that dentry is alive is
+ * caller's resonsibility. There are many conditions sufficient to guarantee
+ * that; e.g. anything with non-negative refcount is alive, so's anything
+ * hashed, anything positive, anyone's parent, etc.
*/
static inline struct dentry *dget_dlock(struct dentry *dentry)
{
- if (dentry)
- dentry->d_lockref.count++;
+ dentry->d_lockref.count++;
return dentry;
}
+
+/**
+ * dget - get a reference to a dentry
+ * @dentry: dentry to get a reference to
+ *
+ * Given a dentry or %NULL pointer increment the reference count
+ * if appropriate and return the dentry. A dentry will not be
+ * destroyed when it has references. Conversely, a dentry with
+ * no references can disappear for any number of reasons, starting
+ * with memory pressure. In other words, that primitive is
+ * used to clone an existing reference; using it on something with
+ * zero refcount is a bug.
+ *
+ * NOTE: it will spin if @dentry->d_lock is held. From the deadlock
+ * avoidance point of view it is equivalent to spin_lock()/increment
+ * refcount/spin_unlock(), so calling it under @dentry->d_lock is
+ * always a bug; so's calling it under ->d_lock on any of its descendents.
+ *
+ */
static inline struct dentry *dget(struct dentry *dentry)
{
if (dentry)
@@ -324,12 +337,11 @@ static inline struct dentry *dget(struct dentry *dentry)
extern struct dentry *dget_parent(struct dentry *dentry);
/**
- * d_unhashed - is dentry hashed
- * @dentry: entry to check
+ * d_unhashed - is dentry hashed
+ * @dentry: entry to check
*
- * Returns true if the dentry passed is not currently hashed.
+ * Returns true if the dentry passed is not currently hashed.
*/
-
static inline int d_unhashed(const struct dentry *dentry)
{
return hlist_bl_unhashed(&dentry->d_hash);
@@ -489,14 +501,6 @@ static inline int simple_positive(const struct dentry *dentry)
return d_really_is_positive(dentry) && !d_unhashed(dentry);
}
-extern void d_set_fallthru(struct dentry *dentry);
-
-static inline bool d_is_fallthru(const struct dentry *dentry)
-{
- return dentry->d_flags & DCACHE_FALLTHRU;
-}
-
-
extern int sysctl_vfs_cache_pressure;
static inline unsigned long vfs_pressure_ratio(unsigned long val)
@@ -546,41 +550,25 @@ static inline struct inode *d_backing_inode(const struct dentry *upper)
}
/**
- * d_backing_dentry - Get upper or lower dentry we should be using
- * @upper: The upper layer
- *
- * This is the helper that should be used to get the dentry of the inode that
- * will be used if this dentry were opened as a file. It may be the upper
- * dentry or it may be a lower dentry pinned by the upper.
- *
- * Normal filesystems should not use this to access their own dentries.
- */
-static inline struct dentry *d_backing_dentry(struct dentry *upper)
-{
- return upper;
-}
-
-/**
* d_real - Return the real dentry
* @dentry: the dentry to query
- * @inode: inode to select the dentry from multiple layers (can be NULL)
+ * @type: the type of real dentry (data or metadata)
*
* If dentry is on a union/overlay, then return the underlying, real dentry.
* Otherwise return the dentry itself.
*
* See also: Documentation/filesystems/vfs.rst
*/
-static inline struct dentry *d_real(struct dentry *dentry,
- const struct inode *inode)
+static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type)
{
if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
- return dentry->d_op->d_real(dentry, inode);
+ return dentry->d_op->d_real(dentry, type);
else
return dentry;
}
/**
- * d_real_inode - Return the real inode
+ * d_real_inode - Return the real inode hosting the data
* @dentry: The dentry to query
*
* If dentry is on a union/overlay, then return the underlying, real inode.
@@ -589,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
static inline struct inode *d_real_inode(const struct dentry *dentry)
{
/* This usage of d_real() results in const dentry */
- return d_backing_inode(d_real((struct dentry *) dentry, NULL));
+ return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA));
}
struct name_snapshot {
@@ -599,4 +587,14 @@ struct name_snapshot {
void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
void release_dentry_name_snapshot(struct name_snapshot *);
+static inline struct dentry *d_first_child(const struct dentry *dentry)
+{
+ return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib);
+}
+
+static inline struct dentry *d_next_sibling(const struct dentry *dentry)
+{
+ return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib);
+}
+
#endif /* __LINUX_DCACHE_H */
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index ea2d919fd9c7..c9c65b132c0f 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -171,6 +171,25 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos);
+/**
+ * struct debugfs_cancellation - cancellation data
+ * @list: internal, for keeping track
+ * @cancel: callback to call
+ * @cancel_data: extra data for the callback to call
+ */
+struct debugfs_cancellation {
+ struct list_head list;
+ void (*cancel)(struct dentry *, void *);
+ void *cancel_data;
+};
+
+void __acquires(cancellation)
+debugfs_enter_cancellation(struct file *file,
+ struct debugfs_cancellation *cancellation);
+void __releases(cancellation)
+debugfs_leave_cancellation(struct file *file,
+ struct debugfs_cancellation *cancellation);
+
#else
#include <linux/err.h>
diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 9192986b1a73..ac862422df15 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
if (!malloc_ptr)
malloc_ptr = free_mem_ptr;
- malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+ malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */
p = (void *)malloc_ptr;
malloc_ptr += size;
diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index 8904063d4c9f..6bfe70decc9f 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -274,4 +274,6 @@ do { \
WARN_ONCE(condition, "%s %s: " format, \
dev_driver_string(dev), dev_name(dev), ## arg)
+__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
+
#endif /* _DEVICE_PRINTK_H_ */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 69d0435c7ebb..82b2195efaca 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -165,6 +165,7 @@ void dm_error(const char *message);
struct dm_dev {
struct block_device *bdev;
+ struct file *bdev_file;
struct dax_device *dax_dev;
blk_mode_t mode;
char name[16];
diff --git a/include/linux/device.h b/include/linux/device.h
index bbaeabd04b0d..b9f5464f44ed 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -42,7 +42,6 @@ struct class;
struct subsys_private;
struct device_node;
struct fwnode_handle;
-struct iommu_ops;
struct iommu_group;
struct dev_pin_info;
struct dev_iommu;
@@ -63,7 +62,7 @@ struct msi_device_data;
*/
struct subsys_interface {
const char *name;
- struct bus_type *subsys;
+ const struct bus_type *subsys;
struct list_head node;
int (*add_dev)(struct device *dev, struct subsys_interface *sif);
void (*remove_dev)(struct device *dev, struct subsys_interface *sif);
@@ -72,9 +71,9 @@ struct subsys_interface {
int subsys_interface_register(struct subsys_interface *sif);
void subsys_interface_unregister(struct subsys_interface *sif);
-int subsys_system_register(struct bus_type *subsys,
+int subsys_system_register(const struct bus_type *subsys,
const struct attribute_group **groups);
-int subsys_virtual_register(struct bus_type *subsys,
+int subsys_virtual_register(const struct bus_type *subsys,
const struct attribute_group **groups);
/*
@@ -349,6 +348,7 @@ unsigned long devm_get_free_pages(struct device *dev,
gfp_t gfp_mask, unsigned int order);
void devm_free_pages(struct device *dev, unsigned long addr);
+#ifdef CONFIG_HAS_IOMEM
void __iomem *devm_ioremap_resource(struct device *dev,
const struct resource *res);
void __iomem *devm_ioremap_resource_wc(struct device *dev,
@@ -357,14 +357,39 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev,
void __iomem *devm_of_iomap(struct device *dev,
struct device_node *node, int index,
resource_size_t *size);
+#else
+
+static inline
+void __iomem *devm_ioremap_resource(struct device *dev,
+ const struct resource *res)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline
+void __iomem *devm_ioremap_resource_wc(struct device *dev,
+ const struct resource *res)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline
+void __iomem *devm_of_iomap(struct device *dev,
+ struct device_node *node, int index,
+ resource_size_t *size)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+#endif
/* allows to add/remove a custom action to devres stack */
void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
void devm_release_action(struct device *dev, void (*action)(void *), void *data);
int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name);
-#define devm_add_action(release, action, data) \
- __devm_add_action(release, action, data, #action)
+#define devm_add_action(dev, action, data) \
+ __devm_add_action(dev, action, data, #action)
static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *),
void *data, const char *name)
@@ -377,8 +402,8 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(
return ret;
}
-#define devm_add_action_or_reset(release, action, data) \
- __devm_add_action_or_reset(release, action, data, #action)
+#define devm_add_action_or_reset(dev, action, data) \
+ __devm_add_action_or_reset(dev, action, data, #action)
/**
* devm_alloc_percpu - Resource-managed alloc_percpu
@@ -625,7 +650,10 @@ struct device_physical_location {
* @dma_pools: Dma pools (if dma'ble device).
* @dma_mem: Internal for coherent mem override.
* @cma_area: Contiguous memory area for dma allocations
- * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use.
+ * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use.
+ * @dma_io_tlb_pools: List of transient swiotlb memory pools.
+ * @dma_io_tlb_lock: Protects changes to the list of active pools.
+ * @dma_uses_io_tlb: %true if device has used the software IO TLB.
* @archdata: For arch-specific additions.
* @of_node: Associated device tree node.
* @fwnode: Associated device node supplied by platform firmware.
@@ -633,7 +661,6 @@ struct device_physical_location {
* @id: device instance
* @devres_lock: Spinlock to protect the resource of the device.
* @devres_head: The resources list of the device.
- * @knode_class: The node used to add the device to the class list.
* @class: The class of the device.
* @groups: Optional attribute groups.
* @release: Callback to free the device after all references have
@@ -732,6 +759,11 @@ struct device {
#ifdef CONFIG_SWIOTLB
struct io_tlb_mem *dma_io_tlb_mem;
#endif
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ struct list_head dma_io_tlb_pools;
+ spinlock_t dma_io_tlb_lock;
+ bool dma_uses_io_tlb;
+#endif
/* arch specific additions */
struct dev_archdata archdata;
@@ -973,6 +1005,8 @@ static inline void device_unlock(struct device *dev)
mutex_unlock(&dev->mutex);
}
+DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T))
+
static inline void device_lock_assert(struct device *dev)
{
lockdep_assert_held(&dev->mutex);
@@ -1037,7 +1071,6 @@ int device_rename(struct device *dev, const char *new_name);
int device_move(struct device *dev, struct device *new_parent,
enum dpm_order dpm_order);
int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid);
-int device_is_dependent(struct device *dev, void *target);
static inline bool device_supports_offline(struct device *dev)
{
@@ -1214,8 +1247,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
-
-__printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index ae10c4322754..5ef4ec1c36c3 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -62,9 +62,6 @@ struct fwnode_handle;
* this bus.
* @pm: Power management operations of this bus, callback the specific
* device driver's pm-ops.
- * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU
- * driver implementations to a bus and allow the driver to do
- * bus-specific setup
* @need_parent_lock: When probing or removing a device on this bus, the
* device core should lock the device's parent.
*
@@ -104,8 +101,6 @@ struct bus_type {
const struct dev_pm_ops *pm;
- const struct iommu_ops *iommu_ops;
-
bool need_parent_lock;
};
@@ -232,7 +227,7 @@ bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev)
int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start,
void *data, int (*fn)(struct device_driver *, void *));
-void bus_sort_breadthfirst(struct bus_type *bus,
+void bus_sort_breadthfirst(const struct bus_type *bus,
int (*compare)(const struct device *a,
const struct device *b));
/*
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index abf3d3bfb6fe..c576b49c55c2 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -40,8 +40,6 @@ struct fwnode_handle;
* for the devices belonging to the class. Usually tied to
* device's namespace.
* @pm: The default device power management operations of this class.
- * @p: The private data of the driver core, no one other than the
- * driver core can touch this.
*
* A class is a higher-level view of a device that abstracts out low-level
* implementation details. Drivers may see a SCSI disk or an ATA disk, but,
diff --git a/include/linux/dio.h b/include/linux/dio.h
index 5abd07361eb5..2b5923909f96 100644
--- a/include/linux/dio.h
+++ b/include/linux/dio.h
@@ -68,7 +68,7 @@ struct dio_bus {
};
extern struct dio_bus dio_bus; /* Single DIO bus */
-extern struct bus_type dio_bus_type;
+extern const struct bus_type dio_bus_type;
/*
* DIO device IDs
diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h
index e6d76e8715a6..15fc856d198c 100644
--- a/include/linux/dlm_plock.h
+++ b/include/linux/dlm_plock.h
@@ -11,6 +11,8 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
int cmd, struct file_lock *fl);
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct file_lock *fl);
+int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ struct file_lock *fl);
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct file_lock *fl);
#endif
diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h
index 75e7d8cbb532..d1503b815a78 100644
--- a/include/linux/dm-bufio.h
+++ b/include/linux/dm-bufio.h
@@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start);
void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp);
+void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio);
+
/*
* Like dm_bufio_read, but return buffer from cache, don't read
* it. If the buffer is not in the cache, return NULL.
@@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
void dm_bufio_prefetch(struct dm_bufio_client *c,
sector_t block, unsigned int n_blocks);
+void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c,
+ sector_t block, unsigned int n_blocks,
+ unsigned short ioprio);
+
/*
* Release a reference obtained with dm_bufio_{read,get,new}. The data
* pointer and dm_buffer pointer is no longer valid after this call.
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index 7595142f3fc5..7b2968612b7e 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client);
* error occurred doing io to the corresponding region.
*/
int dm_io(struct dm_io_request *io_req, unsigned int num_regions,
- struct dm_io_region *region, unsigned int long *sync_error_bits);
+ struct dm_io_region *region, unsigned int long *sync_error_bits,
+ unsigned short ioprio);
#endif /* __KERNEL__ */
#endif /* _LINUX_DM_IO_H */
diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h
index 552b817ab102..3ac6dbaeaa37 100644
--- a/include/linux/dm-verity-loadpin.h
+++ b/include/linux/dm-verity-loadpin.h
@@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests;
struct dm_verity_loadpin_trusted_root_digest {
struct list_head node;
unsigned int len;
- u8 data[];
+ u8 data[] __counted_by(len);
};
#if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY)
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 3f31baa3293f..8ff4add71f88 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -343,16 +343,19 @@ struct dma_buf {
/**
* @exp_name:
*
- * Name of the exporter; useful for debugging. See the
- * DMA_BUF_SET_NAME IOCTL.
+ * Name of the exporter; useful for debugging. Must not be NULL
*/
const char *exp_name;
/**
* @name:
*
- * Userspace-provided name; useful for accounting and debugging,
- * protected by dma_resv_lock() on @resv and @name_lock for read access.
+ * Userspace-provided name. Default value is NULL. If not NULL,
+ * length cannot be longer than DMA_BUF_NAME_LEN, including NIL
+ * char. Useful for accounting and debugging. Read/Write accesses
+ * are protected by @name_lock
+ *
+ * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B
*/
const char *name;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 18aade195884..3eb3589ff43e 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -21,7 +21,6 @@ struct bus_dma_region {
phys_addr_t cpu_start;
dma_addr_t dma_start;
u64 size;
- u64 offset;
};
static inline dma_addr_t translate_phys_to_dma(struct device *dev,
@@ -29,9 +28,12 @@ static inline dma_addr_t translate_phys_to_dma(struct device *dev,
{
const struct bus_dma_region *m;
- for (m = dev->dma_range_map; m->size; m++)
- if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size)
- return (dma_addr_t)paddr - m->offset;
+ for (m = dev->dma_range_map; m->size; m++) {
+ u64 offset = paddr - m->cpu_start;
+
+ if (paddr >= m->cpu_start && offset < m->size)
+ return m->dma_start + offset;
+ }
/* make sure dma_capable fails when no translation is available */
return DMA_MAPPING_ERROR;
@@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev,
{
const struct bus_dma_region *m;
- for (m = dev->dma_range_map; m->size; m++)
- if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size)
- return (phys_addr_t)dma_addr + m->offset;
+ for (m = dev->dma_range_map; m->size; m++) {
+ u64 offset = dma_addr - m->dma_start;
+
+ if (dma_addr >= m->dma_start && offset < m->size)
+ return m->cpu_start + offset;
+ }
return (phys_addr_t)-1;
}
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0d678e9a7b24..e06bad467f55 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>
+#include <linux/timekeeping.h>
struct dma_fence;
struct dma_fence_ops;
@@ -499,6 +500,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1,
}
/**
+ * dma_fence_is_later_or_same - return true if f1 is later or same as f2
+ * @f1: the first fence from the same context
+ * @f2: the second fence from the same context
+ *
+ * Returns true if f1 is chronologically later than f2 or the same fence. Both
+ * fences must be from the same context, since a seqno is not re-used across
+ * contexts.
+ */
+static inline bool dma_fence_is_later_or_same(struct dma_fence *f1,
+ struct dma_fence *f2)
+{
+ return f1 == f2 || dma_fence_is_later(f1, f2);
+}
+
+/**
* dma_fence_later - return the chronologically later fence
* @f1: the first fence from the same context
* @f2: the second fence from the same context
@@ -568,6 +584,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence,
fence->error = error;
}
+/**
+ * dma_fence_timestamp - helper to get the completion timestamp of a fence
+ * @fence: fence to get the timestamp from.
+ *
+ * After a fence is signaled the timestamp is updated with the signaling time,
+ * but setting the timestamp can race with tasks waiting for the signaling. This
+ * helper busy waits for the correct timestamp to appear.
+ */
+static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
+{
+ if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+ return ktime_get();
+
+ while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
+ cpu_relax();
+
+ return fence->timestamp;
+}
+
signed long dma_fence_wait_timeout(struct dma_fence *,
bool intr, signed long timeout);
signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 9bf19b5bf755..4abc60f04209 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -11,6 +11,7 @@
#include <linux/slab.h>
struct cma;
+struct iommu_ops;
/*
* Values for struct dma_map_ops.flags:
@@ -169,12 +170,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page,
}
#endif /* CONFIG_DMA_CMA*/
-#ifdef CONFIG_DMA_PERNUMA_CMA
-void dma_pernuma_cma_reserve(void);
-#else
-static inline void dma_pernuma_cma_reserve(void) { }
-#endif /* CONFIG_DMA_PERNUMA_CMA */
-
#ifdef CONFIG_DMA_DECLARE_COHERENT
int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
dma_addr_t device_addr, size_t size);
@@ -343,6 +338,12 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs);
+#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
+void arch_dma_set_mask(struct device *dev, u64 mask);
+#else
+#define arch_dma_set_mask(dev, mask) do { } while (0)
+#endif
+
#ifdef CONFIG_MMU
/*
* Page protection so that devices that can't snoop CPU caches can use the
@@ -426,10 +427,10 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent);
+ bool coherent);
#else
static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
- u64 size, const struct iommu_ops *iommu, bool coherent)
+ u64 size, bool coherent)
{
}
#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */
@@ -443,10 +444,10 @@ static inline void arch_teardown_dma_ops(struct device *dev)
#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */
#ifdef CONFIG_DMA_API_DEBUG
-void dma_debug_add_bus(struct bus_type *bus);
+void dma_debug_add_bus(const struct bus_type *bus);
void debug_dma_dump_mappings(struct device *dev);
#else
-static inline void dma_debug_add_bus(struct bus_type *bus)
+static inline void dma_debug_add_bus(const struct bus_type *bus)
{
}
static inline void debug_dma_dump_mappings(struct device *dev)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index e13050eb9777..4a658de44ee9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev);
int dma_set_mask(struct device *dev, u64 mask);
int dma_set_coherent_mask(struct device *dev, u64 mask);
u64 dma_get_required_mask(struct device *dev);
+bool dma_addressing_limited(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
size_t dma_opt_mapping_size(struct device *dev);
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
@@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev)
{
return 0;
}
+static inline bool dma_addressing_limited(struct device *dev)
+{
+ return false;
+}
static inline size_t dma_max_mapping_size(struct device *dev)
{
return 0;
@@ -418,6 +423,8 @@ static inline void dma_sync_sgtable_for_device(struct device *dev,
#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
+bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
+
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{
@@ -463,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
return dma_set_mask_and_coherent(dev, mask);
}
-/**
- * dma_addressing_limited - return if the device is addressing limited
- * @dev: device to check
- *
- * Return %true if the devices DMA mask is too small to address all memory in
- * the system, else %false. Lack of addressing bits is the prime reason for
- * bounce buffering, but might not be the only one.
- */
-static inline bool dma_addressing_limited(struct device *dev)
-{
- return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) <
- dma_get_required_mask(dev);
-}
-
static inline unsigned int dma_get_max_seg_size(struct device *dev)
{
if (dev->dma_parms && dev->dma_parms->max_segment_size)
diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h
index e443be4d3b4b..1e491c5dcac2 100644
--- a/include/linux/dma/k3-udma-glue.h
+++ b/include/linux/dma/k3-udma-glue.h
@@ -26,6 +26,11 @@ struct k3_udma_glue_tx_channel;
struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
const char *name, struct k3_udma_glue_tx_channel_cfg *cfg);
+struct k3_udma_glue_tx_channel *
+k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev,
+ struct k3_udma_glue_tx_channel_cfg *cfg,
+ struct device_node *udmax_np, u32 thread_id);
+
void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn);
int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
struct cppi5_host_desc_t *desc_tx,
@@ -109,6 +114,11 @@ struct k3_udma_glue_rx_channel *k3_udma_glue_request_rx_chn(
const char *name,
struct k3_udma_glue_rx_channel_cfg *cfg);
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev,
+ struct k3_udma_glue_rx_channel_cfg *cfg,
+ struct device_node *udmax_np, u32 thread_id);
+
void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c3656e590213..752dbde4cec1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan)
return dev_name(&chan->dev->device);
}
-void dma_chan_cleanup(struct kref *kref);
-
/**
* typedef dma_filter_fn - callback filter for dma_request_channel
* @chan: channel to be reviewed
@@ -955,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
static inline bool is_slave_direction(enum dma_transfer_direction direction)
{
- return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);
+ return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) ||
+ (direction == DMA_DEV_TO_DEV);
}
static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 27dbd4c64860..e34b601b71fd 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -106,8 +106,6 @@ static inline bool dmar_rcu_check(void)
extern int dmar_table_init(void);
extern int dmar_dev_scope_init(void);
extern void dmar_register_bus_notifier(void);
-extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
- struct dmar_dev_scope **devices, u16 segment);
extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt);
extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b1d26f9f1c9f..9f183a679277 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -30,7 +30,7 @@ struct dnotify_struct {
FS_MOVED_FROM | FS_MOVED_TO)
extern void dnotify_flush(struct file *, fl_owner_t);
-extern int fcntl_dirnotify(int, struct file *, unsigned long);
+extern int fcntl_dirnotify(int, struct file *, unsigned int);
#else
@@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id)
{
}
-static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
return -EINVAL;
}
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
new file mode 100644
index 000000000000..d275736230b3
--- /dev/null
+++ b/include/linux/dpll.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel and affiliates
+ */
+
+#ifndef __DPLL_H__
+#define __DPLL_H__
+
+#include <uapi/linux/dpll.h>
+#include <linux/device.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+
+struct dpll_device;
+struct dpll_pin;
+
+struct dpll_device_ops {
+ int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_mode *mode, struct netlink_ext_ack *extack);
+ int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
+ struct netlink_ext_ack *extack);
+ int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv,
+ s32 *temp, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_ops {
+ int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const u64 frequency,
+ struct netlink_ext_ack *extack);
+ int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 *frequency, struct netlink_ext_ack *extack);
+ int (*direction_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const enum dpll_pin_direction direction,
+ struct netlink_ext_ack *extack);
+ int (*direction_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack);
+ int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv, enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ const enum dpll_pin_state state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ const enum dpll_pin_state state,
+ struct netlink_ext_ack *extack);
+ int (*prio_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u32 *prio, struct netlink_ext_ack *extack);
+ int (*prio_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const u32 prio, struct netlink_ext_ack *extack);
+ int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ s64 *phase_offset,
+ struct netlink_ext_ack *extack);
+ int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ s32 *phase_adjust,
+ struct netlink_ext_ack *extack);
+ int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const s32 phase_adjust,
+ struct netlink_ext_ack *extack);
+ int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ s64 *ffo, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_frequency {
+ u64 min;
+ u64 max;
+};
+
+#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \
+ { \
+ .min = _min, \
+ .max = _max, \
+ }
+
+#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val)
+#define DPLL_PIN_FREQUENCY_1PPS \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ)
+#define DPLL_PIN_FREQUENCY_10MHZ \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ)
+#define DPLL_PIN_FREQUENCY_IRIG_B \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ)
+#define DPLL_PIN_FREQUENCY_DCF77 \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ)
+
+struct dpll_pin_phase_adjust_range {
+ s32 min;
+ s32 max;
+};
+
+struct dpll_pin_properties {
+ const char *board_label;
+ const char *panel_label;
+ const char *package_label;
+ enum dpll_pin_type type;
+ unsigned long capabilities;
+ u32 freq_supported_num;
+ struct dpll_pin_frequency *freq_supported;
+ struct dpll_pin_phase_adjust_range phase_range;
+};
+
+#if IS_ENABLED(CONFIG_DPLL)
+void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
+void dpll_netdev_pin_clear(struct net_device *dev);
+
+size_t dpll_netdev_pin_handle_size(const struct net_device *dev);
+int dpll_netdev_add_pin_handle(struct sk_buff *msg,
+ const struct net_device *dev);
+#else
+static inline void
+dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { }
+static inline void dpll_netdev_pin_clear(struct net_device *dev) { }
+
+static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev)
+{
+ return 0;
+}
+
+static inline int
+dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev)
+{
+ return 0;
+}
+#endif
+
+struct dpll_device *
+dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module);
+
+void dpll_device_put(struct dpll_device *dpll);
+
+int dpll_device_register(struct dpll_device *dpll, enum dpll_type type,
+ const struct dpll_device_ops *ops, void *priv);
+
+void dpll_device_unregister(struct dpll_device *dpll,
+ const struct dpll_device_ops *ops, void *priv);
+
+struct dpll_pin *
+dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module,
+ const struct dpll_pin_properties *prop);
+
+int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_put(struct dpll_pin *pin);
+
+int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+int dpll_device_change_ntf(struct dpll_device *dpll);
+
+int dpll_pin_change_ntf(struct dpll_pin *pin);
+
+#endif
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index c177322f793d..b9dd35d4b8f5 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -28,7 +28,7 @@
/* Source and Destination MAC of follow-up meta frames.
* Whereas the choice of SMAC only affects the unique identification of the
* switch as sender of meta frames, the DMAC must be an address that is present
- * in the DSA master port's multicast MAC filter.
+ * in the DSA conduit port's multicast MAC filter.
* 01-80-C2-00-00-0E is a good choice for this, as all profiles of IEEE 1588
* over L2 use this address for some purpose already.
*/
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 061dd84d09f3..4fcbf4d4fd0a 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -37,10 +37,12 @@ struct _ddebug {
#define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2)
#define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
#define _DPRINTK_FLAGS_INCL_TID (1<<4)
+#define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
#define _DPRINTK_FLAGS_INCL_ANY \
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
- _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID)
+ _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\
+ _DPRINTK_FLAGS_INCL_SOURCENAME)
#if defined DEBUG
#define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 407c2f281b64..5693a4be0d9a 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -38,14 +38,22 @@
#ifdef __KERNEL__
+#include <linux/bitops.h>
#include <asm/bug.h>
+#define DQL_HIST_LEN 4
+#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN])
+
struct dql {
/* Fields accessed in enqueue path (dql_queued) */
unsigned int num_queued; /* Total ever queued */
unsigned int adj_limit; /* limit + num_completed */
unsigned int last_obj_cnt; /* Count at last queuing */
+ unsigned long history_head; /* top 58 bits of jiffies */
+ /* stall entries, a bit per entry */
+ unsigned long history[DQL_HIST_LEN];
+
/* Fields accessed only by completion path (dql_completed) */
unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */
@@ -62,6 +70,13 @@ struct dql {
unsigned int max_limit; /* Max limit */
unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */
+
+ /* Stall threshold (in jiffies), defined by user */
+ unsigned short stall_thrs;
+ /* Longest stall detected, reported to user */
+ unsigned short stall_max;
+ unsigned long last_reap; /* Last reap (in jiffies) */
+ unsigned long stall_cnt; /* Number of stalls */
};
/* Set some static maximums */
@@ -74,6 +89,8 @@ struct dql {
*/
static inline void dql_queued(struct dql *dql, unsigned int count)
{
+ unsigned long map, now, now_hi, i;
+
BUG_ON(count > DQL_MAX_OBJECT);
dql->last_obj_cnt = count;
@@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
barrier();
dql->num_queued += count;
+
+ now = jiffies;
+ now_hi = now / BITS_PER_LONG;
+
+ /* The following code set a bit in the ring buffer, where each
+ * bit trackes time the packet was queued. The dql->history buffer
+ * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot
+ */
+ if (unlikely(now_hi != dql->history_head)) {
+ /* About to reuse slots, clear them */
+ for (i = 0; i < DQL_HIST_LEN; i++) {
+ /* Multiplication masks high bits */
+ if (now_hi * BITS_PER_LONG ==
+ (dql->history_head + i) * BITS_PER_LONG)
+ break;
+ DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0;
+ }
+ /* pairs with smp_rmb() in dql_check_stall() */
+ smp_wmb();
+ WRITE_ONCE(dql->history_head, now_hi);
+ }
+
+ /* __set_bit() does not guarantee WRITE_ONCE() semantics */
+ map = DQL_HIST_ENT(dql, now_hi);
+
+ /* Populate the history with an entry (bit) per queued */
+ if (!(map & BIT_MASK(now)))
+ WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
}
/* Returns how many objects can be queued, < 0 indicates over limit. */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index fa4bda2a70f6..b4ee8961e623 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -30,7 +30,7 @@ struct device;
extern int edac_op_state;
-struct bus_type *edac_get_sysfs_subsys(void);
+const struct bus_type *edac_get_sysfs_subsys(void);
static inline void opstate_init(void)
{
@@ -187,6 +187,7 @@ static inline char *mc_event_error_type(const unsigned int err_type)
* @MEM_NVDIMM: Non-volatile RAM
* @MEM_WIO2: Wide I/O 2.
* @MEM_HBM2: High bandwidth Memory Gen 2.
+ * @MEM_HBM3: High bandwidth Memory Gen 3.
*/
enum mem_type {
MEM_EMPTY = 0,
@@ -218,6 +219,7 @@ enum mem_type {
MEM_NVDIMM,
MEM_WIO2,
MEM_HBM2,
+ MEM_HBM3,
};
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
@@ -248,6 +250,7 @@ enum mem_type {
#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
#define MEM_FLAG_WIO2 BIT(MEM_WIO2)
#define MEM_FLAG_HBM2 BIT(MEM_HBM2)
+#define MEM_FLAG_HBM3 BIT(MEM_HBM3)
/**
* enum edac_type - Error Detection and Correction capabilities and mode
@@ -492,7 +495,7 @@ struct edac_raw_error_desc {
*/
struct mem_ctl_info {
struct device dev;
- struct bus_type *bus;
+ const struct bus_type *bus;
struct list_head link; /* for global list of mem_ctl_info structs */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ab088c662e88..d59b0947fba0 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -24,10 +24,11 @@
#include <linux/range.h>
#include <linux/reboot.h>
#include <linux/uuid.h>
-#include <linux/screen_info.h>
#include <asm/page.h>
+struct screen_info;
+
#define EFI_SUCCESS 0
#define EFI_LOAD_ERROR ( 1 | (1UL << (BITS_PER_LONG-1)))
#define EFI_INVALID_PARAMETER ( 2 | (1UL << (BITS_PER_LONG-1)))
@@ -39,6 +40,7 @@
#define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1)))
#define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1)))
#define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_ACCESS_DENIED (15 | (1UL << (BITS_PER_LONG-1)))
#define EFI_TIMEOUT (18 | (1UL << (BITS_PER_LONG-1)))
#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1)))
#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1)))
@@ -357,13 +359,10 @@ void efi_native_runtime_setup(void);
* where the UEFI SPEC breaks the line.
*/
#define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)
-#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94)
-#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
-#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98)
#define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2)
#define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c)
#define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93)
@@ -387,6 +386,7 @@ void efi_native_runtime_setup(void);
#define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
#define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f)
+#define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
#define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
#define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
@@ -401,6 +401,8 @@ void efi_native_runtime_setup(void);
#define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72)
#define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed)
#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+#define EFI_CC_MEASUREMENT_PROTOCOL_GUID EFI_GUID(0x96751a3d, 0x72f4, 0x41a6, 0xa7, 0x94, 0xed, 0x5d, 0x0e, 0x67, 0xae, 0x6b)
+#define EFI_CC_FINAL_EVENTS_TABLE_GUID EFI_GUID(0xdd4a4648, 0x2de7, 0x4665, 0x96, 0x4d, 0x21, 0xd9, 0xef, 0x5f, 0xb4, 0x46)
/*
* This GUID is used to pass to the kernel proper the struct screen_info
@@ -412,7 +414,6 @@ void efi_native_runtime_setup(void);
#define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
#define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
#define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa)
-#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
#define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
#define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
#define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
@@ -693,6 +694,11 @@ extern struct efi {
extern struct mm_struct efi_mm;
+static inline bool mm_is_efi(struct mm_struct *mm)
+{
+ return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm;
+}
+
static inline int
efi_guidcmp (efi_guid_t left, efi_guid_t right)
{
@@ -726,7 +732,6 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
return EFI_SUCCESS;
}
#endif
-extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
@@ -851,10 +856,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len)
return 1;
}
-#ifdef CONFIG_EFI_PCDP
-extern int __init efi_setup_pcdp_console(char *);
-#endif
-
/*
* We play games with efi_enabled so that the compiler will, if
* possible, remove EFI-related code altogether.
@@ -1130,7 +1131,7 @@ extern bool efi_runtime_disabled(void);
static inline bool efi_runtime_disabled(void) { return true; }
#endif
-extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
+extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
extern unsigned long efi_call_virt_save_flags(void);
enum efi_secureboot_mode {
@@ -1171,8 +1172,7 @@ static inline void efi_check_for_embedded_firmwares(void) { }
#define arch_efi_call_virt(p, f, args...) ((p)->f(args))
/*
- * Arch code can implement the following three template macros, avoiding
- * reptition for the void/non-void return cases of {__,}efi_call_virt():
+ * Arch code must implement the following three routines:
*
* * arch_efi_call_virt_setup()
*
@@ -1181,9 +1181,8 @@ static inline void efi_check_for_embedded_firmwares(void) { }
*
* * arch_efi_call_virt()
*
- * Performs the call. The last expression in the macro must be the call
- * itself, allowing the logic to be shared by the void and non-void
- * cases.
+ * Performs the call. This routine takes a variable number of arguments so
+ * it must be implemented as a variadic preprocessor macro.
*
* * arch_efi_call_virt_teardown()
*
@@ -1192,33 +1191,20 @@ static inline void efi_check_for_embedded_firmwares(void) { }
#define efi_call_virt_pointer(p, f, args...) \
({ \
- efi_status_t __s; \
+ typeof((p)->f(args)) __s; \
unsigned long __flags; \
\
arch_efi_call_virt_setup(); \
\
__flags = efi_call_virt_save_flags(); \
__s = arch_efi_call_virt(p, f, args); \
- efi_call_virt_check_flags(__flags, __stringify(f)); \
+ efi_call_virt_check_flags(__flags, NULL); \
\
arch_efi_call_virt_teardown(); \
\
__s; \
})
-#define __efi_call_virt_pointer(p, f, args...) \
-({ \
- unsigned long __flags; \
- \
- arch_efi_call_virt_setup(); \
- \
- __flags = efi_call_virt_save_flags(); \
- arch_efi_call_virt(p, f, args); \
- efi_call_virt_check_flags(__flags, __stringify(f)); \
- \
- arch_efi_call_virt_teardown(); \
-})
-
#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE
struct linux_efi_random_seed {
@@ -1244,6 +1230,10 @@ extern int efi_tpm_final_log_size;
extern unsigned long rci2_table_phys;
+efi_status_t
+efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
+ u64 param_buffer_addr, void *context);
+
/*
* efi_runtime_service() function identifiers.
* "NONE" is used by efi_recover_from_page_fault() to check if the page
@@ -1263,25 +1253,26 @@ enum efi_rts_ids {
EFI_RESET_SYSTEM,
EFI_UPDATE_CAPSULE,
EFI_QUERY_CAPSULE_CAPS,
+ EFI_ACPI_PRM_HANDLER,
};
+union efi_rts_args;
+
/*
* efi_runtime_work: Details of EFI Runtime Service work
- * @arg<1-5>: EFI Runtime Service function arguments
+ * @args: Pointer to union describing the arguments
* @status: Status of executing EFI Runtime Service
* @efi_rts_id: EFI Runtime Service function identifier
* @efi_rts_comp: Struct used for handling completions
+ * @caller: The caller of the runtime service
*/
struct efi_runtime_work {
- void *arg1;
- void *arg2;
- void *arg3;
- void *arg4;
- void *arg5;
- efi_status_t status;
- struct work_struct work;
- enum efi_rts_ids efi_rts_id;
- struct completion efi_rts_comp;
+ union efi_rts_args *args;
+ efi_status_t status;
+ struct work_struct work;
+ enum efi_rts_ids efi_rts_id;
+ struct completion efi_rts_comp;
+ const void *caller;
};
extern struct efi_runtime_work efi_rts_work;
@@ -1365,4 +1356,15 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table)
umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n);
+/*
+ * efivar ops event type
+ */
+#define EFIVAR_OPS_RDONLY 0
+#define EFIVAR_OPS_RDWR 1
+
+extern struct blocking_notifier_head efivar_ops_nh;
+
+void efivars_generic_ops_register(void);
+void efivars_generic_ops_unregister(void);
+
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h
new file mode 100644
index 000000000000..624ff6ff41f9
--- /dev/null
+++ b/include/linux/einj-cxl.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CXL protocol Error INJection support.
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#ifndef EINJ_CXL_H
+#define EINJ_CXL_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct pci_dev;
+struct seq_file;
+
+#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL)
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
+int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
+bool einj_cxl_is_initialized(void);
+#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */
+static inline int einj_cxl_available_error_type_show(struct seq_file *m,
+ void *v)
+{
+ return -ENXIO;
+}
+
+static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type)
+{
+ return -ENXIO;
+}
+
+static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+ return -ENXIO;
+}
+
+static inline bool einj_cxl_is_initialized(void) { return false; }
+#endif /* CONFIG_ACPI_APEI_EINJ_CXL */
+
+#endif /* EINJ_CXL_H */
diff --git a/include/linux/elf-fdpic.h b/include/linux/elf-fdpic.h
index 3bea95a1af53..e533f4513194 100644
--- a/include/linux/elf-fdpic.h
+++ b/include/linux/elf-fdpic.h
@@ -10,13 +10,25 @@
#include <uapi/linux/elf-fdpic.h>
+#if ELF_CLASS == ELFCLASS32
+#define Elf_Sword Elf32_Sword
+#define elf_fdpic_loadseg elf32_fdpic_loadseg
+#define elf_fdpic_loadmap elf32_fdpic_loadmap
+#define ELF_FDPIC_LOADMAP_VERSION ELF32_FDPIC_LOADMAP_VERSION
+#else
+#define Elf_Sword Elf64_Sxword
+#define elf_fdpic_loadmap elf64_fdpic_loadmap
+#define elf_fdpic_loadseg elf64_fdpic_loadseg
+#define ELF_FDPIC_LOADMAP_VERSION ELF64_FDPIC_LOADMAP_VERSION
+#endif
+
/*
* binfmt binary parameters structure
*/
struct elf_fdpic_params {
struct elfhdr hdr; /* ref copy of ELF header */
struct elf_phdr *phdrs; /* ref copy of PT_PHDR table */
- struct elf32_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */
+ struct elf_fdpic_loadmap *loadmap; /* loadmap to be passed to userspace */
unsigned long elfhdr_addr; /* mapped ELF header user address */
unsigned long ph_addr; /* mapped PT_PHDR user address */
unsigned long map_addr; /* mapped loadmap user address */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index b9caa01dfac4..70cd7258cd29 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -5,6 +5,7 @@
#include <linux/device.h>
#include <linux/jump_label.h>
#include <linux/kobject.h>
+#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/topology.h>
@@ -12,6 +13,7 @@
/**
* struct em_perf_state - Performance state of a performance domain
+ * @performance: CPU performance (capacity) at a given frequency
* @frequency: The frequency in KHz, for consistency with CPUFreq
* @power: The power consumed at this level (by 1 CPU or by a registered
* device). It can be a total power: static and dynamic.
@@ -20,6 +22,7 @@
* @flags: see "em_perf_state flags" description below.
*/
struct em_perf_state {
+ unsigned long performance;
unsigned long frequency;
unsigned long power;
unsigned long cost;
@@ -37,8 +40,20 @@ struct em_perf_state {
#define EM_PERF_STATE_INEFFICIENT BIT(0)
/**
+ * struct em_perf_table - Performance states table
+ * @rcu: RCU used for safe access and destruction
+ * @kref: Reference counter to track the users
+ * @state: List of performance states, in ascending order
+ */
+struct em_perf_table {
+ struct rcu_head rcu;
+ struct kref kref;
+ struct em_perf_state state[];
+};
+
+/**
* struct em_perf_domain - Performance domain
- * @table: List of performance states, in ascending order
+ * @em_table: Pointer to the runtime modifiable em_perf_table
* @nr_perf_states: Number of performance states
* @flags: See "em_perf_domain flags"
* @cpus: Cpumask covering the CPUs of the domain. It's here
@@ -53,7 +68,7 @@ struct em_perf_state {
* field is unused.
*/
struct em_perf_domain {
- struct em_perf_state *table;
+ struct em_perf_table __rcu *em_table;
int nr_perf_states;
unsigned long flags;
unsigned long cpus[];
@@ -98,27 +113,6 @@ struct em_perf_domain {
#define EM_MAX_NUM_CPUS 16
#endif
-/*
- * To avoid an overflow on 32bit machines while calculating the energy
- * use a different order in the operation. First divide by the 'cpu_scale'
- * which would reduce big value stored in the 'cost' field, then multiply by
- * the 'sum_util'. This would allow to handle existing platforms, which have
- * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts.
- * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util'
- * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow.
- * This reordering of operations has some limitations, we lose small
- * precision in the estimation (comparing to 64bit platform w/o reordering).
- *
- * We are safe on 64bit machine.
- */
-#ifdef CONFIG_64BIT
-#define em_estimate_energy(cost, sum_util, scale_cpu) \
- (((cost) * (sum_util)) / (scale_cpu))
-#else
-#define em_estimate_energy(cost, sum_util, scale_cpu) \
- (((cost) / (scale_cpu)) * (sum_util))
-#endif
-
struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
@@ -168,40 +162,48 @@ struct em_data_callback {
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
+int em_dev_update_perf_domain(struct device *dev,
+ struct em_perf_table __rcu *new_table);
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
struct em_data_callback *cb, cpumask_t *span,
bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
+struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
+void em_table_free(struct em_perf_table __rcu *table);
+int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+ int nr_states);
/**
* em_pd_get_efficient_state() - Get an efficient performance state from the EM
- * @pd : Performance domain for which we want an efficient frequency
- * @freq : Frequency to map with the EM
+ * @table: List of performance states, in ascending order
+ * @nr_perf_states: Number of performance states
+ * @max_util: Max utilization to map with the EM
+ * @pd_flags: Performance Domain flags
*
* It is called from the scheduler code quite frequently and as a consequence
* doesn't implement any check.
*
- * Return: An efficient performance state, high enough to meet @freq
+ * Return: An efficient performance state id, high enough to meet @max_util
* requirement.
*/
-static inline
-struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
- unsigned long freq)
+static inline int
+em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
+ unsigned long max_util, unsigned long pd_flags)
{
struct em_perf_state *ps;
int i;
- for (i = 0; i < pd->nr_perf_states; i++) {
- ps = &pd->table[i];
- if (ps->frequency >= freq) {
- if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
+ for (i = 0; i < nr_perf_states; i++) {
+ ps = &table[i];
+ if (ps->performance >= max_util) {
+ if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
ps->flags & EM_PERF_STATE_INEFFICIENT)
continue;
- break;
+ return i;
}
}
- return ps;
+ return nr_perf_states - 1;
}
/**
@@ -224,9 +226,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
unsigned long max_util, unsigned long sum_util,
unsigned long allowed_cpu_cap)
{
- unsigned long freq, scale_cpu;
+ struct em_perf_table *em_table;
struct em_perf_state *ps;
- int cpu;
+ int i;
+
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+#endif
if (!sum_util)
return 0;
@@ -234,32 +240,29 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
/*
* In order to predict the performance state, map the utilization of
* the most utilized CPU of the performance domain to a requested
- * frequency, like schedutil. Take also into account that the real
- * frequency might be set lower (due to thermal capping). Thus, clamp
+ * performance, like schedutil. Take also into account that the real
+ * performance might be set lower (due to thermal capping). Thus, clamp
* max utilization to the allowed CPU capacity before calculating
- * effective frequency.
+ * effective performance.
*/
- cpu = cpumask_first(to_cpumask(pd->cpus));
- scale_cpu = arch_scale_cpu_capacity(cpu);
- ps = &pd->table[pd->nr_perf_states - 1];
-
- max_util = map_util_perf(max_util);
max_util = min(max_util, allowed_cpu_cap);
- freq = map_util_freq(max_util, ps->frequency, scale_cpu);
/*
* Find the lowest performance state of the Energy Model above the
- * requested frequency.
+ * requested performance.
*/
- ps = em_pd_get_efficient_state(pd, freq);
+ em_table = rcu_dereference(pd->em_table);
+ i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
+ max_util, pd->flags);
+ ps = &em_table->state[i];
/*
- * The capacity of a CPU in the domain at the performance state (ps)
- * can be computed as:
+ * The performance (capacity) of a CPU in the domain at the performance
+ * state (ps) can be computed as:
*
- * ps->freq * scale_cpu
- * ps->cap = -------------------- (1)
- * cpu_max_freq
+ * ps->freq * scale_cpu
+ * ps->performance = -------------------- (1)
+ * cpu_max_freq
*
* So, ignoring the costs of idle states (which are not available in
* the EM), the energy consumed by this CPU at that performance state
@@ -267,9 +270,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
*
* ps->power * cpu_util
* cpu_nrg = -------------------- (2)
- * ps->cap
+ * ps->performance
*
- * since 'cpu_util / ps->cap' represents its percentage of busy time.
+ * since 'cpu_util / ps->performance' represents its percentage of busy
+ * time.
*
* NOTE: Although the result of this computation actually is in
* units of power, it can be manipulated as an energy value
@@ -279,9 +283,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
* of two terms:
*
- * ps->power * cpu_max_freq cpu_util
- * cpu_nrg = ------------------------ * --------- (3)
- * ps->freq scale_cpu
+ * ps->power * cpu_max_freq
+ * cpu_nrg = ------------------------ * cpu_util (3)
+ * ps->freq * scale_cpu
*
* The first term is static, and is stored in the em_perf_state struct
* as 'ps->cost'.
@@ -291,11 +295,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* total energy of the domain (which is the simple sum of the energy of
* all of its CPUs) can be factorized as:
*
- * ps->cost * \Sum cpu_util
- * pd_nrg = ------------------------ (4)
- * scale_cpu
+ * pd_nrg = ps->cost * \Sum cpu_util (4)
*/
- return em_estimate_energy(ps->cost, sum_util, scale_cpu);
+ return ps->cost * sum_util;
}
/**
@@ -310,6 +312,23 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
return pd->nr_perf_states;
}
+/**
+ * em_perf_state_from_pd() - Get the performance states table of perf.
+ * domain
+ * @pd : performance domain for which this must be done
+ *
+ * To use this function the rcu_read_lock() should be hold. After the usage
+ * of the performance states table is finished, the rcu_read_unlock() should
+ * be called.
+ *
+ * Return: the pointer to performance states table of the performance domain
+ */
+static inline
+struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd)
+{
+ return rcu_dereference(pd->em_table)->state;
+}
+
#else
struct em_data_callback {};
#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
@@ -344,6 +363,29 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
{
return 0;
}
+static inline
+struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+{
+ return NULL;
+}
+static inline void em_table_free(struct em_perf_table __rcu *table) {}
+static inline
+int em_dev_update_perf_domain(struct device *dev,
+ struct em_perf_table __rcu *new_table)
+{
+ return -EINVAL;
+}
+static inline
+struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd)
+{
+ return NULL;
+}
+static inline
+int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+ int nr_states)
+{
+ return -EINVAL;
+}
#endif
#endif
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index d95ab85f96ba..b0fb775a600d 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -7,6 +7,11 @@
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
+#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
+#include <linux/resume_user_mode.h>
+#include <linux/tick.h>
+#include <linux/kmsan.h>
#include <asm/entry-common.h>
@@ -98,7 +103,19 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
* done between establishing state and enabling interrupts. The caller must
* enable interrupts before invoking syscall_enter_from_user_mode_work().
*/
-void enter_from_user_mode(struct pt_regs *regs);
+static __always_inline void enter_from_user_mode(struct pt_regs *regs)
+{
+ arch_enter_from_user_mode(regs);
+ lockdep_hardirqs_off(CALLER_ADDR0);
+
+ CT_WARN_ON(__ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+
+ instrumentation_begin();
+ kmsan_unpoison_entry_regs(regs);
+ trace_hardirqs_off_finish();
+ instrumentation_end();
+}
/**
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
@@ -117,6 +134,9 @@ void enter_from_user_mode(struct pt_regs *regs);
*/
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
+long syscall_trace_enter(struct pt_regs *regs, long syscall,
+ unsigned long work);
+
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
* a syscall
@@ -140,7 +160,15 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
* ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
* 2) Invocation of audit_syscall_entry()
*/
-long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
+static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
+{
+ unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+
+ if (work & SYSCALL_WORK_ENTER)
+ syscall = syscall_trace_enter(regs, syscall, work);
+
+ return syscall;
+}
/**
* syscall_enter_from_user_mode - Establish state and check and handle work
@@ -159,7 +187,19 @@ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
* Returns: The original or a modified syscall number. See
* syscall_enter_from_user_mode_work() for further explanation.
*/
-long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
+static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
+{
+ long ret;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ local_irq_enable();
+ ret = syscall_enter_from_user_mode_work(regs, syscall);
+ instrumentation_end();
+
+ return ret;
+}
/**
* local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
@@ -259,6 +299,43 @@ static __always_inline void arch_exit_to_user_mode(void) { }
void arch_do_signal_or_restart(struct pt_regs *regs);
/**
+ * exit_to_user_mode_loop - do any pending work before leaving to user space
+ */
+unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work);
+
+/**
+ * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * @regs: Pointer to pt_regs on entry stack
+ *
+ * 1) check that interrupts are disabled
+ * 2) call tick_nohz_user_enter_prepare()
+ * 3) call exit_to_user_mode_loop() if any flags from
+ * EXIT_TO_USER_MODE_WORK are set
+ * 4) check that interrupts are still disabled
+ */
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ unsigned long ti_work;
+
+ lockdep_assert_irqs_disabled();
+
+ /* Flush pending rcuog wakeup before the last need_resched() check */
+ tick_nohz_user_enter_prepare();
+
+ ti_work = read_thread_flags();
+ if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
+ ti_work = exit_to_user_mode_loop(regs, ti_work);
+
+ arch_exit_to_user_mode_prepare(regs, ti_work);
+
+ /* Ensure that kernel state is sane for a return to userspace */
+ kmap_assert_nomap();
+ lockdep_assert_irqs_disabled();
+ lockdep_sys_exit();
+}
+
+/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
* Syscall/interrupt exit enables interrupts, but the kernel state is
@@ -276,7 +353,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
* non-instrumentable.
* The caller has to invoke syscall_exit_to_user_mode_work() before this.
*/
-void exit_to_user_mode(void);
+static __always_inline void exit_to_user_mode(void)
+{
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+
+ user_enter_irqoff();
+ arch_exit_to_user_mode();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
/**
* syscall_exit_to_user_mode_work - Handle work before returning to user mode
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 224645f17c33..297231854ada 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -608,6 +608,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr,
}
/**
+ * eth_skb_pkt_type - Assign packet type if destination address does not match
+ * @skb: Assigned a packet type if address does not match @dev address
+ * @dev: Network device used to compare packet address against
+ *
+ * If the destination MAC address of the packet does not match the network
+ * device address, assign an appropriate packet type.
+ */
+static inline void eth_skb_pkt_type(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ const struct ethhdr *eth = eth_hdr(skb);
+
+ if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+ if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+ }
+}
+
+/**
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
* @skb: Buffer to pad
*
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 62b61527bcc4..9901e563f706 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -95,6 +95,7 @@ struct kernel_ethtool_ringparam {
* @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push
* @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push
* @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len
+ * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split
*/
enum ethtool_supported_ring_param {
ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0),
@@ -102,6 +103,7 @@ enum ethtool_supported_ring_param {
ETHTOOL_RING_USE_TX_PUSH = BIT(2),
ETHTOOL_RING_USE_RX_PUSH = BIT(3),
ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4),
+ ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5),
};
#define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit))
@@ -220,6 +222,16 @@ extern int
__ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings);
+struct ethtool_keee {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised);
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_active;
+ bool eee_enabled;
+};
+
struct kernel_ethtool_coalesce {
u8 use_cqe_mode_tx;
u8 use_cqe_mode_rx;
@@ -409,8 +421,10 @@ struct ethtool_pause_stats {
* not entire FEC data blocks. This is a non-standard statistic.
* Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS.
*
- * @lane: per-lane/PCS-instance counts as defined by the standard
- * @total: error counts for the entire port, for drivers incapable of reporting
+ * For each of the above fields, the two substructure members are:
+ *
+ * - @lanes: per-lane/PCS-instance counts as defined by the standard
+ * - @total: error counts for the entire port, for drivers incapable of reporting
* per-lane stats
*
* Drivers should fill in either only total or per-lane statistics, core
@@ -595,9 +609,46 @@ struct ethtool_mm_stats {
};
/**
+ * struct ethtool_rxfh_param - RXFH (RSS) parameters
+ * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
+ * Valid values are one of the %ETH_RSS_HASH_*.
+ * @indir_size: On SET, the array size of the user buffer for the
+ * indirection table, which may be zero, or
+ * %ETH_RXFH_INDIR_NO_CHANGE. On GET (read from the driver),
+ * the array size of the hardware indirection table.
+ * @indir: The indirection table of size @indir_size entries.
+ * @key_size: On SET, the array size of the user buffer for the hash key,
+ * which may be zero. On GET (read from the driver), the size of the
+ * hardware hash key.
+ * @key: The hash key of size @key_size bytes.
+ * @rss_context: RSS context identifier. Context 0 is the default for normal
+ * traffic; other contexts can be referenced as the destination for RX flow
+ * classification rules. On SET, %ETH_RXFH_CONTEXT_ALLOC is used
+ * to allocate a new RSS context; on return this field will
+ * contain the ID of the newly allocated context.
+ * @rss_delete: Set to non-ZERO to remove the @rss_context context.
+ * @input_xfrm: Defines how the input data is transformed. Valid values are one
+ * of %RXH_XFRM_*.
+ */
+struct ethtool_rxfh_param {
+ u8 hfunc;
+ u32 indir_size;
+ u32 *indir;
+ u32 key_size;
+ u8 *key;
+ u32 rss_context;
+ u8 rss_delete;
+ u8 input_xfrm;
+};
+
+/**
* struct ethtool_ops - optional netdev operations
* @cap_link_lanes_supported: indicates if the driver supports lanes
* parameter.
+ * @cap_rss_ctx_supported: indicates if the driver supports RSS
+ * contexts.
+ * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor
+ * RSS.
* @supported_coalesce_params: supported types of interrupt coalescing.
* @supported_ring_params: supported ring params.
* @get_drvinfo: Report driver/device information. Modern drivers no
@@ -694,15 +745,6 @@ struct ethtool_mm_stats {
* will remain unchanged.
* Returns a negative error code or zero. An error code must be returned
* if at least one unsupported change was requested.
- * @get_rxfh_context: Get the contents of the RX flow hash indirection table,
- * hash key, and/or hash function assiciated to the given rss context.
- * Returns a negative error code or zero.
- * @set_rxfh_context: Create, remove and configure RSS contexts. Allows setting
- * the contents of the RX flow hash indirection table, hash key, and/or
- * hash function associated to the given context. Arguments which are set
- * to %NULL or zero will remain unchanged.
- * Returns a negative error code or zero. An error code must be returned
- * if at least one unsupported change was requested.
* @get_channels: Get number of channels.
* @set_channels: Set number of channels. Returns a negative error code or
* zero.
@@ -785,6 +827,8 @@ struct ethtool_mm_stats {
*/
struct ethtool_ops {
u32 cap_link_lanes_supported:1;
+ u32 cap_rss_ctx_supported:1;
+ u32 cap_rss_sym_xor_supported:1;
u32 supported_coalesce_params;
u32 supported_ring_params;
void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
@@ -844,15 +888,9 @@ struct ethtool_ops {
int (*reset)(struct net_device *, u32 *);
u32 (*get_rxfh_key_size)(struct net_device *);
u32 (*get_rxfh_indir_size)(struct net_device *);
- int (*get_rxfh)(struct net_device *, u32 *indir, u8 *key,
- u8 *hfunc);
- int (*set_rxfh)(struct net_device *, const u32 *indir,
- const u8 *key, const u8 hfunc);
- int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key,
- u8 *hfunc, u32 rss_context);
- int (*set_rxfh_context)(struct net_device *, const u32 *indir,
- const u8 *key, const u8 hfunc,
- u32 *rss_context, bool delete);
+ int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *);
+ int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *,
+ struct netlink_ext_ack *extack);
void (*get_channels)(struct net_device *, struct ethtool_channels *);
int (*set_channels)(struct net_device *, struct ethtool_channels *);
int (*get_dump_flag)(struct net_device *, struct ethtool_dump *);
@@ -864,8 +902,8 @@ struct ethtool_ops {
struct ethtool_modinfo *);
int (*get_module_eeprom)(struct net_device *,
struct ethtool_eeprom *, u8 *);
- int (*get_eee)(struct net_device *, struct ethtool_eee *);
- int (*set_eee)(struct net_device *, struct ethtool_eee *);
+ int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee);
+ int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee);
int (*get_tunable)(struct net_device *,
const struct ethtool_tunable *, void *);
int (*set_tunable)(struct net_device *,
@@ -1044,12 +1082,52 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add,
}
/**
+ * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer.
+ * @dev: pointer to net_device structure
+ * @info: buffer to hold the result
+ * Returns zero on success, non-zero otherwise.
+ */
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info);
+
+/**
* ethtool_sprintf - Write formatted string to ethtool string data
- * @data: Pointer to start of string to update
+ * @data: Pointer to a pointer to the start of string to update
* @fmt: Format of string to write
*
- * Write formatted string to data. Update data to point at start of
+ * Write formatted string to *data. Update *data to point at start of
* next string.
*/
extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
+
+/**
+ * ethtool_puts - Write string to ethtool string data
+ * @data: Pointer to a pointer to the start of string to update
+ * @str: String to write
+ *
+ * Write string to *data without a trailing newline. Update *data
+ * to point at start of next string.
+ *
+ * Prefer this function to ethtool_sprintf() when given only
+ * two arguments or if @fmt is just "%s".
+ */
+extern void ethtool_puts(u8 **data, const char *str);
+
+/* Link mode to forced speed capabilities maps */
+struct ethtool_forced_speed_map {
+ u32 speed;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(caps);
+
+ const u32 *cap_arr;
+ u32 arr_size;
+};
+
+#define ETHTOOL_FORCED_SPEED_MAP(prefix, value) \
+{ \
+ .speed = SPEED_##value, \
+ .cap_arr = prefix##_##value, \
+ .arr_size = ARRAY_SIZE(prefix##_##value), \
+}
+
+void
+ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size);
#endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index b9d83652c097..e32bee4345fb 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -35,8 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx);
struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
+void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
@@ -58,15 +57,8 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
return ERR_PTR(-ENOSYS);
}
-static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+static inline void eventfd_signal_mask(struct eventfd_ctx *ctx, __poll_t mask)
{
- return -ENOSYS;
-}
-
-static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
- unsigned mask)
-{
- return -ENOSYS;
}
static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
@@ -92,5 +84,10 @@ static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
#endif
+static inline void eventfd_signal(struct eventfd_ctx *ctx)
+{
+ eventfd_signal_mask(ctx, 0);
+}
+
#endif /* _LINUX_EVENTFD_H */
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 7dc1ee74169f..d48d6da32315 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -12,53 +12,15 @@
#include <linux/integrity.h>
#include <linux/xattr.h>
-struct integrity_iint_cache;
-
#ifdef CONFIG_EVM
extern int evm_set_key(void *key, size_t keylen);
extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
const char *xattr_name,
void *xattr_value,
- size_t xattr_value_len,
- struct integrity_iint_cache *iint);
-extern int evm_inode_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry, struct iattr *attr);
-extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
-extern int evm_inode_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *name,
- const void *value, size_t size);
-extern void evm_inode_post_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len);
-extern int evm_inode_removexattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *xattr_name);
-extern void evm_inode_post_removexattr(struct dentry *dentry,
- const char *xattr_name);
-static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- evm_inode_post_removexattr(dentry, acl_name);
-}
-extern int evm_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl);
-static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return evm_inode_set_acl(idmap, dentry, acl_name, NULL);
-}
-static inline void evm_inode_post_set_acl(struct dentry *dentry,
- const char *acl_name,
- struct posix_acl *kacl)
-{
- return evm_inode_post_setxattr(dentry, acl_name, NULL, 0);
-}
-extern int evm_inode_init_security(struct inode *inode,
- const struct xattr *xattr_array,
- struct xattr *evm);
+ size_t xattr_value_len);
+int evm_inode_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr, struct xattr *xattrs,
+ int *xattr_count);
extern bool evm_revalidate_status(const char *xattr_name);
extern int evm_protected_xattr_if_enabled(const char *req_xattr_name);
extern int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
@@ -83,83 +45,16 @@ static inline int evm_set_key(void *key, size_t keylen)
static inline enum integrity_status evm_verifyxattr(struct dentry *dentry,
const char *xattr_name,
void *xattr_value,
- size_t xattr_value_len,
- struct integrity_iint_cache *iint)
+ size_t xattr_value_len)
{
return INTEGRITY_UNKNOWN;
}
#endif
-static inline int evm_inode_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry, struct iattr *attr)
-{
- return 0;
-}
-
-static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
-{
- return;
-}
-
-static inline int evm_inode_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *name,
- const void *value, size_t size)
-{
- return 0;
-}
-
-static inline void evm_inode_post_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len)
-{
- return;
-}
-
-static inline int evm_inode_removexattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *xattr_name)
-{
- return 0;
-}
-
-static inline void evm_inode_post_removexattr(struct dentry *dentry,
- const char *xattr_name)
-{
- return;
-}
-
-static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return;
-}
-
-static inline int evm_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl)
-{
- return 0;
-}
-
-static inline int evm_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return 0;
-}
-
-static inline void evm_inode_post_set_acl(struct dentry *dentry,
- const char *acl_name,
- struct posix_acl *kacl)
-{
- return;
-}
-
-static inline int evm_inode_init_security(struct inode *inode,
- const struct xattr *xattr_array,
- struct xattr *evm)
+static inline int evm_inode_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr,
+ struct xattr *xattrs,
+ int *xattr_count)
{
return 0;
}
diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h
index 1c849db953a5..d445705ac13c 100644
--- a/include/linux/export-internal.h
+++ b/include/linux/export-internal.h
@@ -16,10 +16,13 @@
* and eliminates the need for absolute relocations that require runtime
* processing on relocatable kernels.
*/
+#define __KSYM_ALIGN ".balign 4"
#define __KSYM_REF(sym) ".long " #sym "- ."
#elif defined(CONFIG_64BIT)
+#define __KSYM_ALIGN ".balign 8"
#define __KSYM_REF(sym) ".quad " #sym
#else
+#define __KSYM_ALIGN ".balign 4"
#define __KSYM_REF(sym) ".long " #sym
#endif
@@ -42,7 +45,7 @@
" .asciz \"" ns "\"" "\n" \
" .previous" "\n" \
" .section \"___ksymtab" sec "+" #name "\", \"a\"" "\n" \
- " .balign 4" "\n" \
+ __KSYM_ALIGN "\n" \
"__ksymtab_" #name ":" "\n" \
__KSYM_REF(sym) "\n" \
__KSYM_REF(__kstrtab_ ##name) "\n" \
@@ -50,8 +53,8 @@
" .previous" "\n" \
)
-#ifdef CONFIG_IA64
-#define KSYM_FUNC(name) @fptr(name)
+#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
+#define KSYM_FUNC(name) P%name
#else
#define KSYM_FUNC(name) name
#endif
@@ -61,6 +64,7 @@
#define SYMBOL_CRC(sym, crc, sec) \
asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \
+ ".balign 4" "\n" \
"__crc_" #sym ":" "\n" \
".long " #crc "\n" \
".previous" "\n")
diff --git a/include/linux/export.h b/include/linux/export.h
index beed8387e0a4..0bbd02fd351d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -7,15 +7,6 @@
#include <linux/stringify.h>
/*
- * Export symbols from the kernel to modules. Forked from module.h
- * to reduce the amount of pointless cruft we feed to gcc when only
- * exporting a simple symbol or two.
- *
- * Try not to add #includes here. It slows compilation and makes kernel
- * hackers place grumpy comments in header files.
- */
-
-/*
* This comment block is used by fixdep. Please do not remove.
*
* When CONFIG_MODVERSIONS is changed from n to y, all source files having
@@ -23,15 +14,6 @@
* side effect of the *.o build rule.
*/
-#ifndef __ASSEMBLY__
-#ifdef MODULE
-extern struct module __this_module;
-#define THIS_MODULE (&__this_module)
-#else
-#define THIS_MODULE ((struct module *)0)
-#endif
-#endif /* __ASSEMBLY__ */
-
#ifdef CONFIG_64BIT
#define __EXPORT_SYMBOL_REF(sym) \
.balign 8 ASM_NL \
@@ -50,7 +32,7 @@ extern struct module __this_module;
__EXPORT_SYMBOL_REF(sym) ASM_NL \
.previous
-#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS)
+#if defined(__DISABLE_EXPORTS)
/*
* Allow symbol exports to be disabled completely so that C code may
@@ -75,7 +57,7 @@ extern struct module __this_module;
__ADDRESSABLE(sym) \
asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
-#endif /* CONFIG_MODULES */
+#endif
#ifdef DEFAULT_SYMBOL_NAMESPACE
#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE))
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 11fbd0ee1370..bb37ad5cc954 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -99,12 +99,29 @@ enum fid_type {
FILEID_FAT_WITH_PARENT = 0x72,
/*
+ * 64 bit inode number, 32 bit generation number.
+ */
+ FILEID_INO64_GEN = 0x81,
+
+ /*
+ * 64 bit inode number, 32 bit generation number,
+ * 64 bit parent inode number, 32 bit parent generation.
+ */
+ FILEID_INO64_GEN_PARENT = 0x82,
+
+ /*
* 128 bit child FID (struct lu_fid)
* 128 bit parent FID (struct lu_fid)
*/
FILEID_LUSTRE = 0x97,
/*
+ * 64 bit inode number, 32 bit subvolume, 32 bit generation number:
+ */
+ FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1,
+ FILEID_BCACHEFS_WITH_PARENT = 0xb2,
+
+ /*
* 64 bit unique kernfs id
*/
FILEID_KERNFS = 0xfe,
@@ -123,7 +140,11 @@ struct fid {
u32 parent_ino;
u32 parent_gen;
} i32;
- struct {
+ struct {
+ u64 ino;
+ u32 gen;
+ } __packed i64;
+ struct {
u32 block;
u16 partref;
u16 parent_partref;
@@ -224,15 +245,56 @@ struct export_operations {
atomic attribute updates
*/
#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
+#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */
unsigned long flags;
};
+/**
+ * exportfs_lock_op_is_async() - export op supports async lock operation
+ * @export_ops: the nfs export operations to check
+ *
+ * Returns true if the nfs export_operations structure has
+ * EXPORT_OP_ASYNC_LOCK in their flags set
+ */
+static inline bool
+exportfs_lock_op_is_async(const struct export_operations *export_ops)
+{
+ return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
+}
+
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
int *max_len, int flags);
+static inline bool exportfs_can_encode_fid(const struct export_operations *nop)
+{
+ return !nop || nop->encode_fh;
+}
+
+static inline bool exportfs_can_decode_fh(const struct export_operations *nop)
+{
+ return nop && nop->fh_to_dentry;
+}
+
+static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
+ int fh_flags)
+{
+ /*
+ * If a non-decodeable file handle was requested, we only need to make
+ * sure that filesystem did not opt-out of encoding fid.
+ */
+ if (fh_flags & EXPORT_FH_FID)
+ return exportfs_can_encode_fid(nop);
+
+ /*
+ * If a decodeable file handle was requested, we need to make sure that
+ * filesystem can also decode file handles.
+ */
+ return exportfs_can_decode_fh(nop);
+}
+
static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid,
int *max_len)
{
@@ -252,10 +314,12 @@ extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
/*
* Generic helpers for filesystems.
*/
-extern struct dentry *generic_fh_to_dentry(struct super_block *sb,
+int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len,
+ struct inode *parent);
+struct dentry *generic_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type,
struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
-extern struct dentry *generic_fh_to_parent(struct super_block *sb,
+struct dentry *generic_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type,
struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 3c45c3846fe9..e596a0abcb27 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -328,16 +328,4 @@ struct extcon_specific_cable_nb {
struct extcon_dev *edev;
unsigned long previous_value;
};
-
-static inline int extcon_register_interest(struct extcon_specific_cable_nb *obj,
- const char *extcon_name, const char *cable_name,
- struct notifier_block *nb)
-{
- return -EINVAL;
-}
-
-static inline int extcon_unregister_interest(struct extcon_specific_cable_nb *obj)
-{
- return -EINVAL;
-}
#endif /* __LINUX_EXTCON_H__ */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index a82a4bb6ce68..a357287eac1e 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -13,10 +13,10 @@
#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */
#define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */
-#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */
-#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */
-#define F2FS_BLKSIZE 4096 /* support only 4KB block */
-#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */
+#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */
+#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */
+#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */
+#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
#define F2FS_EXTENSION_LEN 8 /* max size of extension */
#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
@@ -27,6 +27,7 @@
#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1)
/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
@@ -40,12 +41,6 @@
#define F2FS_ENC_UTF8_12_1 1
-#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
-#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */
-#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
-#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
-#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1)
-
/* This flag is used by node and meta inodes, and by recovery */
#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
@@ -81,6 +76,7 @@ enum stop_cp_reason {
STOP_CP_REASON_CORRUPTED_SUMMARY,
STOP_CP_REASON_UPDATE_INODE,
STOP_CP_REASON_FLUSH_FAIL,
+ STOP_CP_REASON_NO_SEGMENT,
STOP_CP_REASON_MAX,
};
@@ -104,6 +100,7 @@ enum f2fs_error {
ERROR_CORRUPTED_VERITY_XATTR,
ERROR_CORRUPTED_XATTR,
ERROR_INVALID_NODE_REFERENCE,
+ ERROR_INCONSISTENT_NAT,
ERROR_MAX,
};
@@ -210,14 +207,14 @@ struct f2fs_checkpoint {
unsigned char sit_nat_version_bitmap[];
} __packed;
-#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */
+#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */
#define CP_MIN_CHKSUM_OFFSET \
(offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap))
/*
* For orphan inode management
*/
-#define F2FS_ORPHANS_PER_BLOCK 1020
+#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32))
#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \
F2FS_ORPHANS_PER_BLOCK)
@@ -243,14 +240,31 @@ struct f2fs_extent {
#define F2FS_NAME_LEN 255
/* 200 bytes for inline xattrs by default */
#define DEFAULT_INLINE_XATTR_ADDRS 50
-#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
+
+#define OFFSET_OF_END_OF_I_EXT 360
+#define SIZE_OF_I_NID 20
+
+struct node_footer {
+ __le32 nid; /* node id */
+ __le32 ino; /* inode number */
+ __le32 flag; /* include cold/fsync/dentry marks and offset */
+ __le64 cp_ver; /* checkpoint version */
+ __le32 next_blkaddr; /* next node page block address */
+} __packed;
+
+/* Address Pointers in an Inode */
+#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \
+ - SIZE_OF_I_NID \
+ - sizeof(struct node_footer)) / sizeof(__le32))
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
-#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
+/* Address Pointers in a Direct Block */
+#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
#define ADDRS_PER_BLOCK(inode) addrs_per_block(inode)
-#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
+/* Node IDs in an Indirect Block */
+#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
#define ADDRS_PER_PAGE(page, inode) \
(IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode))
@@ -342,14 +356,6 @@ enum {
#define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0)
-struct node_footer {
- __le32 nid; /* node id */
- __le32 ino; /* inode number */
- __le32 flag; /* include cold/fsync/dentry marks and offset */
- __le64 cp_ver; /* checkpoint version */
- __le32 next_blkaddr; /* next node page block address */
-} __packed;
-
struct f2fs_node {
/* can be one of three types: inode, direct, and indirect types */
union {
@@ -363,7 +369,7 @@ struct f2fs_node {
/*
* For NAT entries
*/
-#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry))
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
@@ -378,12 +384,13 @@ struct f2fs_nat_block {
/*
* For SIT entries
*
- * Each segment is 2MB in size by default so that a bitmap for validity of
- * there-in blocks should occupy 64 bytes, 512 bits.
+ * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size,
+ * this results in a segment size of 2MB. For 16k pages, the default segment size
+ * is 8MB.
* Not allow to change this.
*/
#define SIT_VBLOCK_MAP_SIZE 64
-#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
+#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry))
/*
* F2FS uses 4 bytes to represent block address. As a result, supported size of
@@ -418,7 +425,7 @@ struct f2fs_sit_block {
* For segment summary
*
* One summary block contains exactly 512 summary entries, which represents
- * exactly 2MB segment by default. Not allow to change the basic units.
+ * exactly one segment by default. Not allow to change the basic units.
*
* NOTE: For initializing fields, you must use set_summary
*
@@ -429,12 +436,12 @@ struct f2fs_sit_block {
* from node's page's beginning to get a data block address.
* ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
*/
-#define ENTRIES_IN_SUM 512
-#define SUMMARY_SIZE (7) /* sizeof(struct summary) */
+#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8)
+#define SUMMARY_SIZE (7) /* sizeof(struct f2fs_summary) */
#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */
#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM)
-/* a summary entry for a 4KB-sized block in a segment */
+/* a summary entry for a block in a segment */
struct f2fs_summary {
__le32 nid; /* parent node id */
union {
@@ -518,7 +525,7 @@ struct f2fs_journal {
};
} __packed;
-/* 4KB-sized summary block structure */
+/* Block-sized summary block structure */
struct f2fs_summary_block {
struct f2fs_summary entries[ENTRIES_IN_SUM];
struct f2fs_journal journal;
@@ -559,11 +566,14 @@ typedef __le32 f2fs_hash_t;
* Note: there are more reserved space in inline dentry than in regular
* dentry, when converting inline dentry we should handle this carefully.
*/
-#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1))
#define SIZE_OF_DIR_ENTRY 11 /* by byte */
#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
BITS_PER_BYTE)
-#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \
F2FS_SLOT_LEN) * \
NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
#define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */
@@ -576,7 +586,7 @@ struct f2fs_dir_entry {
__u8 file_type; /* file type */
} __packed;
-/* 4KB-sized directory entry block */
+/* Block-sized directory entry block */
struct f2fs_dentry_block {
/* validity bitmap for directory entries in each block */
__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
diff --git a/include/linux/fb.h b/include/linux/fb.h
index ce7d588edc3e..0dd27364d56f 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -2,28 +2,30 @@
#ifndef _LINUX_FB_H
#define _LINUX_FB_H
-#include <linux/refcount.h>
-#include <linux/kgdb.h>
#include <uapi/linux/fb.h>
#define FBIO_CURSOR _IOWR('F', 0x08, struct fb_cursor_user)
-#include <linux/fs.h>
-#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/printk.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
#include <linux/workqueue.h>
-#include <linux/notifier.h>
-#include <linux/list.h>
-#include <linux/backlight.h>
-#include <linux/slab.h>
#include <asm/fb.h>
-struct vm_area_struct;
-struct fb_info;
+struct backlight_device;
struct device;
+struct device_node;
+struct fb_info;
struct file;
+struct i2c_adapter;
+struct inode;
+struct module;
+struct notifier_block;
+struct page;
struct videomode;
-struct device_node;
+struct vm_area_struct;
/* Definitions below are used in the parsed monitor specs */
#define FB_DPMS_ACTIVE_OFF 1
@@ -143,9 +145,13 @@ struct fb_event {
void *data;
};
+/* Enough for the VT console needs, see its max_font_width/height */
+#define FB_MAX_BLIT_WIDTH 64
+#define FB_MAX_BLIT_HEIGHT 128
+
struct fb_blit_caps {
- u32 x;
- u32 y;
+ DECLARE_BITMAP(x, FB_MAX_BLIT_WIDTH);
+ DECLARE_BITMAP(y, FB_MAX_BLIT_HEIGHT);
u32 len;
u32 flags;
};
@@ -192,10 +198,12 @@ struct fb_pixmap {
u32 scan_align; /* alignment per scanline */
u32 access_align; /* alignment per read/write (bits) */
u32 flags; /* see FB_PIXMAP_* */
- u32 blit_x; /* supported bit block dimensions (1-32)*/
- u32 blit_y; /* Format: blit_x = 1 << (width - 1) */
- /* blit_y = 1 << (height - 1) */
- /* if 0, will be set to 0xffffffff (all)*/
+ /* supported bit block dimensions */
+ /* Format: test_bit(width - 1, blit_x) */
+ /* test_bit(height - 1, blit_y) */
+ /* if zero, will be set to full (all) */
+ DECLARE_BITMAP(blit_x, FB_MAX_BLIT_WIDTH);
+ DECLARE_BITMAP(blit_y, FB_MAX_BLIT_HEIGHT);
/* access methods */
void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size);
void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size);
@@ -383,7 +391,6 @@ struct fb_tile_ops {
#endif /* CONFIG_FB_TILEBLITTING */
/* FBINFO_* = fb_info.flags bit flags */
-#define FBINFO_DEFAULT 0
#define FBINFO_HWACCEL_DISABLED 0x0002
/* When FBINFO_HWACCEL_DISABLED is set:
* Hardware acceleration is turned off. Software implementations
@@ -481,7 +488,9 @@ struct fb_info {
const struct fb_ops *fbops;
struct device *device; /* This is the parent */
+#if defined(CONFIG_FB_DEVICE)
struct device *dev; /* This is this fb device */
+#endif
int class_flag; /* private sysfs flags */
#ifdef CONFIG_FB_TILEBLITTING
struct fb_tile_ops *tileops; /* Tile Blitting */
@@ -502,8 +511,6 @@ struct fb_info {
bool skip_vt_switch; /* no VT switch on suspend/resume required */
};
-#define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT
-
/* This will go away
* fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags
* when it wants to turn the acceleration engine on. This is
@@ -527,7 +534,7 @@ extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var);
extern int fb_blank(struct fb_info *info, int blank);
/*
- * Drawing operations where framebuffer is in I/O memory
+ * Helpers for framebuffers in I/O memory
*/
extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
@@ -537,30 +544,27 @@ extern ssize_t fb_io_read(struct fb_info *info, char __user *buf,
size_t count, loff_t *ppos);
extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf,
size_t count, loff_t *ppos);
+int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
-/*
- * Initializes struct fb_ops for framebuffers in I/O memory.
- */
-
-#define __FB_DEFAULT_IO_OPS_RDWR \
+#define __FB_DEFAULT_IOMEM_OPS_RDWR \
.fb_read = fb_io_read, \
.fb_write = fb_io_write
-#define __FB_DEFAULT_IO_OPS_DRAW \
+#define __FB_DEFAULT_IOMEM_OPS_DRAW \
.fb_fillrect = cfb_fillrect, \
.fb_copyarea = cfb_copyarea, \
.fb_imageblit = cfb_imageblit
-#define __FB_DEFAULT_IO_OPS_MMAP \
- .fb_mmap = NULL /* default implementation */
+#define __FB_DEFAULT_IOMEM_OPS_MMAP \
+ .fb_mmap = fb_io_mmap
-#define FB_DEFAULT_IO_OPS \
- __FB_DEFAULT_IO_OPS_RDWR, \
- __FB_DEFAULT_IO_OPS_DRAW, \
- __FB_DEFAULT_IO_OPS_MMAP
+#define FB_DEFAULT_IOMEM_OPS \
+ __FB_DEFAULT_IOMEM_OPS_RDWR, \
+ __FB_DEFAULT_IOMEM_OPS_DRAW, \
+ __FB_DEFAULT_IOMEM_OPS_MMAP
/*
- * Drawing operations where framebuffer is in system RAM
+ * Helpers for framebuffers in system memory
*/
extern void sys_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
@@ -571,32 +575,31 @@ extern ssize_t fb_sys_read(struct fb_info *info, char __user *buf,
extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
size_t count, loff_t *ppos);
+#define __FB_DEFAULT_SYSMEM_OPS_RDWR \
+ .fb_read = fb_sys_read, \
+ .fb_write = fb_sys_write
+
+#define __FB_DEFAULT_SYSMEM_OPS_DRAW \
+ .fb_fillrect = sys_fillrect, \
+ .fb_copyarea = sys_copyarea, \
+ .fb_imageblit = sys_imageblit
+
/*
- * Initializes struct fb_ops for framebuffers in system memory.
+ * Helpers for framebuffers in DMA-able memory
*/
-#define __FB_DEFAULT_SYS_OPS_RDWR \
+#define __FB_DEFAULT_DMAMEM_OPS_RDWR \
.fb_read = fb_sys_read, \
.fb_write = fb_sys_write
-#define __FB_DEFAULT_SYS_OPS_DRAW \
+#define __FB_DEFAULT_DMAMEM_OPS_DRAW \
.fb_fillrect = sys_fillrect, \
.fb_copyarea = sys_copyarea, \
.fb_imageblit = sys_imageblit
-#define __FB_DEFAULT_SYS_OPS_MMAP \
- .fb_mmap = NULL /* default implementation */
-
-#define FB_DEFAULT_SYS_OPS \
- __FB_DEFAULT_SYS_OPS_RDWR, \
- __FB_DEFAULT_SYS_OPS_DRAW, \
- __FB_DEFAULT_SYS_OPS_MMAP
-
-/* drivers/video/fbmem.c */
+/* fbmem.c */
extern int register_framebuffer(struct fb_info *fb_info);
extern void unregister_framebuffer(struct fb_info *fb_info);
-extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
-extern int fb_show_logo(struct fb_info *fb_info, int rotate);
extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx,
u32 height, u32 shift_high, u32 shift_low, u32 mod);
@@ -607,10 +610,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var,
extern int fb_get_options(const char *name, char **option);
extern int fb_new_modelist(struct fb_info *info);
-extern bool fb_center_logo;
-extern int fb_logo_count;
-extern struct class *fb_class;
-
static inline void lock_fb_info(struct fb_info *info)
{
mutex_lock(&info->lock);
@@ -636,7 +635,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
}
}
-/* drivers/video/fb_defio.c */
+/* fb_defio.c */
int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
extern int fb_deferred_io_init(struct fb_info *info);
extern void fb_deferred_io_open(struct fb_info *info,
@@ -687,11 +686,11 @@ extern int fb_deferred_io_fsync(struct file *file, loff_t start,
__damage_area(info, image->dx, image->dy, image->width, image->height); \
}
-#define FB_GEN_DEFAULT_DEFERRED_IO_OPS(__prefix, __damage_range, __damage_area) \
+#define FB_GEN_DEFAULT_DEFERRED_IOMEM_OPS(__prefix, __damage_range, __damage_area) \
__FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, io) \
__FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, cfb)
-#define FB_GEN_DEFAULT_DEFERRED_SYS_OPS(__prefix, __damage_range, __damage_area) \
+#define FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(__prefix, __damage_range, __damage_area) \
__FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \
__FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys)
@@ -735,14 +734,11 @@ static inline bool fb_be_math(struct fb_info *info)
#endif /* CONFIG_FB_FOREIGN_ENDIAN */
}
-/* drivers/video/fbsysfs.c */
extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev);
extern void framebuffer_release(struct fb_info *info);
-extern int fb_init_device(struct fb_info *fb_info);
-extern void fb_cleanup_device(struct fb_info *head);
extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max);
-/* drivers/video/fbmon.c */
+/* fbmon.c */
#define FB_MAXTIMINGS 0
#define FB_VSYNCTIMINGS 1
#define FB_HSYNCTIMINGS 2
@@ -776,7 +772,7 @@ extern int of_get_fb_videomode(struct device_node *np,
extern int fb_videomode_from_videomode(const struct videomode *vm,
struct fb_videomode *fbmode);
-/* drivers/video/modedb.c */
+/* modedb.c */
#define VESA_MODEDB_SIZE 43
#define DMT_SIZE 0x50
@@ -802,7 +798,7 @@ extern void fb_videomode_to_modelist(const struct fb_videomode *modedb, int num,
extern const struct fb_videomode *fb_find_best_display(const struct fb_monspecs *specs,
struct list_head *head);
-/* drivers/video/fbcmap.c */
+/* fbcmap.c */
extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp);
extern int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags);
extern void fb_dealloc_cmap(struct fb_cmap *cmap);
@@ -852,16 +848,12 @@ extern int fb_find_mode(struct fb_var_screeninfo *var,
const struct fb_videomode *default_mode,
unsigned int default_bpp);
-#if defined(CONFIG_VIDEO_NOMODESET)
bool fb_modesetting_disabled(const char *drvname);
-#else
-static inline bool fb_modesetting_disabled(const char *drvname)
-{
- return false;
-}
-#endif
-/* Convenience logging macros */
+/*
+ * Convenience logging macros
+ */
+
#define fb_err(fb_info, fmt, ...) \
pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
#define fb_notice(info, fmt, ...) \
@@ -873,4 +865,12 @@ static inline bool fb_modesetting_disabled(const char *drvname)
#define fb_dbg(fb_info, fmt, ...) \
pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_warn_once(fb_info, fmt, ...) \
+ pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+
+#define fb_WARN_ONCE(fb_info, condition, fmt, ...) \
+ WARN_ONCE(condition, "fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__)
+#define fb_WARN_ON_ONCE(fb_info, x) \
+ fb_WARN_ONCE(fb_info, (x), "%s", "fb_WARN_ON_ONCE(" __stringify(x) ")")
+
#endif /* _LINUX_FB_H */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index e066816f3519..78c8326d74ae 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -83,12 +83,17 @@ struct dentry;
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
-
- if (fd < fdt->max_fds) {
- fd = array_index_nospec(fd, fdt->max_fds);
- return rcu_dereference_raw(fdt->fd[fd]);
- }
- return NULL;
+ unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
+ struct file *needs_masking;
+
+ /*
+ * 'mask' is zero for an out-of-bounds fd, all ones for ok.
+ * 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
+ *
+ * Accessing fdt->fd[0] is ok, but needs masking of the result.
+ */
+ needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);
+ return (struct file *)(mask & (unsigned long)needs_masking);
}
static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
@@ -98,20 +103,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un
return files_lookup_fd_raw(files, fd);
}
-static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd)
-{
- RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
- "suspicious rcu_dereference_check() usage");
- return files_lookup_fd_raw(files, fd);
-}
-
-static inline struct file *lookup_fd_rcu(unsigned int fd)
-{
- return files_lookup_fd_rcu(current->files, fd);
-}
-
-struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd);
-struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd);
+struct file *lookup_fdget_rcu(unsigned int fd);
+struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
+struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);
struct task_struct;
@@ -125,7 +119,7 @@ int iterate_fd(struct files_struct *, unsigned,
extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
-extern struct file *close_fd_get_file(unsigned int fd);
+extern struct file *file_close_fd(unsigned int fd);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
struct files_struct **new_fdp);
diff --git a/include/linux/file.h b/include/linux/file.h
index 6e9099d29343..169692cb1906 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -24,6 +24,8 @@ struct inode;
struct path;
extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
const char *, int flags, const struct file_operations *);
+extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *,
+ const char *, int flags, const struct file_operations *);
extern struct file *alloc_file_clone(struct file *, int flags,
const struct file_operations *);
@@ -96,18 +98,8 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
extern void fd_install(unsigned int fd, struct file *file);
-extern int __receive_fd(struct file *file, int __user *ufd,
- unsigned int o_flags);
-
-extern int receive_fd(struct file *file, unsigned int o_flags);
+int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
-static inline int receive_fd_user(struct file *file, int __user *ufd,
- unsigned int o_flags)
-{
- if (ufd == NULL)
- return -EFAULT;
- return __receive_fd(file, ufd, o_flags);
-}
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
extern void flush_delayed_fput(void);
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index efcdd1631d9b..daee999d05f3 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -27,6 +27,7 @@
#define FILE_LOCK_DEFERRED 1
struct file_lock;
+struct file_lease;
struct file_lock_operations {
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
@@ -39,14 +40,17 @@ struct lock_manager_operations {
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, int);
- bool (*lm_break)(struct file_lock *);
- int (*lm_change)(struct file_lock *, int, struct list_head *);
- void (*lm_setup)(struct file_lock *, void **);
- bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *cfl);
void (*lm_expire_lock)(void);
};
+struct lease_manager_operations {
+ bool (*lm_break)(struct file_lease *);
+ int (*lm_change)(struct file_lease *, int, struct list_head *);
+ void (*lm_setup)(struct file_lease *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lease *);
+};
+
struct lock_manager {
struct list_head list;
/*
@@ -85,31 +89,31 @@ bool opens_in_grace(struct net *);
*
* Obviously, the last two criteria only matter for POSIX locks.
*/
-struct file_lock {
- struct file_lock *fl_blocker; /* The lock, that is blocking us */
- struct list_head fl_list; /* link into file_lock_context */
- struct hlist_node fl_link; /* node in global lists */
- struct list_head fl_blocked_requests; /* list of requests with
+
+struct file_lock_core {
+ struct file_lock_core *flc_blocker; /* The lock that is blocking us */
+ struct list_head flc_list; /* link into file_lock_context */
+ struct hlist_node flc_link; /* node in global lists */
+ struct list_head flc_blocked_requests; /* list of requests with
* ->fl_blocker pointing here
*/
- struct list_head fl_blocked_member; /* node in
+ struct list_head flc_blocked_member; /* node in
* ->fl_blocker->fl_blocked_requests
*/
- fl_owner_t fl_owner;
- unsigned int fl_flags;
- unsigned char fl_type;
- unsigned int fl_pid;
- int fl_link_cpu; /* what cpu's list is this on? */
- wait_queue_head_t fl_wait;
- struct file *fl_file;
+ fl_owner_t flc_owner;
+ unsigned int flc_flags;
+ unsigned char flc_type;
+ pid_t flc_pid;
+ int flc_link_cpu; /* what cpu's list is this on? */
+ wait_queue_head_t flc_wait;
+ struct file *flc_file;
+};
+
+struct file_lock {
+ struct file_lock_core c;
loff_t fl_start;
loff_t fl_end;
- struct fasync_struct * fl_fasync; /* for lease break notifications */
- /* for lease breaks: */
- unsigned long fl_break_time;
- unsigned long fl_downgrade_time;
-
const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
union {
@@ -126,6 +130,15 @@ struct file_lock {
} fl_u;
} __randomize_layout;
+struct file_lease {
+ struct file_lock_core c;
+ struct fasync_struct * fl_fasync; /* for lease break notifications */
+ /* for lease breaks: */
+ unsigned long fl_break_time;
+ unsigned long fl_downgrade_time;
+ const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */
+} __randomize_layout;
+
struct file_lock_context {
spinlock_t flc_lock;
struct list_head flc_flock;
@@ -144,14 +157,34 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
struct flock64 *);
#endif
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
+static inline bool lock_is_unlock(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_UNLCK;
+}
+
+static inline bool lock_is_read(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_RDLCK;
+}
+
+static inline bool lock_is_write(struct file_lock *fl)
+{
+ return fl->c.flc_type == F_WRLCK;
+}
+
+static inline void locks_wake_up(struct file_lock *fl)
+{
+ wake_up(&fl->c.flc_wait);
+}
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
void locks_init_lock(struct file_lock *);
-struct file_lock * locks_alloc_lock(void);
+struct file_lock *locks_alloc_lock(void);
void locks_copy_lock(struct file_lock *, struct file_lock *);
void locks_copy_conflock(struct file_lock *, struct file_lock *);
void locks_remove_posix(struct file *, fl_owner_t);
@@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_l
int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
bool vfs_inode_has_locks(struct inode *inode);
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+
+void locks_init_lease(struct file_lease *);
+void locks_free_lease(struct file_lease *fl);
+struct file_lease *locks_alloc_lease(void);
int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, long, struct file_lock **, void **);
-int lease_modify(struct file_lock *, int, struct list_head *);
+int generic_setlease(struct file *, int, struct file_lease **, void **priv);
+int kernel_setlease(struct file *, int, struct file_lease **, void **);
+int vfs_setlease(struct file *, int, struct file_lease **, void **);
+int lease_modify(struct file_lease *, int, struct list_head *);
struct notifier_block;
int lease_register_notifier(struct notifier_block *);
@@ -213,7 +251,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
return -EACCES;
}
#endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
{
return -EINVAL;
}
@@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp)
return F_UNLCK;
}
+static inline bool lock_is_unlock(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline bool lock_is_read(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline bool lock_is_write(struct file_lock *fl)
+{
+ return false;
+}
+
+static inline void locks_wake_up(struct file_lock *fl)
+{
+}
+
static inline void
locks_free_lock_context(struct inode *inode)
{
@@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl)
return;
}
+static inline void locks_init_lease(struct file_lease *fl)
+{
+ return;
+}
+
static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
{
return;
@@ -306,19 +368,25 @@ static inline void lease_get_mtime(struct inode *inode,
return;
}
-static inline int generic_setlease(struct file *filp, long arg,
- struct file_lock **flp, void **priv)
+static inline int generic_setlease(struct file *filp, int arg,
+ struct file_lease **flp, void **priv)
+{
+ return -EINVAL;
+}
+
+static inline int kernel_setlease(struct file *filp, int arg,
+ struct file_lease **lease, void **priv)
{
return -EINVAL;
}
-static inline int vfs_setlease(struct file *filp, long arg,
- struct file_lock **lease, void **priv)
+static inline int vfs_setlease(struct file *filp, int arg,
+ struct file_lease **lease, void **priv)
{
return -EINVAL;
}
-static inline int lease_modify(struct file_lock *fl, int arg,
+static inline int lease_modify(struct file_lease *fl, int arg,
struct list_head *dispose)
{
return -EINVAL;
@@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode)
#endif /* !CONFIG_FILE_LOCKING */
+/* for walking lists of file_locks linked by fl_list */
+#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list)
+
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
{
return locks_lock_inode_wait(file_inode(filp), fl);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f69114083ec7..219ee7a76874 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -69,6 +69,12 @@ struct ctl_table_header;
/* unused opcode to mark special load instruction. Same as BPF_ABS */
#define BPF_PROBE_MEM 0x20
+/* unused opcode to mark special ldsx instruction. Same as BPF_IND */
+#define BPF_PROBE_MEMSX 0x40
+
+/* unused opcode to mark special load instruction. Same as BPF_MSH */
+#define BPF_PROBE_MEM32 0xa0
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -90,39 +96,49 @@ struct ctl_table_header;
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
-#define BPF_ALU64_REG(OP, DST, SRC) \
+#define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
- .off = 0, \
+ .off = OFF, \
.imm = 0 })
-#define BPF_ALU32_REG(OP, DST, SRC) \
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ BPF_ALU64_REG_OFF(OP, DST, SRC, 0)
+
+#define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
- .off = 0, \
+ .off = OFF, \
.imm = 0 })
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ BPF_ALU32_REG_OFF(OP, DST, SRC, 0)
+
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
-#define BPF_ALU64_IMM(OP, DST, IMM) \
+#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
- .off = 0, \
+ .off = OFF, \
.imm = IMM })
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ BPF_ALU64_IMM_OFF(OP, DST, IMM, 0)
-#define BPF_ALU32_IMM(OP, DST, IMM) \
+#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
- .off = 0, \
+ .off = OFF, \
.imm = IMM })
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ BPF_ALU32_IMM_OFF(OP, DST, IMM, 0)
/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
@@ -134,6 +150,16 @@ struct ctl_table_header;
.off = 0, \
.imm = LEN })
+/* Byte Swap, bswap16/32/64 */
+
+#define BPF_BSWAP(DST, LEN) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = LEN })
+
/* Short form of mov, dst_reg = src_reg */
#define BPF_MOV64_REG(DST, SRC) \
@@ -170,6 +196,24 @@ struct ctl_table_header;
.off = 0, \
.imm = IMM })
+/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */
+
+#define BPF_MOVSX64_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_MOVSX32_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Special form of mov32, used for doing explicit zero extension on dst. */
#define BPF_ZEXT_REG(DST) \
((struct bpf_insn) { \
@@ -254,6 +298,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
.off = OFF, \
.imm = 0 })
+/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */
+
+#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
@@ -496,24 +550,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
__BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \
u64, __ur_3, u64, __ur_4, u64, __ur_5)
-#define BPF_CALL_x(x, name, ...) \
+#define BPF_CALL_x(x, attr, name, ...) \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
- u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
- u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
+ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
+ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
{ \
return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
} \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
-#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__)
-#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__)
-#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__)
-#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__)
-#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__)
-#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__)
+#define __NOATTR
+#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__)
+#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__)
+
+#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__)
#define bpf_ctx_range(TYPE, MEMBER) \
offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
@@ -685,7 +742,7 @@ static inline void bpf_compute_and_save_data_end(
cb->data_end = skb->data + skb_headlen(skb);
}
-/* Restore data saved by bpf_compute_data_pointers(). */
+/* Restore data saved by bpf_compute_and_save_data_end(). */
static inline void bpf_restore_data_end(
struct sk_buff *skb, void *saved_data_end)
{
@@ -765,23 +822,6 @@ DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
u32 xdp_master_redirect(struct xdp_buff *xdp);
-static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
- struct xdp_buff *xdp)
-{
- /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
- * under local_bh_disable(), which provides the needed RCU protection
- * for accessing map entries.
- */
- u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
-
- if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
- if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
- act = xdp_master_redirect(xdp);
- }
-
- return act;
-}
-
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -920,6 +960,11 @@ bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
+bool bpf_jit_supports_exceptions(void);
+bool bpf_jit_supports_ptr_xchg(void);
+bool bpf_jit_supports_arena(void);
+u64 bpf_arch_uaddress_limit(void);
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -989,12 +1034,6 @@ int xdp_do_redirect_frame(struct net_device *dev,
struct bpf_prog *prog);
void xdp_do_flush(void);
-/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
- * it is no longer only flushing maps. Keep this define for compatibility
- * until all drivers are updated - do not use xdp_do_flush_map() in new code!
- */
-#define xdp_do_flush_map xdp_do_flush
-
void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act);
#ifdef CONFIG_INET
@@ -1037,7 +1076,7 @@ struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp);
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns);
-void bpf_prog_pack_free(struct bpf_binary_header *hdr);
+void bpf_prog_pack_free(void *ptr, u32 size);
static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
@@ -1109,7 +1148,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
return false;
if (!bpf_jit_harden)
return false;
- if (bpf_jit_harden == 1 && bpf_capable())
+ if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
return true;
@@ -1135,6 +1174,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1202,6 +1242,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
return -ERANGE;
}
+static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+ return NULL;
+}
+
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
@@ -1293,6 +1338,7 @@ struct bpf_sock_addr_kern {
*/
u64 tmp_reg;
void *t_ctx; /* Attach type specific context. */
+ u32 uaddrlen;
};
struct bpf_sock_ops_kern {
@@ -1580,10 +1626,9 @@ static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
return NULL;
}
-static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
- unsigned long len, bool flush)
+static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
+ unsigned long len, bool flush)
{
- return NULL;
}
#endif /* CONFIG_NET */
diff --git a/include/linux/find.h b/include/linux/find.h
index 5e4f39ef2e72..c69598e383c1 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -405,7 +405,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
{
unsigned long bit = find_next_and_bit(addr1, addr2, size, offset);
- if (bit < size)
+ if (bit < size || offset == 0)
return bit;
bit = find_first_and_bit(addr1, addr2, offset);
@@ -413,8 +413,8 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
}
/**
- * find_next_bit_wrap - find the next set bit in both memory regions
- * @addr: The first address to base the search on
+ * find_next_bit_wrap - find the next set bit in a memory region
+ * @addr: The address to base the search on
* @size: The bitmap size in bits
* @offset: The bitnumber to start searching at
*
@@ -427,7 +427,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr,
{
unsigned long bit = find_next_bit(addr, size, offset);
- if (bit < size)
+ if (bit < size || offset == 0)
return bit;
bit = find_first_bit(addr, offset);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index bd3fc75d4f14..dd9f2d765e68 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -75,7 +75,7 @@ void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p);
int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value);
int fw_csr_string(const u32 *directory, int key, char *buf, size_t size);
-extern struct bus_type fw_bus_type;
+extern const struct bus_type fw_bus_type;
struct fw_card_driver;
struct fw_node;
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index de7fea3bca51..f026f8926d79 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
+#include <linux/cleanup.h>
#include <linux/gfp.h>
#define FW_ACTION_NOUEVENT 0
@@ -27,6 +28,7 @@ struct firmware {
* @FW_UPLOAD_ERR_INVALID_SIZE: invalid firmware image size
* @FW_UPLOAD_ERR_RW_ERROR: read or write to HW failed, see kernel log
* @FW_UPLOAD_ERR_WEAROUT: FLASH device is approaching wear-out, wait & retry
+ * @FW_UPLOAD_ERR_FW_INVALID: invalid firmware file
* @FW_UPLOAD_ERR_MAX: Maximum error code marker
*/
enum fw_upload_err {
@@ -38,6 +40,7 @@ enum fw_upload_err {
FW_UPLOAD_ERR_INVALID_SIZE,
FW_UPLOAD_ERR_RW_ERROR,
FW_UPLOAD_ERR_WEAROUT,
+ FW_UPLOAD_ERR_FW_INVALID,
FW_UPLOAD_ERR_MAX
};
@@ -196,4 +199,6 @@ static inline void firmware_upload_unregister(struct fw_upload *fw_upload)
int firmware_request_cache(struct device *device, const char *name);
+DEFINE_FREE(firmware, struct firmware *, release_firmware(_T))
+
#endif
diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 29cd11d5a3cf..23384a54d575 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -123,7 +123,6 @@ struct cs_dsp_client_ops;
* @sysclk_mask: Mask of frequency bits within sysclk register (ADSP1 only)
* @sysclk_shift: Shift of frequency bits within sysclk register (ADSP1 only)
* @alg_regions: List of currently loaded algorithm regions
- * @fw_file_name: Filename of the current firmware
* @fw_name: Name of the current firmware
* @fw_id: ID of the current firmware, obtained from the wmfw
* @fw_id_version: Version of the firmware, obtained from the wmfw
diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h
index 4f7895a3b73c..1f176a2683fe 100644
--- a/include/linux/firmware/imx/dsp.h
+++ b/include/linux/firmware/imx/dsp.h
@@ -37,17 +37,11 @@ struct imx_dsp_ipc {
static inline void imx_dsp_set_data(struct imx_dsp_ipc *ipc, void *data)
{
- if (!ipc)
- return;
-
ipc->private_data = data;
}
static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc)
{
- if (!ipc)
- return NULL;
-
return ipc->private_data;
}
diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h
index 5cc63fe7e84d..df17196df5ff 100644
--- a/include/linux/firmware/imx/sci.h
+++ b/include/linux/firmware/imx/sci.h
@@ -21,31 +21,37 @@ int imx_scu_enable_general_irq_channel(struct device *dev);
int imx_scu_irq_register_notifier(struct notifier_block *nb);
int imx_scu_irq_unregister_notifier(struct notifier_block *nb);
int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable);
+int imx_scu_irq_get_status(u8 group, u32 *irq_status);
int imx_scu_soc_init(struct device *dev);
#else
static inline int imx_scu_soc_init(struct device *dev)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int imx_scu_enable_general_irq_channel(struct device *dev)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int imx_scu_irq_register_notifier(struct notifier_block *nb)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int imx_scu_irq_unregister_notifier(struct notifier_block *nb)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
+}
+
+static inline int imx_scu_irq_get_status(u8 group, u32 *irq_status)
+{
+ return -EOPNOTSUPP;
}
#endif
#endif /* _SC_SCI_H */
diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h
index a718f853d457..ee80ca4bb0d0 100644
--- a/include/linux/firmware/intel/stratix10-smc.h
+++ b/include/linux/firmware/intel/stratix10-smc.h
@@ -467,6 +467,31 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FIRMWARE_VERSION)
/**
+ * SMC call protocol for Mailbox, starting FUNCID from 60
+ *
+ * Call register usage:
+ * a0 INTEL_SIP_SMC_MBOX_SEND_CMD
+ * a1 mailbox command code
+ * a2 physical address that contain mailbox command data (not include header)
+ * a3 mailbox command data size in word
+ * a4 set to 0 for CASUAL, set to 1 for URGENT
+ * a5 physical address for secure firmware to put response data
+ * (not include header)
+ * a6 maximum size in word of physical address to store response data
+ * a7 not used
+ *
+ * Return status
+ * a0 INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_STATUS_REJECTED or
+ * INTEL_SIP_SMC_STATUS_ERROR
+ * a1 mailbox error code
+ * a2 response data length in word
+ * a3 not used
+ */
+#define INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD 60
+ #define INTEL_SIP_SMC_MBOX_SEND_CMD \
+ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_MBOX_SEND_CMD)
+
+/**
* Request INTEL_SIP_SMC_SVC_VERSION
*
* Sync call used to query the SIP SMC API Version
diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h
index 0c16037fd08d..60ed82112680 100644
--- a/include/linux/firmware/intel/stratix10-svc-client.h
+++ b/include/linux/firmware/intel/stratix10-svc-client.h
@@ -118,6 +118,9 @@ struct stratix10_svc_chan;
* @COMMAND_SMC_SVC_VERSION: Non-mailbox SMC SVC API Version,
* return status is SVC_STATUS_OK
*
+ * @COMMAND_MBOX_SEND_CMD: send generic mailbox command, return status is
+ * SVC_STATUS_OK or SVC_STATUS_ERROR
+ *
* @COMMAND_RSU_DCMF_STATUS: query firmware for the DCMF status
* return status is SVC_STATUS_OK or SVC_STATUS_ERROR
*
@@ -164,6 +167,8 @@ enum stratix10_svc_command_code {
COMMAND_FCS_RANDOM_NUMBER_GEN,
/* for general status poll */
COMMAND_POLL_SERVICE_STATUS = 40,
+ /* for generic mailbox send command */
+ COMMAND_MBOX_SEND_CMD = 100,
/* Non-mailbox SMC Call */
COMMAND_SMC_SVC_VERSION = 200,
};
diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h
index 28fd313340b8..5b1d16fa3f56 100644
--- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h
+++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h
@@ -46,17 +46,11 @@ struct mtk_adsp_ipc {
static inline void mtk_adsp_ipc_set_data(struct mtk_adsp_ipc *ipc, void *data)
{
- if (!ipc)
- return;
-
ipc->private_data = data;
}
static inline void *mtk_adsp_ipc_get_data(struct mtk_adsp_ipc *ipc)
{
- if (!ipc)
- return NULL;
-
return ipc->private_data;
}
diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h
index 95b0da2326a9..8eaf8922ab02 100644
--- a/include/linux/firmware/meson/meson_sm.h
+++ b/include/linux/firmware/meson/meson_sm.h
@@ -19,7 +19,7 @@ enum {
struct meson_sm_firmware;
int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index,
- u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
+ s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer,
unsigned int b_size, unsigned int cmd_index, u32 arg0,
u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h
new file mode 100644
index 000000000000..366243ee9609
--- /dev/null
+++ b/include/linux/firmware/qcom/qcom_qseecom.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM).
+ * Responsible for setting up and managing QSEECOM client devices.
+ *
+ * Copyright (C) 2023 Maximilian Luz <luzmaximilian@gmail.com>
+ */
+
+#ifndef __QCOM_QSEECOM_H
+#define __QCOM_QSEECOM_H
+
+#include <linux/auxiliary_bus.h>
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+
+#include <linux/firmware/qcom/qcom_scm.h>
+
+/**
+ * struct qseecom_client - QSEECOM client device.
+ * @aux_dev: Underlying auxiliary device.
+ * @app_id: ID of the loaded application.
+ */
+struct qseecom_client {
+ struct auxiliary_device aux_dev;
+ u32 app_id;
+};
+
+/**
+ * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client.
+ * @client: The QSEECOM client device.
+ *
+ * Returns the SCM device under which the provided QSEECOM client device
+ * operates. This function is intended to be used for DMA allocations.
+ */
+static inline struct device *qseecom_scm_dev(struct qseecom_client *client)
+{
+ return client->aux_dev.dev.parent->parent;
+}
+
+/**
+ * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client.
+ * @client: The QSEECOM client to allocate the memory for.
+ * @size: The number of bytes to allocate.
+ * @dma_handle: Pointer to where the DMA address should be stored.
+ * @gfp: Allocation flags.
+ *
+ * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for
+ * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details.
+ */
+static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp);
+}
+
+/**
+ * dma_free_coherent() - Free QSEECOM DMA memory.
+ * @client: The QSEECOM client for which the memory has been allocated.
+ * @size: The number of bytes allocated.
+ * @cpu_addr: Virtual memory address to free.
+ * @dma_handle: DMA memory address to free.
+ *
+ * Wrapper function for dma_free_coherent(), freeing memory previously
+ * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for
+ * details.
+ */
+static inline void qseecom_dma_free(struct qseecom_client *client, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle);
+}
+
+/**
+ * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app.
+ * @client: The QSEECOM client associated with the target app.
+ * @req: DMA address of the request buffer sent to the app.
+ * @req_size: Size of the request buffer.
+ * @rsp: DMA address of the response buffer, written to by the app.
+ * @rsp_size: Size of the response buffer.
+ *
+ * Sends a request to the QSEE app associated with the given client and read
+ * back its response. The caller must provide two DMA memory regions, one for
+ * the request and one for the response, and fill out the @req region with the
+ * respective (app-specific) request data. The QSEE app reads this and returns
+ * its response in the @rsp region.
+ *
+ * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send().
+ * Clients should prefer to use this wrapper.
+ *
+ * Return: Zero on success, nonzero on failure.
+ */
+static inline int qcom_qseecom_app_send(struct qseecom_client *client,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
+{
+ return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size);
+}
+
+#endif /* __QCOM_QSEECOM_H */
diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index 250ea4efb7cb..aaa19f93ac43 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher {
#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE)
#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC)
-extern bool qcom_scm_is_available(void);
+bool qcom_scm_is_available(void);
-extern int qcom_scm_set_cold_boot_addr(void *entry);
-extern int qcom_scm_set_warm_boot_addr(void *entry);
-extern void qcom_scm_cpu_power_down(u32 flags);
-extern int qcom_scm_set_remote_state(u32 state, u32 id);
+int qcom_scm_set_cold_boot_addr(void *entry);
+int qcom_scm_set_warm_boot_addr(void *entry);
+void qcom_scm_cpu_power_down(u32 flags);
+int qcom_scm_set_remote_state(u32 state, u32 id);
struct qcom_scm_pas_metadata {
void *ptr;
@@ -72,54 +72,69 @@ struct qcom_scm_pas_metadata {
ssize_t size;
};
-extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
- size_t size,
- struct qcom_scm_pas_metadata *ctx);
+int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size,
+ struct qcom_scm_pas_metadata *ctx);
void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx);
-extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
- phys_addr_t size);
-extern int qcom_scm_pas_auth_and_reset(u32 peripheral);
-extern int qcom_scm_pas_shutdown(u32 peripheral);
-extern bool qcom_scm_pas_supported(u32 peripheral);
-
-extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val);
-extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
-
-extern bool qcom_scm_restore_sec_cfg_available(void);
-extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
-extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
-extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
-extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size);
-extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
- u32 cp_nonpixel_start,
- u32 cp_nonpixel_size);
-extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
- u64 *src,
- const struct qcom_scm_vmperm *newvm,
- unsigned int dest_cnt);
-
-extern bool qcom_scm_ocmem_lock_available(void);
-extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset,
- u32 size, u32 mode);
-extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset,
- u32 size);
-
-extern bool qcom_scm_ice_available(void);
-extern int qcom_scm_ice_invalidate_key(u32 index);
-extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size,
- enum qcom_scm_ice_cipher cipher,
- u32 data_unit_size);
-
-extern bool qcom_scm_hdcp_available(void);
-extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
- u32 *resp);
-
-extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt);
-extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
-
-extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
- u64 limit_node, u32 node_id, u64 version);
-extern int qcom_scm_lmh_profile_change(u32 profile_id);
-extern bool qcom_scm_lmh_dcvsh_available(void);
+int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size);
+int qcom_scm_pas_auth_and_reset(u32 peripheral);
+int qcom_scm_pas_shutdown(u32 peripheral);
+bool qcom_scm_pas_supported(u32 peripheral);
+
+int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val);
+int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
+
+bool qcom_scm_restore_sec_cfg_available(void);
+int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
+int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
+int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
+int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size);
+int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
+ u32 cp_nonpixel_start, u32 cp_nonpixel_size);
+int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src,
+ const struct qcom_scm_vmperm *newvm,
+ unsigned int dest_cnt);
+
+bool qcom_scm_ocmem_lock_available(void);
+int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size,
+ u32 mode);
+int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size);
+
+bool qcom_scm_ice_available(void);
+int qcom_scm_ice_invalidate_key(u32 index);
+int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size,
+ enum qcom_scm_ice_cipher cipher, u32 data_unit_size);
+
+bool qcom_scm_hdcp_available(void);
+int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp);
+
+int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt);
+int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
+
+int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version);
+int qcom_scm_lmh_profile_change(u32 profile_id);
+bool qcom_scm_lmh_dcvsh_available(void);
+
+#ifdef CONFIG_QCOM_QSEECOM
+
+int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
+int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size);
+
+#else /* CONFIG_QCOM_QSEECOM */
+
+static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id)
+{
+ return -EINVAL;
+}
+
+static inline int qcom_scm_qseecom_app_send(u32 app_id,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_QCOM_QSEECOM */
#endif
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 9dda7d9898ff..1a069a56c961 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -3,6 +3,7 @@
* Xilinx Zynq MPSoC Firmware layer
*
* Copyright (C) 2014-2021 Xilinx
+ * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc.
*
* Michal Simek <michal.simek@amd.com>
* Davorin Mista <davorin.mista@aggios.com>
@@ -32,8 +33,25 @@
#define PM_SIP_SVC 0xC2000000
/* PM API versions */
+#define PM_API_VERSION_1 1
#define PM_API_VERSION_2 2
+#define PM_PINCTRL_PARAM_SET_VERSION 2
+
+#define ZYNQMP_FAMILY_CODE 0x23
+#define VERSAL_FAMILY_CODE 0x26
+
+/* When all subfamily of platform need to support */
+#define ALL_SUB_FAMILY_CODE 0x00
+#define VERSAL_SUB_FAMILY_CODE 0x01
+#define VERSALNET_SUB_FAMILY_CODE 0x03
+
+#define FAMILY_CODE_MASK GENMASK(27, 21)
+#define SUB_FAMILY_CODE_MASK GENMASK(20, 19)
+
+#define API_ID_MASK GENMASK(7, 0)
+#define MODULE_ID_MASK GENMASK(11, 8)
+
/* ATF only commands */
#define TF_A_PM_REGISTER_SGI 0xa04
#define PM_GET_TRUSTZONE_VERSION 0xa03
@@ -78,15 +96,41 @@
/*
* Node IDs for the Error Events.
*/
-#define EVENT_ERROR_PMC_ERR1 (0x28100000U)
-#define EVENT_ERROR_PMC_ERR2 (0x28104000U)
-#define EVENT_ERROR_PSM_ERR1 (0x28108000U)
-#define EVENT_ERROR_PSM_ERR2 (0x2810C000U)
+#define VERSAL_EVENT_ERROR_PMC_ERR1 (0x28100000U)
+#define VERSAL_EVENT_ERROR_PMC_ERR2 (0x28104000U)
+#define VERSAL_EVENT_ERROR_PSM_ERR1 (0x28108000U)
+#define VERSAL_EVENT_ERROR_PSM_ERR2 (0x2810C000U)
+
+#define VERSAL_NET_EVENT_ERROR_PMC_ERR1 (0x28100000U)
+#define VERSAL_NET_EVENT_ERROR_PMC_ERR2 (0x28104000U)
+#define VERSAL_NET_EVENT_ERROR_PMC_ERR3 (0x28108000U)
+#define VERSAL_NET_EVENT_ERROR_PSM_ERR1 (0x2810C000U)
+#define VERSAL_NET_EVENT_ERROR_PSM_ERR2 (0x28110000U)
+#define VERSAL_NET_EVENT_ERROR_PSM_ERR3 (0x28114000U)
+#define VERSAL_NET_EVENT_ERROR_PSM_ERR4 (0x28118000U)
/* ZynqMP SD tap delay tuning */
#define SD_ITAPDLY 0xFF180314
#define SD_OTAPDLYSEL 0xFF180318
+/**
+ * XPM_EVENT_ERROR_MASK_DDRMC_CR: Error event mask for DDRMC MC Correctable ECC Error.
+ */
+#define XPM_EVENT_ERROR_MASK_DDRMC_CR BIT(18)
+
+/**
+ * XPM_EVENT_ERROR_MASK_DDRMC_NCR: Error event mask for DDRMC MC Non-Correctable ECC Error.
+ */
+#define XPM_EVENT_ERROR_MASK_DDRMC_NCR BIT(19)
+#define XPM_EVENT_ERROR_MASK_NOC_NCR BIT(13)
+#define XPM_EVENT_ERROR_MASK_NOC_CR BIT(12)
+
+enum pm_module_id {
+ PM_MODULE_ID = 0x0,
+ XSEM_MODULE_ID = 0x3,
+ TF_A_MODULE_ID = 0xa,
+};
+
enum pm_api_cb_id {
PM_INIT_SUSPEND_CB = 30,
PM_ACKNOWLEDGE_CB = 31,
@@ -94,6 +138,7 @@ enum pm_api_cb_id {
};
enum pm_api_id {
+ PM_API_FEATURES = 0,
PM_GET_API_VERSION = 1,
PM_REGISTER_NOTIFIER = 5,
PM_FORCE_POWERDOWN = 8,
@@ -113,7 +158,6 @@ enum pm_api_id {
PM_SECURE_SHA = 26,
PM_PINCTRL_REQUEST = 28,
PM_PINCTRL_RELEASE = 29,
- PM_PINCTRL_GET_FUNCTION = 30,
PM_PINCTRL_SET_FUNCTION = 31,
PM_PINCTRL_CONFIG_PARAM_GET = 32,
PM_PINCTRL_CONFIG_PARAM_SET = 33,
@@ -124,19 +168,20 @@ enum pm_api_id {
PM_CLOCK_GETSTATE = 38,
PM_CLOCK_SETDIVIDER = 39,
PM_CLOCK_GETDIVIDER = 40,
- PM_CLOCK_SETRATE = 41,
- PM_CLOCK_GETRATE = 42,
PM_CLOCK_SETPARENT = 43,
PM_CLOCK_GETPARENT = 44,
PM_FPGA_READ = 46,
PM_SECURE_AES = 47,
+ PM_EFUSE_ACCESS = 53,
PM_FEATURE_CHECK = 63,
};
/* PMU-FW return status codes */
enum pm_ret_status {
XST_PM_SUCCESS = 0,
+ XST_PM_INVALID_VERSION = 4,
XST_PM_NO_FEATURE = 19,
+ XST_PM_INVALID_CRC = 301,
XST_PM_INTERNAL = 2000,
XST_PM_CONFLICT = 2001,
XST_PM_NO_ACCESS = 2002,
@@ -484,20 +529,18 @@ struct zynqmp_pm_query_data {
u32 arg3;
};
-int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
- u32 arg2, u32 arg3, u32 *ret_payload);
+int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...);
#if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE)
int zynqmp_pm_get_api_version(u32 *version);
int zynqmp_pm_get_chipid(u32 *idcode, u32 *version);
+int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily);
int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out);
int zynqmp_pm_clock_enable(u32 clock_id);
int zynqmp_pm_clock_disable(u32 clock_id);
int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state);
int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider);
int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider);
-int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate);
-int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate);
int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id);
int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id);
int zynqmp_pm_set_pll_frac_mode(u32 clk_id, u32 mode);
@@ -521,6 +564,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
const u32 qos,
const enum zynqmp_pm_request_ack ack);
int zynqmp_pm_aes_engine(const u64 address, u32 *out);
+int zynqmp_pm_efuse_access(const u64 address, u32 *out);
int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags);
int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags);
int zynqmp_pm_fpga_get_status(u32 *value);
@@ -534,7 +578,6 @@ int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype);
int zynqmp_pm_set_boot_health_status(u32 value);
int zynqmp_pm_pinctrl_request(const u32 pin);
int zynqmp_pm_pinctrl_release(const u32 pin);
-int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id);
int zynqmp_pm_pinctrl_set_function(const u32 pin, const u32 id);
int zynqmp_pm_pinctrl_get_config(const u32 pin, const u32 param,
u32 *value);
@@ -571,6 +614,11 @@ static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version)
return -ENODEV;
}
+static inline int zynqmp_pm_get_family_info(u32 *family, u32 *subfamily)
+{
+ return -ENODEV;
+}
+
static inline int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata,
u32 *out)
{
@@ -602,16 +650,6 @@ static inline int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider)
return -ENODEV;
}
-static inline int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate)
-{
- return -ENODEV;
-}
-
-static inline int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate)
-{
- return -ENODEV;
-}
-
static inline int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id)
{
return -ENODEV;
@@ -714,6 +752,11 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out)
return -ENODEV;
}
+static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out)
+{
+ return -ENODEV;
+}
+
static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size,
const u32 flags)
{
@@ -781,11 +824,6 @@ static inline int zynqmp_pm_pinctrl_release(const u32 pin)
return -ENODEV;
}
-static inline int zynqmp_pm_pinctrl_get_function(const u32 pin, u32 *id)
-{
- return -ENODEV;
-}
-
static inline int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id)
{
return -ENODEV;
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index 3e378b1fb0bc..e9a72fd0bfe7 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -39,38 +39,6 @@ void fprop_global_destroy(struct fprop_global *p);
bool fprop_new_period(struct fprop_global *p, int periods);
/*
- * ---- SINGLE ----
- */
-struct fprop_local_single {
- /* the local events counter */
- unsigned long events;
- /* Period in which we last updated events */
- unsigned int period;
- raw_spinlock_t lock; /* Protect period and numerator */
-};
-
-#define INIT_FPROP_LOCAL_SINGLE(name) \
-{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
-}
-
-int fprop_local_init_single(struct fprop_local_single *pl);
-void fprop_local_destroy_single(struct fprop_local_single *pl);
-void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl);
-void fprop_fraction_single(struct fprop_global *p,
- struct fprop_local_single *pl, unsigned long *numerator,
- unsigned long *denominator);
-
-static inline
-void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __fprop_inc_single(p, pl);
- local_irq_restore(flags);
-}
-
-/*
* ---- PERCPU ----
*/
struct fprop_local_percpu {
diff --git a/include/linux/font.h b/include/linux/font.h
index abf1442ce719..81caffd51bb4 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -57,7 +57,8 @@ extern const struct font_desc *find_font(const char *name);
/* Get the default font for a specific screen size */
extern const struct font_desc *get_default_font(int xres, int yres,
- u32 font_w, u32 font_h);
+ unsigned long *font_w,
+ unsigned long *font_h);
/* Max. length for the name of a predefined font */
#define MAX_FONT_NAME 32
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index da51a83b2829..6aeebe0a6777 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_
+#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/const.h>
#include <linux/limits.h>
@@ -9,7 +10,46 @@
#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable
#define __RENAME(x) __asm__(#x)
-void fortify_panic(const char *name) __noreturn __cold;
+#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r)
+#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r)
+#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \
+ FIELD_PREP(GENMASK(7, 1), func))
+
+#ifndef fortify_panic
+# define fortify_panic(func, write, avail, size, retfail) \
+ __fortify_panic(FORTIFY_REASON(func, write), avail, size)
+#endif
+
+#define FORTIFY_READ 0
+#define FORTIFY_WRITE 1
+
+#define EACH_FORTIFY_FUNC(macro) \
+ macro(strncpy), \
+ macro(strnlen), \
+ macro(strlen), \
+ macro(strscpy), \
+ macro(strlcat), \
+ macro(strcat), \
+ macro(strncat), \
+ macro(memset), \
+ macro(memcpy), \
+ macro(memmove), \
+ macro(memscan), \
+ macro(memcmp), \
+ macro(memchr), \
+ macro(memchr_inv), \
+ macro(kmemdup), \
+ macro(strcpy), \
+ macro(UNKNOWN),
+
+#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func
+
+enum fortify_func {
+ EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC)
+};
+
+void __fortify_report(const u8 reason, const size_t avail, const size_t size);
+void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn;
void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)");
void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?");
@@ -93,13 +133,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size)
#if __has_builtin(__builtin_dynamic_object_size)
#define POS __pass_dynamic_object_size(1)
#define POS0 __pass_dynamic_object_size(0)
-#define __struct_size(p) __builtin_dynamic_object_size(p, 0)
-#define __member_size(p) __builtin_dynamic_object_size(p, 1)
#else
#define POS __pass_object_size(1)
#define POS0 __pass_object_size(0)
-#define __struct_size(p) __builtin_object_size(p, 0)
-#define __member_size(p) __builtin_object_size(p, 1)
#endif
#define __compiletime_lessthan(bounds, length) ( \
@@ -147,7 +183,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__write_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p);
return __underlying_strncpy(p, q, size);
}
@@ -178,7 +214,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size
/* Do not check characters beyond the end of p. */
ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
if (p_size <= ret && maxlen != ret)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret);
return ret;
}
@@ -214,82 +250,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
return __underlying_strlen(p);
ret = strnlen(p, p_size);
if (p_size <= ret)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret);
return ret;
}
-/* Defined after fortified strlen() to reuse it. */
-extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
-/**
- * strlcpy - Copy a string into another string buffer
- *
- * @p: pointer to destination of copy
- * @q: pointer to NUL-terminated source string to copy
- * @size: maximum number of bytes to write at @p
- *
- * If strlen(@q) >= @size, the copy of @q will be truncated at
- * @size - 1 bytes. @p will always be NUL-terminated.
- *
- * Do not use this function. While FORTIFY_SOURCE tries to avoid
- * over-reads when calculating strlen(@q), it is still possible.
- * Prefer strscpy(), though note its different return values for
- * detecting truncation.
- *
- * Returns total number of bytes written to @p, including terminating NUL.
- *
- */
-__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size)
-{
- const size_t p_size = __member_size(p);
- const size_t q_size = __member_size(q);
- size_t q_len; /* Full count of source string length. */
- size_t len; /* Count of characters going into destination. */
-
- if (p_size == SIZE_MAX && q_size == SIZE_MAX)
- return __real_strlcpy(p, q, size);
- q_len = strlen(q);
- len = (q_len >= size) ? size - 1 : q_len;
- if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) {
- /* Write size is always larger than destination. */
- if (len >= p_size)
- __write_overflow();
- }
- if (size) {
- if (len >= p_size)
- fortify_panic(__func__);
- __underlying_memcpy(p, q, len);
- p[len] = '\0';
- }
- return q_len;
-}
-
/* Defined after fortified strnlen() to reuse it. */
-extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
-/**
- * strscpy - Copy a C-string into a sized buffer
- *
- * @p: Where to copy the string to
- * @q: Where to copy the string from
- * @size: Size of destination buffer
- *
- * Copy the source string @q, or as much of it as fits, into the destination
- * @p buffer. The behavior is undefined if the string buffers overlap. The
- * destination @p buffer is always NUL terminated, unless it's zero-sized.
- *
- * Preferred to strlcpy() since the API doesn't require reading memory
- * from the source @q string beyond the specified @size bytes, and since
- * the return value is easier to error-check than strlcpy()'s.
- * In addition, the implementation is robust to the string changing out
- * from underneath it, unlike the current strlcpy() implementation.
- *
- * Preferred to strncpy() since it always returns a valid string, and
- * doesn't unnecessarily force the tail of the destination buffer to be
- * zero padded. If padding is desired please use strscpy_pad().
- *
- * Returns the number of characters copied in @p (not including the
- * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
- */
-__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
+extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy);
+__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size)
{
/* Use string size rather than possible enclosing struct size. */
const size_t p_size = __member_size(p);
@@ -333,8 +300,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s
* Generate a runtime write overflow error if len is greater than
* p_size.
*/
- if (len > p_size)
- fortify_panic(__func__);
+ if (p_size < len)
+ fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG);
/*
* We can now safely call vanilla strscpy because we are protected from:
@@ -392,7 +359,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail)
/* Give up if string is already overflowed. */
if (p_size <= p_len)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted);
if (actual >= avail) {
copy_len = avail - p_len - 1;
@@ -401,7 +368,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail)
/* Give up if copy will overflow. */
if (p_size <= actual)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted);
__underlying_memcpy(p + p_len, q, copy_len);
p[actual] = '\0';
@@ -428,9 +395,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2)
char *strcat(char * const POS p, const char *q)
{
const size_t p_size = __member_size(p);
+ const size_t wanted = strlcat(p, q, p_size);
- if (strlcat(p, q, p_size) >= p_size)
- fortify_panic(__func__);
+ if (p_size <= wanted)
+ fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p);
return p;
}
@@ -459,20 +427,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun
{
const size_t p_size = __member_size(p);
const size_t q_size = __member_size(q);
- size_t p_len, copy_len;
+ size_t p_len, copy_len, total;
if (p_size == SIZE_MAX && q_size == SIZE_MAX)
return __underlying_strncat(p, q, count);
p_len = strlen(p);
copy_len = strnlen(q, count);
- if (p_size < p_len + copy_len + 1)
- fortify_panic(__func__);
+ total = p_len + copy_len + 1;
+ if (p_size < total)
+ fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p);
__underlying_memcpy(p + p_len, q, copy_len);
p[p_len + copy_len] = '\0';
return p;
}
-__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
+__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size,
const size_t p_size,
const size_t p_size_field)
{
@@ -507,7 +476,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
* lengths are unknown.)
*/
if (p_size != SIZE_MAX && p_size < size)
- fortify_panic("memset");
+ fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true);
+ return false;
}
#define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \
@@ -561,7 +531,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t q_size,
const size_t p_size_field,
const size_t q_size_field,
- const char *func)
+ const u8 func)
{
if (__builtin_constant_p(size)) {
/*
@@ -605,9 +575,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
* (The SIZE_MAX test is to optimize away checks where the buffer
* lengths are unknown.)
*/
- if ((p_size != SIZE_MAX && p_size < size) ||
- (q_size != SIZE_MAX && q_size < size))
- fortify_panic(func);
+ if (p_size != SIZE_MAX && p_size < size)
+ fortify_panic(func, FORTIFY_WRITE, p_size, size, true);
+ else if (q_size != SIZE_MAX && q_size < size)
+ fortify_panic(func, FORTIFY_READ, p_size, size, true);
/*
* Warn when writing beyond destination field size.
@@ -640,10 +611,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
const size_t __q_size_field = (q_size_field); \
WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \
__q_size, __p_size_field, \
- __q_size_field, #op), \
+ __q_size_field, FORTIFY_FUNC_ ##op), \
#op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \
__fortify_size, \
- "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \
+ "field \"" #p "\" at " FILE_LINE, \
__p_size_field); \
__underlying_##op(p, q, __fortify_size); \
})
@@ -707,7 +678,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL);
return __real_memscan(p, c, size);
}
@@ -723,8 +694,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t
if (__compiletime_lessthan(q_size, size))
__read_overflow2();
}
- if (p_size < size || q_size < size)
- fortify_panic(__func__);
+ if (p_size < size)
+ fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN);
+ else if (q_size < size)
+ fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN);
return __underlying_memcmp(p, q, size);
}
@@ -736,7 +709,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL);
return __underlying_memchr(p, c, size);
}
@@ -748,7 +721,7 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL);
return __real_memchr_inv(p, c, size);
}
@@ -761,7 +734,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp
if (__compiletime_lessthan(p_size, size))
__read_overflow();
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL);
return __real_kmemdup(p, size, gfp);
}
@@ -798,7 +771,7 @@ char *strcpy(char * const POS p, const char * const POS q)
__write_overflow();
/* Run-time check for dynamic size overflow. */
if (p_size < size)
- fortify_panic(__func__);
+ fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p);
__underlying_memcpy(p, q, size);
return p;
}
diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h
new file mode 100644
index 000000000000..9724d4b44b9c
--- /dev/null
+++ b/include/linux/framer/framer-provider.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Generic framer profider header file
+ *
+ * Copyright 2023 CS GROUP France
+ *
+ * Author: Herve Codina <herve.codina@bootlin.com>
+ */
+
+#ifndef __DRIVERS_PROVIDER_FRAMER_H
+#define __DRIVERS_PROVIDER_FRAMER_H
+
+#include <linux/export.h>
+#include <linux/framer/framer.h>
+#include <linux/types.h>
+
+#define FRAMER_FLAG_POLL_STATUS BIT(0)
+
+/**
+ * struct framer_ops - set of function pointers for performing framer operations
+ * @init: operation to be performed for initializing the framer
+ * @exit: operation to be performed while exiting
+ * @power_on: powering on the framer
+ * @power_off: powering off the framer
+ * @flags: OR-ed flags (FRAMER_FLAG_*) to ask for core functionality
+ * - @FRAMER_FLAG_POLL_STATUS:
+ * Ask the core to perform a polling to get the framer status and
+ * notify consumers on change.
+ * The framer should call @framer_notify_status_change() when it
+ * detects a status change. This is usually done using interrupts.
+ * If the framer cannot detect this change, it can ask the core for
+ * a status polling. The core will call @get_status() periodically
+ * and, on change detected, it will notify the consumer.
+ * the @get_status()
+ * @owner: the module owner containing the ops
+ */
+struct framer_ops {
+ int (*init)(struct framer *framer);
+ void (*exit)(struct framer *framer);
+ int (*power_on)(struct framer *framer);
+ int (*power_off)(struct framer *framer);
+
+ /**
+ * @get_status:
+ *
+ * Optional.
+ *
+ * Used to get the framer status. framer_init() must have
+ * been called on the framer.
+ *
+ * Returns: 0 if successful, an negative error code otherwise
+ */
+ int (*get_status)(struct framer *framer, struct framer_status *status);
+
+ /**
+ * @set_config:
+ *
+ * Optional.
+ *
+ * Used to set the framer configuration. framer_init() must have
+ * been called on the framer.
+ *
+ * Returns: 0 if successful, an negative error code otherwise
+ */
+ int (*set_config)(struct framer *framer, const struct framer_config *config);
+
+ /**
+ * @get_config:
+ *
+ * Optional.
+ *
+ * Used to get the framer configuration. framer_init() must have
+ * been called on the framer.
+ *
+ * Returns: 0 if successful, an negative error code otherwise
+ */
+ int (*get_config)(struct framer *framer, struct framer_config *config);
+
+ u32 flags;
+ struct module *owner;
+};
+
+/**
+ * struct framer_provider - represents the framer provider
+ * @dev: framer provider device
+ * @owner: the module owner having of_xlate
+ * @list: to maintain a linked list of framer providers
+ * @of_xlate: function pointer to obtain framer instance from framer pointer
+ */
+struct framer_provider {
+ struct device *dev;
+ struct module *owner;
+ struct list_head list;
+ struct framer * (*of_xlate)(struct device *dev,
+ const struct of_phandle_args *args);
+};
+
+static inline void framer_set_drvdata(struct framer *framer, void *data)
+{
+ dev_set_drvdata(&framer->dev, data);
+}
+
+static inline void *framer_get_drvdata(struct framer *framer)
+{
+ return dev_get_drvdata(&framer->dev);
+}
+
+#if IS_ENABLED(CONFIG_GENERIC_FRAMER)
+
+/* Create and destroy a framer */
+struct framer *framer_create(struct device *dev, struct device_node *node,
+ const struct framer_ops *ops);
+void framer_destroy(struct framer *framer);
+
+/* devm version */
+struct framer *devm_framer_create(struct device *dev, struct device_node *node,
+ const struct framer_ops *ops);
+
+struct framer *framer_provider_simple_of_xlate(struct device *dev,
+ const struct of_phandle_args *args);
+
+struct framer_provider *
+__framer_provider_of_register(struct device *dev, struct module *owner,
+ struct framer *(*of_xlate)(struct device *dev,
+ const struct of_phandle_args *args));
+
+void framer_provider_of_unregister(struct framer_provider *framer_provider);
+
+struct framer_provider *
+__devm_framer_provider_of_register(struct device *dev, struct module *owner,
+ struct framer *(*of_xlate)(struct device *dev,
+ const struct of_phandle_args *args));
+
+void framer_notify_status_change(struct framer *framer);
+
+#else /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */
+
+static inline struct framer *framer_create(struct device *dev, struct device_node *node,
+ const struct framer_ops *ops)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static inline void framer_destroy(struct framer *framer)
+{
+}
+
+/* devm version */
+static inline struct framer *devm_framer_create(struct device *dev, struct device_node *node,
+ const struct framer_ops *ops)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static inline struct framer *framer_provider_simple_of_xlate(struct device *dev,
+ const struct of_phandle_args *args)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static inline struct framer_provider *
+__framer_provider_of_register(struct device *dev, struct module *owner,
+ struct framer *(*of_xlate)(struct device *dev,
+ const struct of_phandle_args *args))
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+void framer_provider_of_unregister(struct framer_provider *framer_provider)
+{
+}
+
+static inline struct framer_provider *
+__devm_framer_provider_of_register(struct device *dev, struct module *owner,
+ struct framer *(*of_xlate)(struct device *dev,
+ const struct of_phandle_args *args))
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+void framer_notify_status_change(struct framer *framer)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_GENERIC_FRAMER) */
+
+#define framer_provider_of_register(dev, xlate) \
+ __framer_provider_of_register((dev), THIS_MODULE, (xlate))
+
+#define devm_framer_provider_of_register(dev, xlate) \
+ __devm_framer_provider_of_register((dev), THIS_MODULE, (xlate))
+
+#endif /* __DRIVERS_PROVIDER_FRAMER_H */
diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h
new file mode 100644
index 000000000000..2b85fe9e7f9a
--- /dev/null
+++ b/include/linux/framer/framer.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Generic framer header file
+ *
+ * Copyright 2023 CS GROUP France
+ *
+ * Author: Herve Codina <herve.codina@bootlin.com>
+ */
+
+#ifndef __DRIVERS_FRAMER_H
+#define __DRIVERS_FRAMER_H
+
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+
+/**
+ * enum framer_iface - Framer interface
+ * @FRAMER_IFACE_E1: E1 interface
+ * @FRAMER_IFACE_T1: T1 interface
+ */
+enum framer_iface {
+ FRAMER_IFACE_E1,
+ FRAMER_IFACE_T1,
+};
+
+/**
+ * enum framer_clock_type - Framer clock type
+ * @FRAMER_CLOCK_EXT: External clock
+ * @FRAMER_CLOCK_INT: Internal clock
+ */
+enum framer_clock_type {
+ FRAMER_CLOCK_EXT,
+ FRAMER_CLOCK_INT,
+};
+
+/**
+ * struct framer_config - Framer configuration
+ * @iface: Framer line interface
+ * @clock_type: Framer clock type
+ * @line_clock_rate: Framer line clock rate
+ */
+struct framer_config {
+ enum framer_iface iface;
+ enum framer_clock_type clock_type;
+ unsigned long line_clock_rate;
+};
+
+/**
+ * struct framer_status - Framer status
+ * @link_is_on: Framer link state. true, the link is on, false, the link is off.
+ */
+struct framer_status {
+ bool link_is_on;
+};
+
+/**
+ * enum framer_event - Event available for notification
+ * @FRAMER_EVENT_STATUS: Event notified on framer_status changes
+ */
+enum framer_event {
+ FRAMER_EVENT_STATUS,
+};
+
+/**
+ * struct framer - represents the framer device
+ * @dev: framer device
+ * @id: id of the framer device
+ * @ops: function pointers for performing framer operations
+ * @mutex: mutex to protect framer_ops
+ * @init_count: used to protect when the framer is used by multiple consumers
+ * @power_count: used to protect when the framer is used by multiple consumers
+ * @pwr: power regulator associated with the framer
+ * @notify_status_work: work structure used for status notifications
+ * @notifier_list: notifier list used for notifications
+ * @polling_work: delayed work structure used for the polling task
+ * @prev_status: previous read status used by the polling task to detect changes
+ */
+struct framer {
+ struct device dev;
+ int id;
+ const struct framer_ops *ops;
+ struct mutex mutex; /* Protect framer */
+ int init_count;
+ int power_count;
+ struct regulator *pwr;
+ struct work_struct notify_status_work;
+ struct blocking_notifier_head notifier_list;
+ struct delayed_work polling_work;
+ struct framer_status prev_status;
+};
+
+#if IS_ENABLED(CONFIG_GENERIC_FRAMER)
+int framer_pm_runtime_get(struct framer *framer);
+int framer_pm_runtime_get_sync(struct framer *framer);
+int framer_pm_runtime_put(struct framer *framer);
+int framer_pm_runtime_put_sync(struct framer *framer);
+int framer_init(struct framer *framer);
+int framer_exit(struct framer *framer);
+int framer_power_on(struct framer *framer);
+int framer_power_off(struct framer *framer);
+int framer_get_status(struct framer *framer, struct framer_status *status);
+int framer_get_config(struct framer *framer, struct framer_config *config);
+int framer_set_config(struct framer *framer, const struct framer_config *config);
+int framer_notifier_register(struct framer *framer, struct notifier_block *nb);
+int framer_notifier_unregister(struct framer *framer, struct notifier_block *nb);
+
+struct framer *framer_get(struct device *dev, const char *con_id);
+void framer_put(struct device *dev, struct framer *framer);
+
+struct framer *devm_framer_get(struct device *dev, const char *con_id);
+struct framer *devm_framer_optional_get(struct device *dev, const char *con_id);
+#else
+static inline int framer_pm_runtime_get(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_pm_runtime_get_sync(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_pm_runtime_put(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_pm_runtime_put_sync(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_init(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_exit(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_power_on(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_power_off(struct framer *framer)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_get_status(struct framer *framer, struct framer_status *status)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_get_config(struct framer *framer, struct framer_config *config)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_set_config(struct framer *framer, const struct framer_config *config)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_notifier_register(struct framer *framer,
+ struct notifier_block *nb)
+{
+ return -ENOSYS;
+}
+
+static inline int framer_notifier_unregister(struct framer *framer,
+ struct notifier_block *nb)
+{
+ return -ENOSYS;
+}
+
+static inline struct framer *framer_get(struct device *dev, const char *con_id)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static inline void framer_put(struct device *dev, struct framer *framer)
+{
+}
+
+static inline struct framer *devm_framer_get(struct device *dev, const char *con_id)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static inline struct framer *devm_framer_optional_get(struct device *dev, const char *con_id)
+{
+ return NULL;
+}
+
+#endif
+
+#endif /* __DRIVERS_FRAMER_H */
diff --git a/include/linux/framer/pef2256.h b/include/linux/framer/pef2256.h
new file mode 100644
index 000000000000..71d80af58c40
--- /dev/null
+++ b/include/linux/framer/pef2256.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PEF2256 consumer API
+ *
+ * Copyright 2023 CS GROUP France
+ *
+ * Author: Herve Codina <herve.codina@bootlin.com>
+ */
+#ifndef __PEF2256_H__
+#define __PEF2256_H__
+
+#include <linux/types.h>
+
+struct pef2256;
+struct regmap;
+
+/* Retrieve the PEF2256 regmap */
+struct regmap *pef2256_get_regmap(struct pef2256 *pef2256);
+
+/* PEF2256 hardware versions */
+enum pef2256_version {
+ PEF2256_VERSION_UNKNOWN,
+ PEF2256_VERSION_1_2,
+ PEF2256_VERSION_2_1,
+ PEF2256_VERSION_2_2,
+};
+
+/* Get the PEF2256 hardware version */
+enum pef2256_version pef2256_get_version(struct pef2256 *pef2256);
+
+#endif /* __PEF2256_H__ */
diff --git a/include/linux/freelist.h b/include/linux/freelist.h
deleted file mode 100644
index fc1842b96469..000000000000
--- a/include/linux/freelist.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
-#ifndef FREELIST_H
-#define FREELIST_H
-
-#include <linux/atomic.h>
-
-/*
- * Copyright: cameron@moodycamel.com
- *
- * A simple CAS-based lock-free free list. Not the fastest thing in the world
- * under heavy contention, but simple and correct (assuming nodes are never
- * freed until after the free list is destroyed), and fairly speedy under low
- * contention.
- *
- * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists
- */
-
-struct freelist_node {
- atomic_t refs;
- struct freelist_node *next;
-};
-
-struct freelist_head {
- struct freelist_node *head;
-};
-
-#define REFS_ON_FREELIST 0x80000000
-#define REFS_MASK 0x7FFFFFFF
-
-static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list)
-{
- /*
- * Since the refcount is zero, and nobody can increase it once it's
- * zero (except us, and we run only one copy of this method per node at
- * a time, i.e. the single thread case), then we know we can safely
- * change the next pointer of the node; however, once the refcount is
- * back above zero, then other threads could increase it (happens under
- * heavy contention, when the refcount goes to zero in between a load
- * and a refcount increment of a node in try_get, then back up to
- * something non-zero, then the refcount increment is done by the other
- * thread) -- so if the CAS to add the node to the actual list fails,
- * decrese the refcount and leave the add operation to the next thread
- * who puts the refcount back to zero (which could be us, hence the
- * loop).
- */
- struct freelist_node *head = READ_ONCE(list->head);
-
- for (;;) {
- WRITE_ONCE(node->next, head);
- atomic_set_release(&node->refs, 1);
-
- if (!try_cmpxchg_release(&list->head, &head, node)) {
- /*
- * Hmm, the add failed, but we can only try again when
- * the refcount goes back to zero.
- */
- if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1)
- continue;
- }
- return;
- }
-}
-
-static inline void freelist_add(struct freelist_node *node, struct freelist_head *list)
-{
- /*
- * We know that the should-be-on-freelist bit is 0 at this point, so
- * it's safe to set it using a fetch_add.
- */
- if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) {
- /*
- * Oh look! We were the last ones referencing this node, and we
- * know we want to add it to the free list, so let's do it!
- */
- __freelist_add(node, list);
- }
-}
-
-static inline struct freelist_node *freelist_try_get(struct freelist_head *list)
-{
- struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head);
- unsigned int refs;
-
- while (head) {
- prev = head;
- refs = atomic_read(&head->refs);
- if ((refs & REFS_MASK) == 0 ||
- !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) {
- head = smp_load_acquire(&list->head);
- continue;
- }
-
- /*
- * Good, reference count has been incremented (it wasn't at
- * zero), which means we can read the next and not worry about
- * it changing between now and the time we do the CAS.
- */
- next = READ_ONCE(head->next);
- if (try_cmpxchg_acquire(&list->head, &head, next)) {
- /*
- * Yay, got the node. This means it was on the list,
- * which means should-be-on-freelist must be false no
- * matter the refcount (because nobody else knows it's
- * been taken off yet, it can't have been put back on).
- */
- WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST);
-
- /*
- * Decrease refcount twice, once for our ref, and once
- * for the list's ref.
- */
- atomic_fetch_add(-2, &head->refs);
-
- return head;
- }
-
- /*
- * OK, the head must have changed on us, but we still need to decrement
- * the refcount we increased.
- */
- refs = atomic_fetch_add(-1, &prev->refs);
- if (refs == REFS_ON_FREELIST + 1)
- __freelist_add(prev, list);
- }
-
- return NULL;
-}
-
-#endif /* FREELIST_H */
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
deleted file mode 100644
index eaa0ac5f9003..000000000000
--- a/include/linux/frontswap.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_FRONTSWAP_H
-#define _LINUX_FRONTSWAP_H
-
-#include <linux/swap.h>
-#include <linux/mm.h>
-#include <linux/bitops.h>
-#include <linux/jump_label.h>
-
-struct frontswap_ops {
- void (*init)(unsigned); /* this swap type was just swapon'ed */
- int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
- int (*load)(unsigned, pgoff_t, struct page *, bool *); /* load a page */
- void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
- void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
-};
-
-int frontswap_register_ops(const struct frontswap_ops *ops);
-
-extern void frontswap_init(unsigned type, unsigned long *map);
-extern int __frontswap_store(struct page *page);
-extern int __frontswap_load(struct page *page);
-extern void __frontswap_invalidate_page(unsigned, pgoff_t);
-extern void __frontswap_invalidate_area(unsigned);
-
-#ifdef CONFIG_FRONTSWAP
-extern struct static_key_false frontswap_enabled_key;
-
-static inline bool frontswap_enabled(void)
-{
- return static_branch_unlikely(&frontswap_enabled_key);
-}
-
-static inline void frontswap_map_set(struct swap_info_struct *p,
- unsigned long *map)
-{
- p->frontswap_map = map;
-}
-
-static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
-{
- return p->frontswap_map;
-}
-#else
-/* all inline routines become no-ops and all externs are ignored */
-
-static inline bool frontswap_enabled(void)
-{
- return false;
-}
-
-static inline void frontswap_map_set(struct swap_info_struct *p,
- unsigned long *map)
-{
-}
-
-static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
-{
- return NULL;
-}
-#endif
-
-static inline int frontswap_store(struct page *page)
-{
- if (frontswap_enabled())
- return __frontswap_store(page);
-
- return -1;
-}
-
-static inline int frontswap_load(struct page *page)
-{
- if (frontswap_enabled())
- return __frontswap_load(page);
-
- return -1;
-}
-
-static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
-{
- if (frontswap_enabled())
- __frontswap_invalidate_page(type, offset);
-}
-
-static inline void frontswap_invalidate_area(unsigned type)
-{
- if (frontswap_enabled())
- __frontswap_invalidate_area(type);
-}
-
-#endif /* _LINUX_FRONTSWAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 562f2623c9c9..8dfd53b52744 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,6 +43,8 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#include <linux/maple_tree.h>
+#include <linux/rw_hint.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -67,7 +69,7 @@ struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
-struct fscrypt_info;
+struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
@@ -119,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
+/* File writes are restricted (block device specific) */
+#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
@@ -309,19 +313,6 @@ struct address_space;
struct writeback_control;
struct readahead_control;
-/*
- * Write life time hint values.
- * Stored in struct inode as u8.
- */
-enum rw_hint {
- WRITE_LIFE_NOT_SET = 0,
- WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
- WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
- WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
- WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
- WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
-};
-
/* Match RWF_* bits to IOCB bits */
#define IOCB_HIPRI (__force int) RWF_HIPRI
#define IOCB_DSYNC (__force int) RWF_DSYNC
@@ -338,6 +329,22 @@ enum rw_hint {
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP (1 << 22)
+/* kiocb is a read or write operation submitted by fs/aio.c. */
+#define IOCB_AIO_RW (1 << 23)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -351,7 +358,8 @@ enum rw_hint {
{ IOCB_WRITE, "WRITE" }, \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
- { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }
+ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
+ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
struct kiocb {
struct file *ki_filp;
@@ -360,7 +368,23 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
- struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ union {
+ /*
+ * Only used for async buffered reads, where it denotes the
+ * page waitqueue associated with completing the read. Valid
+ * IFF IOCB_WAITQ is set.
+ */
+ struct wait_page_queue *ki_waitq;
+ /*
+ * Can be used for O_DIRECT IO, where the completion handling
+ * is punted back to the issuer of the IO. May only be set
+ * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+ * must then check for presence of this handler when ki_complete
+ * is invoked. The data passed in to this handler must be
+ * assigned to ->private when dio_complete is assigned.
+ */
+ ssize_t (*dio_complete)(void *data);
+ };
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -403,7 +427,7 @@ struct address_space_operations {
bool (*is_partially_uptodate) (struct folio *, size_t from,
size_t count);
void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
- int (*error_remove_page)(struct address_space *, struct page *);
+ int (*error_remove_folio)(struct address_space *, struct folio *);
/* swapfile support */
int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
@@ -423,7 +447,7 @@ extern const struct address_space_operations empty_aops;
* It is also used to block modification of page cache contents through
* memory mappings.
* @gfp_mask: Memory allocation flags to use for allocating pages.
- * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
@@ -432,9 +456,9 @@ extern const struct address_space_operations empty_aops;
* @a_ops: Methods.
* @flags: Error bits and flags (AS_*).
* @wb_err: The most recent error which has occurred.
- * @private_lock: For use by the owner of the address_space.
- * @private_list: For use by the owner of the address_space.
- * @private_data: For use by the owner of the address_space.
+ * @i_private_lock: For use by the owner of the address_space.
+ * @i_private_list: For use by the owner of the address_space.
+ * @i_private_data: For use by the owner of the address_space.
*/
struct address_space {
struct inode *host;
@@ -447,15 +471,15 @@ struct address_space {
atomic_t nr_thps;
#endif
struct rb_root_cached i_mmap;
- struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages;
pgoff_t writeback_index;
const struct address_space_operations *a_ops;
unsigned long flags;
errseq_t wb_err;
- spinlock_t private_lock;
- struct list_head private_list;
- void *private_data;
+ spinlock_t i_private_lock;
+ struct list_head i_private_list;
+ struct rw_semaphore i_mmap_rwsem;
+ void * i_private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
* On most architectures that alignment is already the case; but
@@ -526,7 +550,7 @@ static inline int mapping_mapped(struct address_space *mapping)
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
+ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
@@ -640,13 +664,13 @@ struct inode {
};
dev_t i_rdev;
loff_t i_size;
- struct timespec64 i_atime;
- struct timespec64 i_mtime;
- struct timespec64 i_ctime;
+ struct timespec64 __i_atime;
+ struct timespec64 __i_mtime;
+ struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
- u8 i_write_hint;
+ enum rw_hint i_write_hint;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
@@ -707,7 +731,7 @@ struct inode {
#endif
#ifdef CONFIG_FS_ENCRYPTION
- struct fscrypt_info *i_crypt_info;
+ struct fscrypt_inode_info *i_crypt_info;
#endif
#ifdef CONFIG_FS_VERITY
@@ -876,7 +900,8 @@ static inline loff_t i_size_read(const struct inode *inode)
preempt_enable();
return i_size;
#else
- return inode->i_size;
+ /* Pairs with smp_store_release() in i_size_write() */
+ return smp_load_acquire(&inode->i_size);
#endif
}
@@ -898,7 +923,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size)
inode->i_size = i_size;
preempt_enable();
#else
- inode->i_size = i_size;
+ /*
+ * Pairs with smp_load_acquire() in i_size_read() to ensure
+ * changes related to inode size (such as page contents) are
+ * visible before we see the changed inode size.
+ */
+ smp_store_release(&inode->i_size, i_size);
#endif
}
@@ -960,8 +990,10 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
*/
struct file {
union {
+ /* fput() uses task work when closing and freeing file (default). */
+ struct callback_head f_task_work;
+ /* fput() must use workqueue (most kernel threads). */
struct llist_node f_llist;
- struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};
@@ -1011,7 +1043,10 @@ static inline struct file *get_file(struct file *f)
atomic_long_inc(&f->f_count);
return f;
}
-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+
+struct file *get_file_rcu(struct file __rcu **f);
+struct file *get_file_active(struct file **f);
+
#define file_count(x) atomic_long_read(&(x)->f_count)
#define MAX_NON_LFS ((1UL<<31) - 1)
@@ -1028,6 +1063,7 @@ static inline struct file *get_file(struct file *f)
typedef void *fl_owner_t;
struct file_lock;
+struct file_lease;
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
@@ -1042,9 +1078,20 @@ static inline struct inode *file_inode(const struct file *f)
return f->f_inode;
}
+/*
+ * file_dentry() is a relic from the days that overlayfs was using files with a
+ * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs.
+ * In those days, file_dentry() was needed to get the underlying fs dentry that
+ * matches f_inode.
+ * Files with "fake" path should not exist nowadays, so use an assertion to make
+ * sure that file_dentry() was not papering over filesystem bugs.
+ */
static inline struct dentry *file_dentry(const struct file *file)
{
- return d_real(file->f_path.dentry, file_inode(file));
+ struct dentry *dentry = file->f_path.dentry;
+
+ WARN_ON_ONCE(d_inode(dentry) != file_inode(file));
+ return dentry;
}
struct fasync_struct {
@@ -1069,7 +1116,7 @@ extern void fasync_free(struct fasync_struct *);
extern void kill_fasync(struct fasync_struct **, int, int);
extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);
@@ -1088,13 +1135,15 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_NOATIME BIT(10) /* Do not update access times. */
#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
#define SB_SILENT BIT(15)
-#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */
+#define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */
#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
#define SB_I_VERSION BIT(23) /* Update inode I_version field */
#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
+#define SB_DYING BIT(24)
#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
#define SB_NOSEC BIT(28)
@@ -1128,11 +1177,13 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
+#define SB_I_EVM_UNSUPPORTED 0x00000080
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
+#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
/* Possible states of 'frozen' field */
enum {
@@ -1147,7 +1198,9 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- int frozen; /* Is sb frozen? */
+ unsigned short frozen; /* Is sb frozen? */
+ int freeze_kcount; /* How many kernel freeze requests? */
+ int freeze_ucount; /* How many userspace freeze requests? */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1172,7 +1225,7 @@ struct super_block {
#ifdef CONFIG_SECURITY
void *s_security;
#endif
- const struct xattr_handler **s_xattr;
+ const struct xattr_handler * const *s_xattr;
#ifdef CONFIG_FS_ENCRYPTION
const struct fscrypt_operations *s_cop;
struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */
@@ -1186,7 +1239,8 @@ struct super_block {
#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
- struct block_device *s_bdev;
+ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
+ struct file *s_bdev_file;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
@@ -1212,8 +1266,22 @@ struct super_block {
struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
#endif
+ /*
+ * q: why are s_id and s_sysfs_name not the same? both are human
+ * readable strings that identify the filesystem
+ * a: s_id is allowed to change at runtime; it's used in log messages,
+ * and we want to when a device starts out as single device (s_id is dev
+ * name) but then a device is hot added and we have to switch to
+ * identifying it by UUID
+ * but s_sysfs_name is a handle for programmatic access, and can't
+ * change at runtime
+ */
char s_id[32]; /* Informational name */
uuid_t s_uuid; /* UUID */
+ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */
+
+ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */
+ char s_sysfs_name[UUID_STRING_LEN + 1];
unsigned int s_max_links;
@@ -1231,7 +1299,7 @@ struct super_block {
const struct dentry_operations *s_d_op; /* default d_op for dentries */
- struct shrinker s_shrink; /* per-sb shrinker handle */
+ struct shrinker *s_shrink; /* per-sb shrinker handle */
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
@@ -1474,7 +1542,109 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
kgid_has_mapping(fs_userns, kgid);
}
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+static inline time64_t inode_get_atime_sec(const struct inode *inode)
+{
+ return inode->__i_atime.tv_sec;
+}
+
+static inline long inode_get_atime_nsec(const struct inode *inode)
+{
+ return inode->__i_atime.tv_nsec;
+}
+
+static inline struct timespec64 inode_get_atime(const struct inode *inode)
+{
+ return inode->__i_atime;
+}
+
+static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_atime = ts;
+ return ts;
+}
+
+static inline struct timespec64 inode_set_atime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+ return inode_set_atime_to_ts(inode, ts);
+}
+
+static inline time64_t inode_get_mtime_sec(const struct inode *inode)
+{
+ return inode->__i_mtime.tv_sec;
+}
+
+static inline long inode_get_mtime_nsec(const struct inode *inode)
+{
+ return inode->__i_mtime.tv_nsec;
+}
+
+static inline struct timespec64 inode_get_mtime(const struct inode *inode)
+{
+ return inode->__i_mtime;
+}
+
+static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_mtime = ts;
+ return ts;
+}
+
+static inline struct timespec64 inode_set_mtime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+ return inode_set_mtime_to_ts(inode, ts);
+}
+
+static inline time64_t inode_get_ctime_sec(const struct inode *inode)
+{
+ return inode->__i_ctime.tv_sec;
+}
+
+static inline long inode_get_ctime_nsec(const struct inode *inode)
+{
+ return inode->__i_ctime.tv_nsec;
+}
+
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+ return inode->__i_ctime;
+}
+
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_ctime = ts;
+ return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+
+ return inode_set_ctime_to_ts(inode, ts);
+}
+
+struct timespec64 simple_inode_init_ts(struct inode *inode);
/*
* Snapshotting support.
@@ -1504,9 +1674,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+/**
+ * __sb_write_started - check if sb freeze level is held
+ * @sb: the super we write to
+ * @level: the freeze level
+ *
+ * * > 0 - sb freeze level is held
+ * * 0 - sb freeze level is not held
+ * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
+ */
+static inline int __sb_write_started(const struct super_block *sb, int level)
+{
+ return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
+}
+
+/**
+ * sb_write_started - check if SB_FREEZE_WRITE is held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
static inline bool sb_write_started(const struct super_block *sb)
{
- return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
+ return __sb_write_started(sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * sb_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @sb: the super we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ */
+static inline bool sb_write_not_started(const struct super_block *sb)
+{
+ return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
+}
+
+/**
+ * file_write_started - check if SB_FREEZE_WRITE is held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_started(file_inode(file)->i_sb);
+}
+
+/**
+ * file_write_not_started - check if SB_FREEZE_WRITE is not held
+ * @file: the file we write to
+ *
+ * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
+ * May be false positive with !S_ISREG, because file_start_write() has
+ * no effect on !S_ISREG.
+ */
+static inline bool file_write_not_started(const struct file *file)
+{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return true;
+ return sb_write_not_started(file_inode(file)->i_sb);
}
/**
@@ -1770,6 +2001,7 @@ struct dir_context {
struct iov_iter;
struct io_uring_cmd;
+struct offset_ctx;
struct file_operations {
struct module *owner;
@@ -1798,7 +2030,7 @@ struct file_operations {
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
void (*splice_eof)(struct file *file);
- int (*setlease)(struct file *, long, struct file_lock **, void **);
+ int (*setlease)(struct file *, int, struct file_lease **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -1850,7 +2082,7 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
- int (*update_time)(struct inode *, struct timespec64 *, int);
+ int (*update_time)(struct inode *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode);
@@ -1863,6 +2095,7 @@ struct inode_operations {
int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -1886,9 +2119,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
-extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *len, unsigned int remap_flags,
@@ -1896,9 +2126,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *count, unsigned int remap_flags);
-extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags);
extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
@@ -1908,6 +2135,25 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
+/**
+ * enum freeze_holder - holder of the freeze
+ * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
+ * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
+ * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ *
+ * Indicate who the owner of the freeze or thaw request is and whether
+ * the freeze needs to be exclusive or can nest.
+ * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
+ * same holder aren't allowed. It is however allowed to hold a single
+ * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
+ * the same time. This is relied upon by some filesystems during online
+ * repair or similar.
+ */
+enum freeze_holder {
+ FREEZE_HOLDER_KERNEL = (1U << 0),
+ FREEZE_HOLDER_USERSPACE = (1U << 1),
+ FREEZE_MAY_NEST = (1U << 2),
+};
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1920,9 +2166,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -1935,7 +2181,7 @@ struct super_operations {
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
- struct dquot **(*get_dquots)(struct inode *);
+ struct dquot __rcu **(*get_dquots)(struct inode *);
#endif
long (*nr_cached_objects)(struct super_block *,
struct shrink_control *);
@@ -1998,7 +2244,12 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+
+#ifdef CONFIG_FS_POSIX_ACL
#define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL)
+#else
+#define IS_POSIXACL(inode) 0
+#endif
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
@@ -2142,7 +2393,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
#define I_CREATING (1 << 15)
#define I_DONTCACHE (1 << 16)
#define I_SYNC_QUEUED (1 << 17)
-#define I_PINNING_FSCACHE_WB (1 << 18)
+#define I_PINNING_NETFS_WB (1 << 18)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2200,7 +2451,7 @@ enum file_time_flags {
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int inode_update_time(struct inode *inode, int flags);
static inline void file_accessed(struct file *file)
{
@@ -2273,6 +2524,7 @@ struct super_block *sget(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
int flags, void *data);
+struct super_block *sget_dev(struct fs_context *fc, dev_t dev);
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
#define fops_get(fops) \
@@ -2296,17 +2548,56 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
+static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len)
+{
+ if (WARN_ON(len > sizeof(sb->s_uuid)))
+ len = sizeof(sb->s_uuid);
+ sb->s_uuid_len = len;
+ memcpy(&sb->s_uuid, uuid, len);
+}
+
+/* set sb sysfs name based on sb->s_bdev */
+static inline void super_set_sysfs_name_bdev(struct super_block *sb)
+{
+ snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev);
+}
+
+/* set sb sysfs name based on sb->s_uuid */
+static inline void super_set_sysfs_name_uuid(struct super_block *sb)
+{
+ WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid));
+ snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b);
+}
+
+/* set sb sysfs name based on sb->s_id */
+static inline void super_set_sysfs_name_id(struct super_block *sb)
+{
+ strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name));
+}
+
+/* try to use something standard before you use this */
+__printf(2, 3)
+static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args);
+ va_end(args);
+}
+
extern int current_umask(void);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
-extern int generic_update_time(struct inode *, struct timespec64 *, int);
+int inode_update_timestamps(struct inode *inode, int flags);
+int generic_update_time(struct inode *, int);
/* /sys/fs */
extern struct kobject *fs_kobj;
@@ -2318,13 +2609,13 @@ struct audit_names;
struct filename {
const char *name; /* pointer to actual string */
const __user char *uptr; /* original userland pointer */
- int refcnt;
+ atomic_t refcnt;
struct audit_names *aname;
const char iname[];
};
static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
-static inline struct mnt_idmap *file_mnt_idmap(struct file *file)
+static inline struct mnt_idmap *file_mnt_idmap(const struct file *file)
{
return mnt_idmap(file->f_path.mnt);
}
@@ -2363,26 +2654,31 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
-struct file *backing_file_open(const struct path *path, int flags,
- const struct path *real_path,
- const struct cred *cred);
-struct path *backing_file_real_path(struct file *f);
+struct path *backing_file_user_path(struct file *f);
/*
- * file_real_path - get the path corresponding to f_inode
- *
- * When opening a backing file for a stackable filesystem (e.g.,
- * overlayfs) f_path may be on the stackable filesystem and f_inode on
- * the underlying filesystem. When the path associated with f_inode is
- * needed, this helper should be used instead of accessing f_path
- * directly.
-*/
-static inline const struct path *file_real_path(struct file *f)
+ * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
+ * stored in ->vm_file is a backing file whose f_inode is on the underlying
+ * filesystem. When the mapped file path and inode number are displayed to
+ * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
+ * path and inode number to display to the user, which is the path of the fd
+ * that user has requested to map and the inode number that would be returned
+ * by fstat() on that same fd.
+ */
+/* Get the path to display in /proc/<pid>/maps */
+static inline const struct path *file_user_path(struct file *f)
{
if (unlikely(f->f_mode & FMODE_BACKING))
- return backing_file_real_path(f);
+ return backing_file_user_path(f);
return &f->f_path;
}
+/* Get the inode whose inode number to display in /proc/<pid>/maps */
+static inline const struct inode *file_user_inode(struct file *f)
+{
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ return d_inode(backing_file_user_path(f)->dentry);
+ return file_inode(f);
+}
static inline struct file *file_clone_open(struct file *file)
{
@@ -2545,6 +2841,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
return (inode->i_mode ^ mode) & S_IFMT;
}
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * Should be matched with a call to file_end_write().
+ */
static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
@@ -2559,11 +2862,53 @@ static inline bool file_start_write_trylock(struct file *file)
return sb_start_write_trylock(file_inode(file)->i_sb);
}
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
static inline void file_end_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
- __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(file_inode(file)->i_sb);
+}
+
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of sb_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ sb_start_write(inode->i_sb);
+ /*
+ * Fool lockdep by telling it the lock got released so that it
+ * doesn't complain about the held lock when we return to userspace.
+ */
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we sumbitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission thread.
+ */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(inode->i_sb);
}
/*
@@ -2613,8 +2958,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
- BUG_ON(!atomic_read(&inode->i_readcount));
- atomic_dec(&inode->i_readcount);
+ BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
}
static inline void i_readcount_inc(struct inode *inode)
{
@@ -2644,6 +2988,17 @@ extern bool path_is_under(const struct path *, const struct path *);
extern char *file_path(struct file *, char *, int);
+/**
+ * is_dot_dotdot - returns true only if @name is "." or ".."
+ * @name: file name to check
+ * @len: length of file name, in bytes
+ */
+static inline bool is_dot_dotdot(const char *name, size_t len)
+{
+ return len && unlikely(name[0] == '.') &&
+ (len == 1 || (len == 2 && name[1] == '.'));
+}
+
#include <linux/err.h>
/* needed for stackable file system support */
@@ -2721,6 +3076,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb);
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
+extern int file_remove_privs_flags(struct file *file, unsigned int flags);
extern int file_remove_privs(struct file *);
int setattr_should_drop_sgid(struct mnt_idmap *idmap,
const struct inode *inode);
@@ -2789,8 +3145,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos,
size_t len, unsigned int flags);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
- loff_t *opos, size_t len, unsigned int flags);
extern void
@@ -2880,7 +3234,7 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
-void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
+void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -2919,8 +3273,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
-extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
extern void iterate_supers(void (*)(struct super_block *, void *), void *);
@@ -2940,6 +3292,8 @@ extern int simple_open(struct inode *inode, struct file *file);
extern int simple_link(struct dentry *, struct inode *, struct dentry *);
extern int simple_unlink(struct inode *, struct dentry *);
extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename(struct mnt_idmap *, struct inode *,
@@ -2956,7 +3310,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
@@ -2977,12 +3331,38 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
+struct offset_ctx {
+ struct maple_tree mt;
+ unsigned long next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_empty(struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_check_addressable(unsigned, u64);
-extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
+extern void generic_set_sb_d_ops(struct super_block *sb);
+
+static inline bool sb_has_encoding(const struct super_block *sb)
+{
+#if IS_ENABLED(CONFIG_UNICODE)
+ return !!sb->s_encoding;
+#else
+ return false;
+#endif
+}
int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
unsigned int ia_valid);
@@ -3037,6 +3417,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return 0;
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
+ if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND)))
+ return -EINVAL;
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
@@ -3047,6 +3429,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
+ if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) {
+ if (IS_APPEND(file_inode(ki->ki_filp)))
+ return -EPERM;
+ ki->ki_flags &= ~IOCB_APPEND;
+ }
+
ki->ki_flags |= kiocb_flags;
return 0;
}
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index ff6341e09925..c13e99cbbf81 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -109,6 +109,7 @@ struct fs_context {
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
bool oldapi:1; /* Coming from mount(2) */
+ bool exclusive:1; /* create new superblock, reject existing one */
};
struct fs_context_operations {
@@ -135,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
const char *value, size_t v_size);
+int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
+ char *(*sep)(char **));
extern int generic_parse_monolithic(struct fs_context *fc, void *data);
extern int vfs_get_tree(struct fs_context *fc);
extern void put_fs_context(struct fs_context *fc);
@@ -150,14 +153,13 @@ extern int get_tree_nodev(struct fs_context *fc,
extern int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
-extern int get_tree_single_reconf(struct fs_context *fc,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc));
extern int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key);
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc);
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h
deleted file mode 100644
index 77d783f71527..000000000000
--- a/include/linux/fs_enet_pd.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Platform information definitions for the
- * universal Freescale Ethernet driver.
- *
- * Copyright (c) 2003 Intracom S.A.
- * by Pantelis Antoniou <panto@intracom.gr>
- *
- * 2005 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef FS_ENET_PD_H
-#define FS_ENET_PD_H
-
-#include <linux/clk.h>
-#include <linux/string.h>
-#include <linux/of_mdio.h>
-#include <linux/if_ether.h>
-#include <asm/types.h>
-
-#define FS_ENET_NAME "fs_enet"
-
-enum fs_id {
- fsid_fec1,
- fsid_fec2,
- fsid_fcc1,
- fsid_fcc2,
- fsid_fcc3,
- fsid_scc1,
- fsid_scc2,
- fsid_scc3,
- fsid_scc4,
-};
-
-#define FS_MAX_INDEX 9
-
-static inline int fs_get_fec_index(enum fs_id id)
-{
- if (id >= fsid_fec1 && id <= fsid_fec2)
- return id - fsid_fec1;
- return -1;
-}
-
-static inline int fs_get_fcc_index(enum fs_id id)
-{
- if (id >= fsid_fcc1 && id <= fsid_fcc3)
- return id - fsid_fcc1;
- return -1;
-}
-
-static inline int fs_get_scc_index(enum fs_id id)
-{
- if (id >= fsid_scc1 && id <= fsid_scc4)
- return id - fsid_scc1;
- return -1;
-}
-
-static inline int fs_fec_index2id(int index)
-{
- int id = fsid_fec1 + index - 1;
- if (id >= fsid_fec1 && id <= fsid_fec2)
- return id;
- return FS_MAX_INDEX;
- }
-
-static inline int fs_fcc_index2id(int index)
-{
- int id = fsid_fcc1 + index - 1;
- if (id >= fsid_fcc1 && id <= fsid_fcc3)
- return id;
- return FS_MAX_INDEX;
-}
-
-static inline int fs_scc_index2id(int index)
-{
- int id = fsid_scc1 + index - 1;
- if (id >= fsid_scc1 && id <= fsid_scc4)
- return id;
- return FS_MAX_INDEX;
-}
-
-enum fs_mii_method {
- fsmii_fixed,
- fsmii_fec,
- fsmii_bitbang,
-};
-
-enum fs_ioport {
- fsiop_porta,
- fsiop_portb,
- fsiop_portc,
- fsiop_portd,
- fsiop_porte,
-};
-
-struct fs_mii_bit {
- u32 offset;
- u8 bit;
- u8 polarity;
-};
-struct fs_mii_bb_platform_info {
- struct fs_mii_bit mdio_dir;
- struct fs_mii_bit mdio_dat;
- struct fs_mii_bit mdc_dat;
- int delay; /* delay in us */
- int irq[32]; /* irqs per phy's */
-};
-
-struct fs_platform_info {
-
- void(*init_ioports)(struct fs_platform_info *);
- /* device specific information */
- int fs_no; /* controller index */
- char fs_type[4]; /* controller type */
-
- u32 cp_page; /* CPM page */
- u32 cp_block; /* CPM sblock */
- u32 cp_command; /* CPM page/sblock/mcn */
-
- u32 clk_trx; /* some stuff for pins & mux configuration*/
- u32 clk_rx;
- u32 clk_tx;
- u32 clk_route;
- u32 clk_mask;
-
- u32 mem_offset;
- u32 dpram_offset;
- u32 fcc_regs_c;
-
- u32 device_flags;
-
- struct device_node *phy_node;
- const struct fs_mii_bus_info *bus_info;
-
- int rx_ring, tx_ring; /* number of buffers on rx */
- __u8 macaddr[ETH_ALEN]; /* mac address */
- int rx_copybreak; /* limit we copy small frames */
- int napi_weight; /* NAPI weight */
-
- int use_rmii; /* use RMII mode */
- int has_phy; /* if the network is phy container as well...*/
-
- struct clk *clk_per; /* 'per' clock for register access */
-};
-struct fs_mii_fec_platform_info {
- u32 irq[32];
- u32 mii_speed;
-};
-
-static inline int fs_get_id(struct fs_platform_info *fpi)
-{
- if(strstr(fpi->fs_type, "SCC"))
- return fs_scc_index2id(fpi->fs_no);
- if(strstr(fpi->fs_type, "FCC"))
- return fs_fcc_index2id(fpi->fs_no);
- if(strstr(fpi->fs_type, "FEC"))
- return fs_fec_index2id(fpi->fs_no);
- return fpi->fs_no;
-}
-
-#endif
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 54210a42c30d..2b1f74b24070 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -16,15 +16,15 @@ extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
static inline void fsstack_copy_attr_atime(struct inode *dest,
const struct inode *src)
{
- dest->i_atime = src->i_atime;
+ inode_set_atime_to_ts(dest, inode_get_atime(src));
}
static inline void fsstack_copy_attr_times(struct inode *dest,
const struct inode *src)
{
- dest->i_atime = src->i_atime;
- dest->i_mtime = src->i_mtime;
- dest->i_ctime = src->i_ctime;
+ inode_set_atime_to_ts(dest, inode_get_atime(src));
+ inode_set_mtime_to_ts(dest, inode_get_mtime(src));
+ inode_set_ctime_to_ts(dest, inode_get_ctime(src));
}
#endif /* _LINUX_FS_STACK_H */
diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h
deleted file mode 100644
index 36b61ff39277..000000000000
--- a/include/linux/fs_uart_pd.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Platform information definitions for the CPM Uart driver.
- *
- * 2006 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef FS_UART_PD_H
-#define FS_UART_PD_H
-
-#include <asm/types.h>
-
-enum fs_uart_id {
- fsid_smc1_uart,
- fsid_smc2_uart,
- fsid_scc1_uart,
- fsid_scc2_uart,
- fsid_scc3_uart,
- fsid_scc4_uart,
- fs_uart_nr,
-};
-
-static inline int fs_uart_id_scc2fsid(int id)
-{
- return fsid_scc1_uart + id - 1;
-}
-
-static inline int fs_uart_id_fsid2scc(int id)
-{
- return id - fsid_scc1_uart + 1;
-}
-
-static inline int fs_uart_id_smc2fsid(int id)
-{
- return fsid_smc1_uart + id - 1;
-}
-
-static inline int fs_uart_id_fsid2smc(int id)
-{
- return id - fsid_smc1_uart + 1;
-}
-
-struct fs_uart_platform_info {
- void(*init_ioports)(struct fs_uart_platform_info *);
- /* device specific information */
- int fs_no; /* controller index */
- char fs_type[4]; /* controller type */
- u32 uart_clk;
- u8 tx_num_fifo;
- u8 tx_buf_size;
- u8 rx_num_fifo;
- u8 rx_buf_size;
- u8 brg;
- u8 clk_rx;
- u8 clk_tx;
-};
-
-static inline int fs_uart_get_id(struct fs_uart_platform_info *fpi)
-{
- if(strstr(fpi->fs_type, "SMC"))
- return fs_uart_id_smc2fsid(fpi->fs_no);
- if(strstr(fpi->fs_type, "SCC"))
- return fs_uart_id_scc2fsid(fpi->fs_no);
- return fpi->fs_no;
-}
-
-#endif
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index a174cedf4d90..bdf7f3eddf0a 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -189,17 +189,20 @@ extern atomic_t fscache_n_write;
extern atomic_t fscache_n_no_write_space;
extern atomic_t fscache_n_no_create_space;
extern atomic_t fscache_n_culled;
+extern atomic_t fscache_n_dio_misfit;
#define fscache_count_read() atomic_inc(&fscache_n_read)
#define fscache_count_write() atomic_inc(&fscache_n_write)
#define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
#define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
#define fscache_count_culled() atomic_inc(&fscache_n_culled)
+#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit)
#else
#define fscache_count_read() do {} while(0)
#define fscache_count_write() do {} while(0)
#define fscache_count_no_write_space() do {} while(0)
#define fscache_count_no_create_space() do {} while(0)
#define fscache_count_culled() do {} while(0)
+#define fscache_count_dio_misfit() do {} while(0)
#endif
#endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 8e312c8323a8..6e8562cbcc43 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r
* indicates the cache resources to which the operation state should be
* attached; @cookie indicates the cache object that will be accessed.
*
- * This is intended to be called from the ->begin_cache_operation() netfs lib
- * operation as implemented by the network filesystem.
- *
* @cres->inval_counter is set from @cookie->inval_counter for comparison at
* the end of the operation. This allows invalidation during the operation to
* be detected by the caller.
@@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
}
-#if __fscache_available
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
- struct fscache_cookie *cookie);
-#else
-#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \
- filemap_dirty_folio(MAPPING, FOLIO)
-#endif
-
-/**
- * fscache_unpin_writeback - Unpin writeback resources
- * @wbc: The writeback control
- * @cookie: The cookie referring to the cache object
- *
- * Unpin the writeback resources pinned by fscache_dirty_folio(). This is
- * intended to be called by the netfs's ->write_inode() method.
- */
-static inline void fscache_unpin_writeback(struct writeback_control *wbc,
- struct fscache_cookie *cookie)
-{
- if (wbc->unpinned_fscache_wb)
- fscache_unuse_cookie(cookie, NULL, NULL);
-}
-
-/**
- * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode
- * @cookie: The cookie referring to the cache object
- * @inode: The inode to clean up
- * @aux: Auxiliary data to apply to the inode
- *
- * Clear any writeback resources held by an inode when the inode is evicted.
- * This must be called before clear_inode() is called.
- */
-static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
- struct inode *inode,
- const void *aux)
-{
- if (inode->i_state & I_PINNING_FSCACHE_WB) {
- loff_t i_size = i_size_read(inode);
- fscache_unuse_cookie(cookie, aux, &i_size);
- }
-}
-
/**
* fscache_note_page_release - Note that a netfs page got released
* @cookie: The cookie corresponding to the file
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index c895b12737a1..772f822dc6b8 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -31,7 +31,7 @@
#define FSCRYPT_CONTENTS_ALIGNMENT 16
union fscrypt_policy;
-struct fscrypt_info;
+struct fscrypt_inode_info;
struct fs_parameter;
struct seq_file;
@@ -59,26 +59,55 @@ struct fscrypt_name {
#ifdef CONFIG_FS_ENCRYPTION
-/*
- * If set, the fscrypt bounce page pool won't be allocated (unless another
- * filesystem needs it). Set this if the filesystem always uses its own bounce
- * pages for writes and therefore won't need the fscrypt bounce page pool.
- */
-#define FS_CFLG_OWN_PAGES (1U << 1)
-
/* Crypto operations for filesystems */
struct fscrypt_operations {
- /* Set of optional flags; see above for allowed flags */
- unsigned int flags;
+ /*
+ * If set, then fs/crypto/ will allocate a global bounce page pool the
+ * first time an encryption key is set up for a file. The bounce page
+ * pool is required by the following functions:
+ *
+ * - fscrypt_encrypt_pagecache_blocks()
+ * - fscrypt_zeroout_range() for files not using inline crypto
+ *
+ * If the filesystem doesn't use those, it doesn't need to set this.
+ */
+ unsigned int needs_bounce_pages : 1;
/*
- * If set, this is a filesystem-specific key description prefix that
- * will be accepted for "logon" keys for v1 fscrypt policies, in
- * addition to the generic prefix "fscrypt:". This functionality is
- * deprecated, so new filesystems shouldn't set this field.
+ * If set, then fs/crypto/ will allow the use of encryption settings
+ * that assume inode numbers fit in 32 bits (i.e.
+ * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other
+ * prerequisites for these settings are also met. This is only useful
+ * if the filesystem wants to support inline encryption hardware that is
+ * limited to 32-bit or 64-bit data unit numbers and where programming
+ * keyslots is very slow.
*/
- const char *key_prefix;
+ unsigned int has_32bit_inodes : 1;
+
+ /*
+ * If set, then fs/crypto/ will allow users to select a crypto data unit
+ * size that is less than the filesystem block size. This is done via
+ * the log2_data_unit_size field of the fscrypt policy. This flag is
+ * not compatible with filesystems that encrypt variable-length blocks
+ * (i.e. blocks that aren't all equal to filesystem's block size), for
+ * example as a result of compression. It's also not compatible with
+ * the fscrypt_encrypt_block_inplace() and
+ * fscrypt_decrypt_block_inplace() functions.
+ */
+ unsigned int supports_subblock_data_units : 1;
+
+ /*
+ * This field exists only for backwards compatibility reasons and should
+ * only be set by the filesystems that are setting it already. It
+ * contains the filesystem-specific key description prefix that is
+ * accepted for "logon" keys for v1 fscrypt policies. This
+ * functionality is deprecated in favor of the generic prefix
+ * "fscrypt:", which itself is deprecated in favor of the filesystem
+ * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that
+ * are newly adding fscrypt support should not set this field.
+ */
+ const char *legacy_key_prefix;
/*
* Get the fscrypt context of the given inode.
@@ -146,21 +175,6 @@ struct fscrypt_operations {
bool (*has_stable_inodes)(struct super_block *sb);
/*
- * Get the number of bits that the filesystem uses to represent inode
- * numbers and file logical block numbers.
- *
- * By default, both of these are assumed to be 64-bit. This function
- * can be implemented to declare that either or both of these numbers is
- * shorter, which may allow the use of the
- * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of
- * inline crypto hardware whose maximum DUN length is less than 64 bits
- * (e.g., eMMC v5.2 spec compliant hardware). This function only needs
- * to be implemented if support for one of these features is needed.
- */
- void (*get_ino_and_lblk_bits)(struct super_block *sb,
- int *ino_bits_ret, int *lblk_bits_ret);
-
- /*
* Return an array of pointers to the block devices to which the
* filesystem may write encrypted file contents, NULL if the filesystem
* only has a single such block device, or an ERR_PTR() on error.
@@ -178,7 +192,10 @@ struct fscrypt_operations {
unsigned int *num_devs);
};
-static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
+int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
+
+static inline struct fscrypt_inode_info *
+fscrypt_get_inode_info(const struct inode *inode)
{
/*
* Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info().
@@ -206,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
}
/*
- * When d_splice_alias() moves a directory's no-key alias to its plaintext alias
- * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be
- * cleared. Note that we don't have to support arbitrary moves of this flag
- * because fscrypt doesn't allow no-key names to be the source or target of a
- * rename().
+ * When d_splice_alias() moves a directory's no-key alias to its
+ * plaintext alias as a result of the encryption key being added,
+ * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity
+ * to disable d_revalidate. Note that we don't have to support the
+ * inverse operation because fscrypt doesn't allow no-key names to be
+ * the source or target of a rename().
*/
static inline void fscrypt_handle_d_move(struct dentry *dentry)
{
- dentry->d_flags &= ~DCACHE_NOKEY_NAME;
+ /*
+ * VFS calls fscrypt_handle_d_move even for non-fscrypt
+ * filesystems.
+ */
+ if (dentry->d_flags & DCACHE_NOKEY_NAME) {
+ dentry->d_flags &= ~DCACHE_NOKEY_NAME;
+
+ /*
+ * Other filesystem features might be handling dentry
+ * revalidation, in which case it cannot be disabled.
+ */
+ if (dentry->d_op->d_revalidate == fscrypt_d_revalidate)
+ dentry->d_flags &= ~DCACHE_OP_REVALIDATE;
+ }
}
/**
@@ -246,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
return dentry->d_flags & DCACHE_NOKEY_NAME;
}
+static inline void fscrypt_prepare_dentry(struct dentry *dentry,
+ bool is_nokey_name)
+{
+ /*
+ * This code tries to only take ->d_lock when necessary to write
+ * to ->d_flags. We shouldn't be peeking on d_flags for
+ * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case
+ * there is a race, the worst it can happen is that we fail to
+ * unset DCACHE_OP_REVALIDATE and pay the cost of an extra
+ * d_revalidate.
+ */
+ if (is_nokey_name) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_NOKEY_NAME;
+ spin_unlock(&dentry->d_lock);
+ } else if (dentry->d_flags & DCACHE_OP_REVALIDATE &&
+ dentry->d_op->d_revalidate == fscrypt_d_revalidate) {
+ /*
+ * Unencrypted dentries and encrypted dentries where the
+ * key is available are always valid from fscrypt
+ * perspective. Avoid the cost of calling
+ * fscrypt_d_revalidate unnecessarily.
+ */
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_OP_REVALIDATE;
+ spin_unlock(&dentry->d_lock);
+ }
+}
+
/* crypto.c */
void fscrypt_enqueue_decrypt_work(struct work_struct *);
@@ -353,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len);
u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
-int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
/* bio.c */
bool fscrypt_decrypt_bio(struct bio *bio);
@@ -390,7 +449,8 @@ static inline void fscrypt_set_ops(struct super_block *sb,
}
#else /* !CONFIG_FS_ENCRYPTION */
-static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
+static inline struct fscrypt_inode_info *
+fscrypt_get_inode_info(const struct inode *inode)
{
return NULL;
}
@@ -409,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
return false;
}
+static inline void fscrypt_prepare_dentry(struct dentry *dentry,
+ bool is_nokey_name)
+{
+}
+
/* crypto.c */
static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
{
@@ -868,7 +933,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode)
*/
static inline bool fscrypt_has_encryption_key(const struct inode *inode)
{
- return fscrypt_get_info(inode) != NULL;
+ return fscrypt_get_inode_info(inode) != NULL;
}
/**
@@ -966,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir,
fname->usr_fname = &dentry->d_name;
fname->disk_name.name = (unsigned char *)dentry->d_name.name;
fname->disk_name.len = dentry->d_name.len;
+
+ fscrypt_prepare_dentry(dentry, false);
+
return 0;
}
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index ed48e4f1e755..1a9de119a0f7 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,6 +17,12 @@
#include <linux/slab.h>
#include <linux/bug.h>
+/* Are there any inode/mount/sb objects that are being watched at all? */
+static inline bool fsnotify_sb_has_watchers(struct super_block *sb)
+{
+ return atomic_long_read(&sb->s_fsnotify_connectors);
+}
+
/*
* Notify this @dir inode about a change in a child directory entry.
* The directory entry may have turned positive or negative or its inode may
@@ -30,7 +36,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
struct inode *dir, const struct qstr *name,
u32 cookie)
{
- if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
+ if (!fsnotify_sb_has_watchers(dir->i_sb))
return 0;
return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
@@ -44,7 +50,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
static inline void fsnotify_inode(struct inode *inode, __u32 mask)
{
- if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
+ if (!fsnotify_sb_has_watchers(inode->i_sb))
return;
if (S_ISDIR(inode->i_mode))
@@ -59,7 +65,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
{
struct inode *inode = d_inode(dentry);
- if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
+ if (!fsnotify_sb_has_watchers(inode->i_sb))
return 0;
if (S_ISDIR(inode->i_mode)) {
@@ -96,35 +102,73 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
if (file->f_mode & FMODE_NONOTIFY)
return 0;
- /* Overlayfs internal files have fake f_path */
- path = file_real_path(file);
+ path = &file->f_path;
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}
-/* Simple call site for access decisions */
-static inline int fsnotify_perm(struct file *file, int mask)
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+/*
+ * fsnotify_file_area_perm - permission hook before access to file range
+ */
+static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
+ const loff_t *ppos, size_t count)
{
- int ret;
- __u32 fsnotify_mask = 0;
+ __u32 fsnotify_mask = FS_ACCESS_PERM;
+
+ /*
+ * filesystem may be modified in the context of permission events
+ * (e.g. by HSM filling a file on access), so sb freeze protection
+ * must not be held.
+ */
+ lockdep_assert_once(file_write_not_started(file));
- if (!(mask & (MAY_READ | MAY_OPEN)))
+ if (!(perm_mask & MAY_READ))
return 0;
- if (mask & MAY_OPEN) {
- fsnotify_mask = FS_OPEN_PERM;
+ return fsnotify_file(file, fsnotify_mask);
+}
+
+/*
+ * fsnotify_file_perm - permission hook before file access
+ */
+static inline int fsnotify_file_perm(struct file *file, int perm_mask)
+{
+ return fsnotify_file_area_perm(file, perm_mask, NULL, 0);
+}
- if (file->f_flags & __FMODE_EXEC) {
- ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
+/*
+ * fsnotify_open_perm - permission hook before file open
+ */
+static inline int fsnotify_open_perm(struct file *file)
+{
+ int ret;
- if (ret)
- return ret;
- }
- } else if (mask & MAY_READ) {
- fsnotify_mask = FS_ACCESS_PERM;
+ if (file->f_flags & __FMODE_EXEC) {
+ ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
+ if (ret)
+ return ret;
}
- return fsnotify_file(file, fsnotify_mask);
+ return fsnotify_file(file, FS_OPEN_PERM);
+}
+
+#else
+static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
+ const loff_t *ppos, size_t count)
+{
+ return 0;
+}
+
+static inline int fsnotify_file_perm(struct file *file, int perm_mask)
+{
+ return 0;
+}
+
+static inline int fsnotify_open_perm(struct file *file)
+{
+ return 0;
}
+#endif
/*
* fsnotify_link_count - inode's link count changed
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d7d96c806bff..8f40c349b228 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -31,8 +31,8 @@
#define FS_ACCESS 0x00000001 /* File was accessed */
#define FS_MODIFY 0x00000002 /* File was modified */
#define FS_ATTRIB 0x00000004 /* Metadata changed */
-#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */
-#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */
+#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */
+#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
#define FS_OPEN 0x00000020 /* File was opened */
#define FS_MOVED_FROM 0x00000040 /* File was moved from X */
#define FS_MOVED_TO 0x00000080 /* File was moved to Y */
@@ -472,10 +472,8 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
struct fsnotify_mark_connector {
spinlock_t lock;
unsigned short type; /* Type of object [lock] */
-#define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01
#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02
unsigned short flags; /* flags [lock] */
- __kernel_fsid_t fsid; /* fsid of filesystem containing object */
union {
/* Object pointer [lock] */
fsnotify_connp_t *obj;
@@ -530,6 +528,8 @@ struct fsnotify_mark {
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400
+#define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800
+#define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000
unsigned int flags; /* flags [mark->lock] */
};
@@ -760,17 +760,13 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark,
/* Find mark belonging to given group in the list of marks */
extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
struct fsnotify_group *group);
-/* Get cached fsid of filesystem containing object */
-extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
- __kernel_fsid_t *fsid);
/* attach the mark to the object */
extern int fsnotify_add_mark(struct fsnotify_mark *mark,
fsnotify_connp_t *connp, unsigned int obj_type,
- int add_flags, __kernel_fsid_t *fsid);
+ int add_flags);
extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
fsnotify_connp_t *connp,
- unsigned int obj_type, int add_flags,
- __kernel_fsid_t *fsid);
+ unsigned int obj_type, int add_flags);
/* attach the mark to the inode */
static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
@@ -778,15 +774,14 @@ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
int add_flags)
{
return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL);
+ FSNOTIFY_OBJ_TYPE_INODE, add_flags);
}
static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
struct inode *inode,
int add_flags)
{
return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
- FSNOTIFY_OBJ_TYPE_INODE, add_flags,
- NULL);
+ FSNOTIFY_OBJ_TYPE_INODE, add_flags);
}
/* given a group and a mark, flag mark to be freed when all references are dropped */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aad9cf8876b5..54d53f345d14 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -862,13 +862,8 @@ extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
extern void ftrace_module_enable(struct module *mod);
extern void ftrace_release_mod(struct module *mod);
-
-extern void ftrace_disable_daemon(void);
-extern void ftrace_enable_daemon(void);
#else /* CONFIG_DYNAMIC_FTRACE */
static inline int skip_trace(unsigned long ip) { return 0; }
-static inline void ftrace_disable_daemon(void) { }
-static inline void ftrace_enable_daemon(void) { }
static inline void ftrace_module_init(struct module *mod) { }
static inline void ftrace_module_enable(struct module *mod) { }
static inline void ftrace_release_mod(struct module *mod) { }
@@ -1156,7 +1151,9 @@ static inline void unpause_graph_tracing(void) { }
#ifdef CONFIG_TRACING
enum ftrace_dump_mode;
-extern enum ftrace_dump_mode ftrace_dump_on_oops;
+#define MAX_TRACER_SIZE 100
+extern char ftrace_dump_on_oops[];
+extern int ftrace_dump_on_oops_enabled(void);
extern int tracepoint_printk;
extern void disable_trace_on_warning(void);
diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h
new file mode 100644
index 000000000000..3ff4c277296f
--- /dev/null
+++ b/include/linux/fw_table.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by
+ * platform or device firmware
+ *
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2023 Intel Corp.
+ */
+#ifndef _FW_TABLE_H_
+#define _FW_TABLE_H_
+
+union acpi_subtable_headers;
+
+typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
+ const unsigned long end);
+
+typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header,
+ void *arg, const unsigned long end);
+
+struct acpi_subtable_proc {
+ int id;
+ acpi_tbl_entry_handler handler;
+ acpi_tbl_entry_handler_arg handler_arg;
+ void *arg;
+ int count;
+};
+
+union fw_table_header {
+ struct acpi_table_header acpi;
+ struct acpi_table_cdat cdat;
+};
+
+union acpi_subtable_headers {
+ struct acpi_subtable_header common;
+ struct acpi_hmat_structure hmat;
+ struct acpi_prmt_module_header prmt;
+ struct acpi_cedt_header cedt;
+ struct acpi_cdat_header cdat;
+};
+
+int acpi_parse_entries_array(char *id, unsigned long table_size,
+ union fw_table_header *table_header,
+ unsigned long max_length,
+ struct acpi_subtable_proc *proc,
+ int proc_num, unsigned int max_entries);
+
+int cdat_table_parse(enum acpi_cdat_type type,
+ acpi_tbl_entry_handler_arg handler_arg, void *arg,
+ struct acpi_table_cdat *table_header,
+ unsigned long length);
+
+/* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */
+#if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS)
+#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x)
+#define __init_or_fwtbl_lib __init_or_acpilib
+#else
+#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL)
+#define __init_or_fwtbl_lib
+#endif
+
+#endif
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 5700451b300f..0d79070c5a70 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -9,10 +9,16 @@
#ifndef _LINUX_FWNODE_H_
#define _LINUX_FWNODE_H_
-#include <linux/types.h>
-#include <linux/list.h>
#include <linux/bits.h>
#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+enum dev_dma_attr {
+ DEV_DMA_NOT_SUPPORTED,
+ DEV_DMA_NON_COHERENT,
+ DEV_DMA_COHERENT,
+};
struct fwnode_operations;
struct device;
@@ -41,6 +47,8 @@ struct device;
struct fwnode_handle {
struct fwnode_handle *secondary;
const struct fwnode_operations *ops;
+
+ /* The below is used solely by device links, don't use otherwise */
struct device *dev;
struct list_head suppliers;
struct list_head consumers;
@@ -51,8 +59,10 @@ struct fwnode_handle {
* fwnode link flags
*
* CYCLE: The fwnode link is part of a cycle. Don't defer probe.
+ * IGNORE: Completely ignore this link, even during cycle detection.
*/
#define FWLINK_FLAG_CYCLE BIT(0)
+#define FWLINK_FLAG_IGNORE BIT(1)
struct fwnode_link {
struct fwnode_handle *supplier;
@@ -185,7 +195,6 @@ struct fwnode_operations {
if (fwnode_has_op(fwnode, op)) \
(fwnode)->ops->op(fwnode, ## __VA_ARGS__); \
} while (false)
-#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev)
static inline void fwnode_init(struct fwnode_handle *fwnode,
const struct fwnode_operations *ops)
@@ -207,9 +216,10 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode,
fwnode->flags &= ~FWNODE_FLAG_INITIALIZED;
}
-extern bool fw_devlink_is_strict(void);
-int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup);
+int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup,
+ u8 flags);
void fwnode_links_purge(struct fwnode_handle *fwnode);
void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode);
+bool fw_devlink_is_strict(void);
#endif
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index 107613f7d792..f3512fddf3d7 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -5,7 +5,7 @@
* DOC: Generic radix trees/sparse arrays
*
* Very simple and minimalistic, supporting arbitrary size entries up to
- * PAGE_SIZE.
+ * GENRADIX_NODE_SIZE.
*
* A genradix is defined with the type it will store, like so:
*
@@ -38,18 +38,22 @@
#include <asm/page.h>
#include <linux/bug.h>
+#include <linux/limits.h>
#include <linux/log2.h>
#include <linux/math.h>
#include <linux/types.h>
struct genradix_root;
+#define GENRADIX_NODE_SHIFT 9
+#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT)
+
struct __genradix {
struct genradix_root *root;
};
/*
- * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE:
*/
#define __GENRADIX_INITIALIZER \
@@ -100,14 +104,14 @@ void __genradix_free(struct __genradix *);
static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
{
if (__builtin_constant_p(obj_size))
- BUILD_BUG_ON(obj_size > PAGE_SIZE);
+ BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE);
else
- BUG_ON(obj_size > PAGE_SIZE);
+ BUG_ON(obj_size > GENRADIX_NODE_SIZE);
if (!is_power_of_2(obj_size)) {
- size_t objs_per_page = PAGE_SIZE / obj_size;
+ size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size;
- return (idx / objs_per_page) * PAGE_SIZE +
+ return (idx / objs_per_page) * GENRADIX_NODE_SIZE +
(idx % objs_per_page) * obj_size;
} else {
return idx * obj_size;
@@ -116,6 +120,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *)
#define __genradix_obj_size(_radix) sizeof((_radix)->type[0])
+#define __genradix_objs_per_page(_radix) \
+ (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0]))
+#define __genradix_page_remainder(_radix) \
+ (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0]))
+
#define __genradix_idx_to_offset(_radix, _idx) \
__idx_to_offset(_idx, __genradix_obj_size(_radix))
@@ -179,16 +188,40 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
#define genradix_iter_peek(_iter, _radix) \
(__genradix_cast(_radix) \
__genradix_iter_peek(_iter, &(_radix)->tree, \
- PAGE_SIZE / __genradix_obj_size(_radix)))
+ __genradix_objs_per_page(_radix)))
+
+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
+ size_t, size_t);
+
+/**
+ * genradix_iter_peek_prev - get first entry at or below iterator's current
+ * position
+ * @_iter: a genradix_iter
+ * @_radix: genradix being iterated over
+ *
+ * If no more entries exist at or below @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek_prev(_iter, _radix) \
+ (__genradix_cast(_radix) \
+ __genradix_iter_peek_prev(_iter, &(_radix)->tree, \
+ __genradix_objs_per_page(_radix), \
+ __genradix_obj_size(_radix) + \
+ __genradix_page_remainder(_radix)))
static inline void __genradix_iter_advance(struct genradix_iter *iter,
size_t obj_size)
{
+ if (iter->offset + obj_size < iter->offset) {
+ iter->offset = SIZE_MAX;
+ iter->pos = SIZE_MAX;
+ return;
+ }
+
iter->offset += obj_size;
if (!is_power_of_2(obj_size) &&
- (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
- iter->offset = round_up(iter->offset, PAGE_SIZE);
+ (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE)
+ iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE);
iter->pos++;
}
@@ -196,6 +229,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
#define genradix_iter_advance(_iter, _radix) \
__genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+static inline void __genradix_iter_rewind(struct genradix_iter *iter,
+ size_t obj_size)
+{
+ if (iter->offset == 0 ||
+ iter->offset == SIZE_MAX) {
+ iter->offset = SIZE_MAX;
+ return;
+ }
+
+ if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0)
+ iter->offset -= GENRADIX_NODE_SIZE % obj_size;
+
+ iter->offset -= obj_size;
+ iter->pos--;
+}
+
+#define genradix_iter_rewind(_iter, _radix) \
+ __genradix_iter_rewind(_iter, __genradix_obj_size(_radix))
+
#define genradix_for_each_from(_radix, _iter, _p, _start) \
for (_iter = genradix_iter_init(_radix, _start); \
(_p = genradix_iter_peek(&_iter, _radix)) != NULL; \
@@ -213,6 +265,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
#define genradix_for_each(_radix, _iter, _p) \
genradix_for_each_from(_radix, _iter, _p, 0)
+#define genradix_last_pos(_radix) \
+ (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1)
+
+/**
+ * genradix_for_each_reverse - iterate over entry in a genradix, reverse order
+ * @_radix: genradix to iterate over
+ * @_iter: a genradix_iter to track current position
+ * @_p: pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each_reverse(_radix, _iter, _p) \
+ for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\
+ (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\
+ genradix_iter_rewind(&_iter, _radix))
+
int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
/**
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 2984b0cb24b1..d4da060b7532 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -2,6 +2,7 @@
#ifndef GENL_MAGIC_FUNC_H
#define GENL_MAGIC_FUNC_H
+#include <linux/args.h>
#include <linux/build_bug.h>
#include <linux/genl_magic_struct.h>
@@ -23,7 +24,7 @@
#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
[tag_name] = { .type = NLA_NESTED },
-static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+static struct nla_policy CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
#include GENL_MAGIC_INCLUDE_FILE
};
@@ -209,7 +210,7 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \
* Magic: define op number to op name mapping {{{1
* {{{2
*/
-static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+static const char *CONCATENATE(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
{
switch (cmd) {
#undef GENL_op
@@ -235,7 +236,7 @@ static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
.cmd = op_name, \
},
-#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+#define ZZZ_genl_ops CONCATENATE(GENL_MAGIC_FAMILY, _genl_ops)
static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
#include GENL_MAGIC_INCLUDE_FILE
};
@@ -248,32 +249,32 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
* and provide register/unregister functions.
* {{{2
*/
-#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+#define ZZZ_genl_family CONCATENATE(GENL_MAGIC_FAMILY, _genl_family)
static struct genl_family ZZZ_genl_family;
/*
* Magic: define multicast groups
* Magic: define multicast group registration helper
*/
-#define ZZZ_genl_mcgrps CONCAT_(GENL_MAGIC_FAMILY, _genl_mcgrps)
+#define ZZZ_genl_mcgrps CONCATENATE(GENL_MAGIC_FAMILY, _genl_mcgrps)
static const struct genl_multicast_group ZZZ_genl_mcgrps[] = {
#undef GENL_mc_group
#define GENL_mc_group(group) { .name = #group, },
#include GENL_MAGIC_INCLUDE_FILE
};
-enum CONCAT_(GENL_MAGIC_FAMILY, group_ids) {
+enum CONCATENATE(GENL_MAGIC_FAMILY, group_ids) {
#undef GENL_mc_group
-#define GENL_mc_group(group) CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group),
+#define GENL_mc_group(group) CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group),
#include GENL_MAGIC_INCLUDE_FILE
};
#undef GENL_mc_group
#define GENL_mc_group(group) \
-static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
+static int CONCATENATE(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
struct sk_buff *skb, gfp_t flags) \
{ \
unsigned int group_id = \
- CONCAT_(GENL_MAGIC_FAMILY, _group_ ## group); \
+ CONCATENATE(GENL_MAGIC_FAMILY, _group_ ## group); \
return genlmsg_multicast(&ZZZ_genl_family, skb, 0, \
group_id, flags); \
}
@@ -289,8 +290,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = {
#ifdef GENL_MAGIC_FAMILY_HDRSZ
.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
#endif
- .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1,
- .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),
+ .maxattr = ARRAY_SIZE(CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy))-1,
+ .policy = CONCATENATE(GENL_MAGIC_FAMILY, _tla_nl_policy),
.ops = ZZZ_genl_ops,
.n_ops = ARRAY_SIZE(ZZZ_genl_ops),
.mcgrps = ZZZ_genl_mcgrps,
@@ -299,12 +300,12 @@ static struct genl_family ZZZ_genl_family __ro_after_init = {
.module = THIS_MODULE,
};
-int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void)
{
return genl_register_family(&ZZZ_genl_family);
}
-void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void)
{
genl_unregister_family(&ZZZ_genl_family);
}
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index f81d48987528..a419d93789ff 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -14,14 +14,12 @@
# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
#endif
+#include <linux/args.h>
#include <linux/genetlink.h>
#include <linux/types.h>
-#define CONCAT__(a,b) a ## b
-#define CONCAT_(a,b) CONCAT__(a,b)
-
-extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
-extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+extern int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void);
/*
* Extension of genl attribute validation policies {{{2
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 665f06675c83..c775ea3c6015 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -8,6 +8,7 @@
#include <linux/topology.h>
struct vm_area_struct;
+struct mempolicy;
/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
-struct folio *folio_alloc(gfp_t gfp, unsigned order);
+struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid);
+struct folio *folio_alloc(gfp_t gfp, unsigned int order);
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
#else
@@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
+static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid)
+{
+ return alloc_pages(gfp, order);
+}
static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
@@ -303,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
struct page_frag_cache;
+void page_frag_cache_drain(struct page_frag_cache *nc);
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-extern void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask);
+void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
+ gfp_t gfp_mask, unsigned int align_mask);
+
+static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align)
+{
+ WARN_ON_ONCE(!is_power_of_2(align));
+ return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
+}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
- return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+ return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
extern void page_frag_free(void *addr);
@@ -320,11 +336,13 @@ extern void page_frag_free(void *addr);
#define free_page(addr) free_pages((addr), 0)
void page_alloc_init_cpuhp(void);
+int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
void page_alloc_init_late(void);
+void setup_pcp_cacheinfo(unsigned int cpu);
/*
* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
@@ -343,6 +361,15 @@ static inline bool gfp_has_io_fs(gfp_t gfp)
return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS);
}
+/*
+ * Check if the gfp flags allow compaction - GFP_NOIO is a really
+ * tricky context because the migration might require IO.
+ */
+static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
+{
+ return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
+}
+
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 6583a58670c5..13becafe41df 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
+#include <linux/bits.h>
+
/* The typedef is in types.h but we want the documentation here */
#if 0
/**
@@ -21,44 +23,78 @@ typedef unsigned int __bitwise gfp_t;
* include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
*/
+enum {
+ ___GFP_DMA_BIT,
+ ___GFP_HIGHMEM_BIT,
+ ___GFP_DMA32_BIT,
+ ___GFP_MOVABLE_BIT,
+ ___GFP_RECLAIMABLE_BIT,
+ ___GFP_HIGH_BIT,
+ ___GFP_IO_BIT,
+ ___GFP_FS_BIT,
+ ___GFP_ZERO_BIT,
+ ___GFP_UNUSED_BIT, /* 0x200u unused */
+ ___GFP_DIRECT_RECLAIM_BIT,
+ ___GFP_KSWAPD_RECLAIM_BIT,
+ ___GFP_WRITE_BIT,
+ ___GFP_NOWARN_BIT,
+ ___GFP_RETRY_MAYFAIL_BIT,
+ ___GFP_NOFAIL_BIT,
+ ___GFP_NORETRY_BIT,
+ ___GFP_MEMALLOC_BIT,
+ ___GFP_COMP_BIT,
+ ___GFP_NOMEMALLOC_BIT,
+ ___GFP_HARDWALL_BIT,
+ ___GFP_THISNODE_BIT,
+ ___GFP_ACCOUNT_BIT,
+ ___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+ ___GFP_SKIP_ZERO_BIT,
+ ___GFP_SKIP_KASAN_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+ ___GFP_NOLOCKDEP_BIT,
+#endif
+ ___GFP_LAST_BIT
+};
+
/* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA 0x01u
-#define ___GFP_HIGHMEM 0x02u
-#define ___GFP_DMA32 0x04u
-#define ___GFP_MOVABLE 0x08u
-#define ___GFP_RECLAIMABLE 0x10u
-#define ___GFP_HIGH 0x20u
-#define ___GFP_IO 0x40u
-#define ___GFP_FS 0x80u
-#define ___GFP_ZERO 0x100u
+#define ___GFP_DMA BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO BIT(___GFP_IO_BIT)
+#define ___GFP_FS BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO BIT(___GFP_ZERO_BIT)
/* 0x200u unused */
-#define ___GFP_DIRECT_RECLAIM 0x400u
-#define ___GFP_KSWAPD_RECLAIM 0x800u
-#define ___GFP_WRITE 0x1000u
-#define ___GFP_NOWARN 0x2000u
-#define ___GFP_RETRY_MAYFAIL 0x4000u
-#define ___GFP_NOFAIL 0x8000u
-#define ___GFP_NORETRY 0x10000u
-#define ___GFP_MEMALLOC 0x20000u
-#define ___GFP_COMP 0x40000u
-#define ___GFP_NOMEMALLOC 0x80000u
-#define ___GFP_HARDWALL 0x100000u
-#define ___GFP_THISNODE 0x200000u
-#define ___GFP_ACCOUNT 0x400000u
-#define ___GFP_ZEROTAGS 0x800000u
+#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT)
#ifdef CONFIG_KASAN_HW_TAGS
-#define ___GFP_SKIP_ZERO 0x1000000u
-#define ___GFP_SKIP_KASAN 0x2000000u
+#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT)
#else
#define ___GFP_SKIP_ZERO 0
#define ___GFP_SKIP_KASAN 0
#endif
#ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP 0x4000000u
+#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT)
#else
#define ___GFP_NOLOCKDEP 0
#endif
-/* If the above are modified, __GFP_BITS_SHIFT may need updating */
/*
* Physical address zone modifiers (see linux/mmzone.h - low four bits)
@@ -162,25 +198,25 @@ typedef unsigned int __bitwise gfp_t;
* %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
*
* The default allocator behavior depends on the request size. We have a concept
- * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
+ * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
* !costly allocations are too essential to fail so they are implicitly
* non-failing by default (with some exceptions like OOM victims might fail so
* the caller still has to check for failures) while costly requests try to be
* not disruptive and back off even without invoking the OOM killer.
* The following three modifiers might be used to override some of these
- * implicit rules
+ * implicit rules.
*
* %__GFP_NORETRY: The VM implementation will try only very lightweight
* memory direct reclaim to get some memory under memory pressure (thus
* it can sleep). It will avoid disruptive actions like OOM killer. The
* caller must handle the failure which is quite likely to happen under
* heavy memory pressure. The flag is suitable when failure can easily be
- * handled at small cost, such as reduced throughput
+ * handled at small cost, such as reduced throughput.
*
* %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
* procedures that have previously failed if there is some indication
- * that progress has been made else where. It can wait for other
- * tasks to attempt high level approaches to freeing memory such as
+ * that progress has been made elsewhere. It can wait for other
+ * tasks to attempt high-level approaches to freeing memory such as
* compaction (which removes fragmentation) and page-out.
* There is still a definite limit to the number of retries, but it is
* a larger limit than with %__GFP_NORETRY.
@@ -230,7 +266,7 @@ typedef unsigned int __bitwise gfp_t;
* is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
* __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
* memory tags at the same time as zeroing memory has minimal additional
- * performace impact.
+ * performance impact.
*
* %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
* Used for userspace and vmalloc pages; the latter are unpoisoned by
@@ -249,7 +285,7 @@ typedef unsigned int __bitwise gfp_t;
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
/* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/**
@@ -274,7 +310,8 @@ typedef unsigned int __bitwise gfp_t;
* accounted to kmemcg.
*
* %GFP_NOWAIT is for kernel allocations that should not stall for direct
- * reclaim, start physical IO or use any filesystem callback.
+ * reclaim, start physical IO or use any filesystem callback. It is very
+ * likely to fail to allocate memory, even for very small allocations.
*
* %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
* that do not require the starting of any physical IO.
@@ -325,7 +362,7 @@ typedef unsigned int __bitwise gfp_t;
#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
-#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
#define GFP_NOIO (__GFP_RECLAIM)
#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 1c4385a00f88..db2dfbae8edb 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
int gpiod_set_config(struct gpio_desc *desc, unsigned long config);
int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce);
-int gpiod_set_transitory(struct gpio_desc *desc, bool transitory);
void gpiod_toggle_active_low(struct gpio_desc *desc);
int gpiod_is_active_low(const struct gpio_desc *desc);
@@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun
return -ENOSYS;
}
-static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
-{
- /* GPIO can never have been requested */
- WARN_ON(desc);
- return -ENOSYS;
-}
-
static inline void gpiod_toggle_active_low(struct gpio_desc *desc)
{
/* GPIO can never have been requested */
@@ -614,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev);
int devm_acpi_dev_add_driver_gpios(struct device *dev,
const struct acpi_gpio_mapping *gpios);
-struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label);
-
#else /* CONFIG_GPIOLIB && CONFIG_ACPI */
#include <linux/err.h>
@@ -633,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
return -ENXIO;
}
-static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin,
- char *label)
-{
- return ERR_PTR(-ENOSYS);
-}
-
#endif /* CONFIG_GPIOLIB && CONFIG_ACPI */
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 4f0c5d62c8f3..f8617eaf08ba 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -3,6 +3,8 @@
#define __LINUX_GPIO_DRIVER_H
#include <linux/bits.h>
+#include <linux/cleanup.h>
+#include <linux/err.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/irqhandler.h>
@@ -333,10 +335,12 @@ struct gpio_irq_chip {
* (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN),
* or negative error. It is recommended to always implement this
* function, even on input-only or output-only gpio chips.
- * @direction_input: configures signal "offset" as input, or returns error
- * This can be omitted on input-only or output-only gpio chips.
- * @direction_output: configures signal "offset" as output, or returns error
- * This can be omitted on input-only or output-only gpio chips.
+ * @direction_input: configures signal "offset" as input, returns 0 on success
+ * or a negative error number. This can be omitted on input-only or
+ * output-only gpio chips.
+ * @direction_output: configures signal "offset" as output, returns 0 on
+ * success or a negative error number. This can be omitted on input-only
+ * or output-only gpio chips.
* @get: returns value for signal "offset", 0=low, 1=high, or negative error
* @get_multiple: reads values for multiple signals defined by "mask" and
* stores them in "bits", returns 0 on success or negative error
@@ -529,29 +533,64 @@ struct gpio_chip {
#endif /* CONFIG_OF_GPIO */
};
-extern const char *gpiochip_is_requested(struct gpio_chip *gc,
- unsigned int offset);
+char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset);
+
+
+struct _gpiochip_for_each_data {
+ const char **label;
+ unsigned int *i;
+};
+
+DEFINE_CLASS(_gpiochip_for_each_data,
+ struct _gpiochip_for_each_data,
+ if (*_T.label) kfree(*_T.label),
+ ({
+ struct _gpiochip_for_each_data _data = { label, i };
+ *_data.i = 0;
+ _data;
+ }),
+ const char **label, int *i)
+
+/**
+ * for_each_hwgpio - Iterates over all GPIOs for given chip.
+ * @_chip: Chip to iterate over.
+ * @_i: Loop counter.
+ * @_label: Place to store the address of the label if the GPIO is requested.
+ * Set to NULL for unused GPIOs.
+ */
+#define for_each_hwgpio(_chip, _i, _label) \
+ for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \
+ *_data.i < _chip->ngpio; \
+ (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \
+ if (IS_ERR(*_data.label = \
+ gpiochip_dup_line_label(_chip, *_data.i))) {} \
+ else
/**
* for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range
- * @chip: the chip to query
- * @i: loop variable
- * @base: first GPIO in the range
- * @size: amount of GPIOs to check starting from @base
- * @label: label of current GPIO
+ * @_chip: the chip to query
+ * @_i: loop variable
+ * @_base: first GPIO in the range
+ * @_size: amount of GPIOs to check starting from @base
+ * @_label: label of current GPIO
*/
-#define for_each_requested_gpio_in_range(chip, i, base, size, label) \
- for (i = 0; i < size; i++) \
- if ((label = gpiochip_is_requested(chip, base + i)) == NULL) {} else
+#define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \
+ for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \
+ *_data.i < _size; \
+ (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \
+ if ((*_data.label = \
+ gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \
+ else if (IS_ERR(*_data.label)) {} \
+ else
/* Iterates over all requested GPIO of the given @chip */
#define for_each_requested_gpio(chip, i, label) \
for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label)
/* add/remove chips */
-extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
- struct lock_class_key *lock_key,
- struct lock_class_key *request_key);
+int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
+ struct lock_class_key *lock_key,
+ struct lock_class_key *request_key);
/**
* gpiochip_add_data() - register a gpio_chip
@@ -599,13 +638,22 @@ static inline int gpiochip_add(struct gpio_chip *gc)
{
return gpiochip_add_data(gc, NULL);
}
-extern void gpiochip_remove(struct gpio_chip *gc);
-extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data,
- struct lock_class_key *lock_key,
- struct lock_class_key *request_key);
+void gpiochip_remove(struct gpio_chip *gc);
+int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc,
+ void *data, struct lock_class_key *lock_key,
+ struct lock_class_key *request_key);
+
+struct gpio_device *gpio_device_find(const void *data,
+ int (*match)(struct gpio_chip *gc,
+ const void *data));
-extern struct gpio_chip *gpiochip_find(void *data,
- int (*match)(struct gpio_chip *gc, void *data));
+struct gpio_device *gpio_device_get(struct gpio_device *gdev);
+void gpio_device_put(struct gpio_device *gdev);
+
+DEFINE_FREE(gpio_device_put, struct gpio_device *,
+ if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T))
+
+struct device *gpio_device_to_device(struct gpio_device *gdev);
bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset);
int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset);
@@ -672,25 +720,12 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
#define BGPIOF_NO_OUTPUT BIT(5) /* only input */
#define BGPIOF_NO_SET_ON_INPUT BIT(6)
-int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
- irq_hw_number_t hwirq);
-void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq);
-
-int gpiochip_irq_domain_activate(struct irq_domain *domain,
- struct irq_data *data, bool reserve);
-void gpiochip_irq_domain_deactivate(struct irq_domain *domain,
- struct irq_data *data);
-
-bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc,
- unsigned int offset);
-
#ifdef CONFIG_GPIOLIB_IRQCHIP
int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
struct irq_domain *domain);
#else
#include <asm/bug.h>
-#include <asm/errno.h>
static inline int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
struct irq_domain *domain)
@@ -758,18 +793,29 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
enum gpiod_flags dflags);
void gpiochip_free_own_desc(struct gpio_desc *desc);
+struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum);
+struct gpio_desc *
+gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum);
+
+struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev);
+
#ifdef CONFIG_GPIOLIB
/* lock/unlock as IRQ */
int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset);
void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset);
-
struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
+struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc);
-#else /* CONFIG_GPIOLIB */
+/* struct gpio_device getters */
+int gpio_device_get_base(struct gpio_device *gdev);
+const char *gpio_device_get_label(struct gpio_device *gdev);
-#include <linux/err.h>
+struct gpio_device *gpio_device_find_by_label(const char *label);
+struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
+
+#else /* CONFIG_GPIOLIB */
#include <asm/bug.h>
@@ -780,6 +826,36 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
return ERR_PTR(-ENODEV);
}
+static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc)
+{
+ WARN_ON(1);
+ return ERR_PTR(-ENODEV);
+}
+
+static inline int gpio_device_get_base(struct gpio_device *gdev)
+{
+ WARN_ON(1);
+ return -ENODEV;
+}
+
+static inline const char *gpio_device_get_label(struct gpio_device *gdev)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static inline struct gpio_device *gpio_device_find_by_label(const char *label)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
unsigned int offset)
{
diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h
new file mode 100644
index 000000000000..b5a84864650d
--- /dev/null
+++ b/include/linux/gpio/gpio-nomadik.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GPIO_NOMADIK_H
+#define __LINUX_GPIO_NOMADIK_H
+
+struct fwnode_handle;
+
+/* Package definitions */
+#define PINCTRL_NMK_STN8815 0
+#define PINCTRL_NMK_DB8500 1
+
+#define GPIO_BLOCK_SHIFT 5
+#define NMK_GPIO_PER_CHIP BIT(GPIO_BLOCK_SHIFT)
+#define NMK_MAX_BANKS DIV_ROUND_UP(512, NMK_GPIO_PER_CHIP)
+
+/* Register in the logic block */
+#define NMK_GPIO_DAT 0x00
+#define NMK_GPIO_DATS 0x04
+#define NMK_GPIO_DATC 0x08
+#define NMK_GPIO_PDIS 0x0c
+#define NMK_GPIO_DIR 0x10
+#define NMK_GPIO_DIRS 0x14
+#define NMK_GPIO_DIRC 0x18
+#define NMK_GPIO_SLPC 0x1c
+#define NMK_GPIO_AFSLA 0x20
+#define NMK_GPIO_AFSLB 0x24
+#define NMK_GPIO_LOWEMI 0x28
+
+#define NMK_GPIO_RIMSC 0x40
+#define NMK_GPIO_FIMSC 0x44
+#define NMK_GPIO_IS 0x48
+#define NMK_GPIO_IC 0x4c
+#define NMK_GPIO_RWIMSC 0x50
+#define NMK_GPIO_FWIMSC 0x54
+#define NMK_GPIO_WKS 0x58
+/* These appear in DB8540 and later ASICs */
+#define NMK_GPIO_EDGELEVEL 0x5C
+#define NMK_GPIO_LEVEL 0x60
+
+/* Pull up/down values */
+enum nmk_gpio_pull {
+ NMK_GPIO_PULL_NONE,
+ NMK_GPIO_PULL_UP,
+ NMK_GPIO_PULL_DOWN,
+};
+
+/* Sleep mode */
+enum nmk_gpio_slpm {
+ NMK_GPIO_SLPM_INPUT,
+ NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT,
+ NMK_GPIO_SLPM_NOCHANGE,
+ NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE,
+};
+
+struct nmk_gpio_chip {
+ struct gpio_chip chip;
+ void __iomem *addr;
+ struct clk *clk;
+ unsigned int bank;
+ void (*set_ioforce)(bool enable);
+ spinlock_t lock;
+ bool sleepmode;
+ bool is_mobileye_soc;
+ /* Keep track of configured edges */
+ u32 edge_rising;
+ u32 edge_falling;
+ u32 real_wake;
+ u32 rwimsc;
+ u32 fwimsc;
+ u32 rimsc;
+ u32 fimsc;
+ u32 pull_up;
+ u32 lowemi;
+};
+
+/* Alternate functions: function C is set in hw by setting both A and B */
+#define NMK_GPIO_ALT_GPIO 0
+#define NMK_GPIO_ALT_A 1
+#define NMK_GPIO_ALT_B 2
+#define NMK_GPIO_ALT_C (NMK_GPIO_ALT_A | NMK_GPIO_ALT_B)
+
+#define NMK_GPIO_ALT_CX_SHIFT 2
+#define NMK_GPIO_ALT_C1 ((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C2 ((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C3 ((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+#define NMK_GPIO_ALT_C4 ((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C)
+
+#define PRCM_GPIOCR_ALTCX(pin_num,\
+ altc1_used, altc1_ri, altc1_cb,\
+ altc2_used, altc2_ri, altc2_cb,\
+ altc3_used, altc3_ri, altc3_cb,\
+ altc4_used, altc4_ri, altc4_cb)\
+{\
+ .pin = pin_num,\
+ .altcx[PRCM_IDX_GPIOCR_ALTC1] = {\
+ .used = altc1_used,\
+ .reg_index = altc1_ri,\
+ .control_bit = altc1_cb\
+ },\
+ .altcx[PRCM_IDX_GPIOCR_ALTC2] = {\
+ .used = altc2_used,\
+ .reg_index = altc2_ri,\
+ .control_bit = altc2_cb\
+ },\
+ .altcx[PRCM_IDX_GPIOCR_ALTC3] = {\
+ .used = altc3_used,\
+ .reg_index = altc3_ri,\
+ .control_bit = altc3_cb\
+ },\
+ .altcx[PRCM_IDX_GPIOCR_ALTC4] = {\
+ .used = altc4_used,\
+ .reg_index = altc4_ri,\
+ .control_bit = altc4_cb\
+ },\
+}
+
+/**
+ * enum prcm_gpiocr_reg_index
+ * Used to reference an PRCM GPIOCR register address.
+ */
+enum prcm_gpiocr_reg_index {
+ PRCM_IDX_GPIOCR1,
+ PRCM_IDX_GPIOCR2,
+ PRCM_IDX_GPIOCR3
+};
+/**
+ * enum prcm_gpiocr_altcx_index
+ * Used to reference an Other alternate-C function.
+ */
+enum prcm_gpiocr_altcx_index {
+ PRCM_IDX_GPIOCR_ALTC1,
+ PRCM_IDX_GPIOCR_ALTC2,
+ PRCM_IDX_GPIOCR_ALTC3,
+ PRCM_IDX_GPIOCR_ALTC4,
+ PRCM_IDX_GPIOCR_ALTC_MAX,
+};
+
+/**
+ * struct prcm_gpio_altcx - Other alternate-C function
+ * @used: other alternate-C function availability
+ * @reg_index: PRCM GPIOCR register index used to control the function
+ * @control_bit: PRCM GPIOCR bit used to control the function
+ */
+struct prcm_gpiocr_altcx {
+ bool used:1;
+ u8 reg_index:2;
+ u8 control_bit:5;
+} __packed;
+
+/**
+ * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin
+ * @pin: The pin number
+ * @altcx: array of other alternate-C[1-4] functions
+ */
+struct prcm_gpiocr_altcx_pin_desc {
+ unsigned short pin;
+ struct prcm_gpiocr_altcx altcx[PRCM_IDX_GPIOCR_ALTC_MAX];
+};
+
+/**
+ * struct nmk_function - Nomadik pinctrl mux function
+ * @name: The name of the function, exported to pinctrl core.
+ * @groups: An array of pin groups that may select this function.
+ * @ngroups: The number of entries in @groups.
+ */
+struct nmk_function {
+ const char *name;
+ const char * const *groups;
+ unsigned int ngroups;
+};
+
+/**
+ * struct nmk_pingroup - describes a Nomadik pin group
+ * @grp: Generic data of the pin group (name and pins)
+ * @altsetting: the altsetting to apply to all pins in this group to
+ * configure them to be used by a function
+ */
+struct nmk_pingroup {
+ struct pingroup grp;
+ int altsetting;
+};
+
+#define NMK_PIN_GROUP(a, b) \
+ { \
+ .grp = PINCTRL_PINGROUP(#a, a##_pins, ARRAY_SIZE(a##_pins)), \
+ .altsetting = b, \
+ }
+
+/**
+ * struct nmk_pinctrl_soc_data - Nomadik pin controller per-SoC configuration
+ * @pins: An array describing all pins the pin controller affects.
+ * All pins which are also GPIOs must be listed first within the
+ * array, and be numbered identically to the GPIO controller's
+ * numbering.
+ * @npins: The number of entries in @pins.
+ * @functions: The functions supported on this SoC.
+ * @nfunction: The number of entries in @functions.
+ * @groups: An array describing all pin groups the pin SoC supports.
+ * @ngroups: The number of entries in @groups.
+ * @altcx_pins: The pins that support Other alternate-C function on this SoC
+ * @npins_altcx: The number of Other alternate-C pins
+ * @prcm_gpiocr_registers: The array of PRCM GPIOCR registers on this SoC
+ */
+struct nmk_pinctrl_soc_data {
+ const struct pinctrl_pin_desc *pins;
+ unsigned int npins;
+ const struct nmk_function *functions;
+ unsigned int nfunctions;
+ const struct nmk_pingroup *groups;
+ unsigned int ngroups;
+ const struct prcm_gpiocr_altcx_pin_desc *altcx_pins;
+ unsigned int npins_altcx;
+ const u16 *prcm_gpiocr_registers;
+};
+
+#ifdef CONFIG_PINCTRL_STN8815
+
+void nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc);
+
+#else
+
+static inline void
+nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc)
+{
+}
+
+#endif
+
+#ifdef CONFIG_PINCTRL_DB8500
+
+void nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc);
+
+#else
+
+static inline void
+nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc)
+{
+}
+
+#endif
+
+#ifdef CONFIG_PINCTRL_DB8540
+
+void nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc);
+
+#else
+
+static inline void
+nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc)
+{
+}
+
+#endif
+
+struct platform_device;
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Symbols declared in gpio-nomadik used by pinctrl-nomadik. If pinctrl-nomadik
+ * is enabled, then gpio-nomadik is enabled as well; the reverse if not always
+ * true.
+ */
+void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev,
+ struct gpio_chip *chip, unsigned int offset,
+ unsigned int gpio);
+
+#else
+
+static inline void nmk_gpio_dbg_show_one(struct seq_file *s,
+ struct pinctrl_dev *pctldev,
+ struct gpio_chip *chip,
+ unsigned int offset,
+ unsigned int gpio)
+{
+}
+
+#endif
+
+void __nmk_gpio_make_output(struct nmk_gpio_chip *nmk_chip,
+ unsigned int offset, int val);
+void __nmk_gpio_set_slpm(struct nmk_gpio_chip *nmk_chip, unsigned int offset,
+ enum nmk_gpio_slpm mode);
+struct nmk_gpio_chip *nmk_gpio_populate_chip(struct fwnode_handle *fwnode,
+ struct platform_device *pdev);
+
+/* Symbols declared in pinctrl-nomadik used by gpio-nomadik. */
+#ifdef CONFIG_PINCTRL_NOMADIK
+extern struct nmk_gpio_chip *nmk_gpio_chips[NMK_MAX_BANKS];
+extern spinlock_t nmk_gpio_slpm_lock;
+int __maybe_unused nmk_prcm_gpiocr_get_mode(struct pinctrl_dev *pctldev,
+ int gpio);
+#endif
+
+#endif /* __LINUX_GPIO_NOMADIK_H */
diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h
index 6c75c8bd44a0..1a14e239221f 100644
--- a/include/linux/gpio/property.h
+++ b/include/linux/gpio/property.h
@@ -2,7 +2,6 @@
#ifndef __LINUX_GPIO_PROPERTY_H
#define __LINUX_GPIO_PROPERTY_H
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
#include <linux/property.h>
#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
diff --git a/include/linux/greybus.h b/include/linux/greybus.h
index 18c0fb958b74..634c9511cf78 100644
--- a/include/linux/greybus.h
+++ b/include/linux/greybus.h
@@ -104,44 +104,14 @@ void gb_debugfs_init(void);
void gb_debugfs_cleanup(void);
struct dentry *gb_debugfs_get(void);
-extern struct bus_type greybus_bus_type;
-
-extern struct device_type greybus_hd_type;
-extern struct device_type greybus_module_type;
-extern struct device_type greybus_interface_type;
-extern struct device_type greybus_control_type;
-extern struct device_type greybus_bundle_type;
-extern struct device_type greybus_svc_type;
-
-static inline int is_gb_host_device(const struct device *dev)
-{
- return dev->type == &greybus_hd_type;
-}
-
-static inline int is_gb_module(const struct device *dev)
-{
- return dev->type == &greybus_module_type;
-}
-
-static inline int is_gb_interface(const struct device *dev)
-{
- return dev->type == &greybus_interface_type;
-}
-
-static inline int is_gb_control(const struct device *dev)
-{
- return dev->type == &greybus_control_type;
-}
-
-static inline int is_gb_bundle(const struct device *dev)
-{
- return dev->type == &greybus_bundle_type;
-}
-
-static inline int is_gb_svc(const struct device *dev)
-{
- return dev->type == &greybus_svc_type;
-}
+extern const struct bus_type greybus_bus_type;
+
+extern const struct device_type greybus_hd_type;
+extern const struct device_type greybus_module_type;
+extern const struct device_type greybus_interface_type;
+extern const struct device_type greybus_control_type;
+extern const struct device_type greybus_bundle_type;
+extern const struct device_type greybus_svc_type;
static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id)
{
diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h
index aeb8f9243545..820134b0105c 100644
--- a/include/linux/greybus/greybus_protocols.h
+++ b/include/linux/greybus/greybus_protocols.h
@@ -232,9 +232,7 @@ struct gb_fw_download_fetch_firmware_request {
__le32 size;
} __packed;
-struct gb_fw_download_fetch_firmware_response {
- __u8 data[0];
-} __packed;
+/* gb_fw_download_fetch_firmware_response contains no other data */
/* firmware download release firmware request */
struct gb_fw_download_release_firmware_request {
@@ -414,9 +412,7 @@ struct gb_bootrom_get_firmware_request {
__le32 size;
} __packed;
-struct gb_bootrom_get_firmware_response {
- __u8 data[0];
-} __packed;
+/* gb_bootrom_get_firmware_response contains no other data */
/* Bootrom protocol Ready to boot request */
struct gb_bootrom_ready_to_boot_request {
diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h
index 5afaf5f06856..da547fb9071b 100644
--- a/include/linux/greybus/svc.h
+++ b/include/linux/greybus/svc.h
@@ -100,7 +100,4 @@ bool gb_svc_watchdog_enabled(struct gb_svc *svc);
int gb_svc_watchdog_enable(struct gb_svc *svc);
int gb_svc_watchdog_disable(struct gb_svc *svc);
-int gb_svc_protocol_init(void);
-void gb_svc_protocol_exit(void);
-
#endif /* __SVC_H */
diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
new file mode 100644
index 000000000000..f316c8d0f3fc
--- /dev/null
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -0,0 +1,1423 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2020-2023 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef CPUCP_IF_H
+#define CPUCP_IF_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "hl_boot_if.h"
+
+#define NUM_HBM_PSEUDO_CH 2
+#define NUM_HBM_CH_PER_DEV 8
+#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0
+#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001
+#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1
+#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002
+#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2
+#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004
+#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3
+#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008
+#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4
+#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010
+#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5
+#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020
+#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6
+#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0
+
+#define PLL_MAP_MAX_BITS 128
+#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8)
+
+enum eq_event_id {
+ EQ_EVENT_NIC_STS_REQUEST = 0,
+ EQ_EVENT_PWR_MODE_0,
+ EQ_EVENT_PWR_MODE_1,
+ EQ_EVENT_PWR_MODE_2,
+ EQ_EVENT_PWR_MODE_3,
+ EQ_EVENT_PWR_BRK_ENTRY,
+ EQ_EVENT_PWR_BRK_EXIT,
+ EQ_EVENT_HEARTBEAT,
+};
+
+/*
+ * info of the pkt queue pointers in the first async occurrence
+ */
+struct cpucp_pkt_sync_err {
+ __le32 pi;
+ __le32 ci;
+};
+
+struct hl_eq_hbm_ecc_data {
+ /* SERR counter */
+ __le32 sec_cnt;
+ /* DERR counter */
+ __le32 dec_cnt;
+ /* Supplemental Information according to the mask bits */
+ __le32 hbm_ecc_info;
+ /* Address in hbm where the ecc happened */
+ __le32 first_addr;
+ /* SERR continuous address counter */
+ __le32 sec_cont_cnt;
+ __le32 pad;
+};
+
+/*
+ * EVENT QUEUE
+ */
+
+struct hl_eq_header {
+ __le32 reserved;
+ __le32 ctl;
+};
+
+struct hl_eq_ecc_data {
+ __le64 ecc_address;
+ __le64 ecc_syndrom;
+ __u8 memory_wrapper_idx;
+ __u8 is_critical;
+ __le16 block_id;
+ __u8 pad[4];
+};
+
+enum hl_sm_sei_cause {
+ SM_SEI_SO_OVERFLOW,
+ SM_SEI_LBW_4B_UNALIGNED,
+ SM_SEI_AXI_RESPONSE_ERR
+};
+
+struct hl_eq_sm_sei_data {
+ __le32 sei_log;
+ /* enum hl_sm_sei_cause */
+ __u8 sei_cause;
+ __u8 pad[3];
+};
+
+enum hl_fw_alive_severity {
+ FW_ALIVE_SEVERITY_MINOR,
+ FW_ALIVE_SEVERITY_CRITICAL
+};
+
+struct hl_eq_fw_alive {
+ __le64 uptime_seconds;
+ __le32 process_id;
+ __le32 thread_id;
+ /* enum hl_fw_alive_severity */
+ __u8 severity;
+ __u8 pad[7];
+};
+
+struct hl_eq_intr_cause {
+ __le64 intr_cause_data;
+};
+
+struct hl_eq_pcie_drain_ind_data {
+ struct hl_eq_intr_cause intr_cause;
+ __le64 drain_wr_addr_lbw;
+ __le64 drain_rd_addr_lbw;
+ __le64 drain_wr_addr_hbw;
+ __le64 drain_rd_addr_hbw;
+};
+
+struct hl_eq_razwi_lbw_info_regs {
+ __le32 rr_aw_razwi_reg;
+ __le32 rr_aw_razwi_id_reg;
+ __le32 rr_ar_razwi_reg;
+ __le32 rr_ar_razwi_id_reg;
+};
+
+struct hl_eq_razwi_hbw_info_regs {
+ __le32 rr_aw_razwi_hi_reg;
+ __le32 rr_aw_razwi_lo_reg;
+ __le32 rr_aw_razwi_id_reg;
+ __le32 rr_ar_razwi_hi_reg;
+ __le32 rr_ar_razwi_lo_reg;
+ __le32 rr_ar_razwi_id_reg;
+};
+
+/* razwi_happened masks */
+#define RAZWI_HAPPENED_HBW 0x1
+#define RAZWI_HAPPENED_LBW 0x2
+#define RAZWI_HAPPENED_AW 0x4
+#define RAZWI_HAPPENED_AR 0x8
+
+struct hl_eq_razwi_info {
+ __le32 razwi_happened_mask;
+ union {
+ struct hl_eq_razwi_lbw_info_regs lbw;
+ struct hl_eq_razwi_hbw_info_regs hbw;
+ };
+ __le32 pad;
+};
+
+struct hl_eq_razwi_with_intr_cause {
+ struct hl_eq_razwi_info razwi_info;
+ struct hl_eq_intr_cause intr_cause;
+};
+
+#define HBM_CA_ERR_CMD_LIFO_LEN 8
+#define HBM_RD_ERR_DATA_LIFO_LEN 8
+#define HBM_WR_PAR_CMD_LIFO_LEN 11
+
+enum hl_hbm_sei_cause {
+ /* Command/address parity error event is split into 2 events due to
+ * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix
+ * for even HBM CK_t cycles
+ */
+ HBM_SEI_CMD_PARITY_EVEN,
+ HBM_SEI_CMD_PARITY_ODD,
+ /* Read errors can be reflected as a combination of SERR/DERR/parity
+ * errors. Therefore, we define one event for all read error types.
+ * LKD will perform further proccessing.
+ */
+ HBM_SEI_READ_ERR,
+ HBM_SEI_WRITE_DATA_PARITY_ERR,
+ HBM_SEI_CATTRIP,
+ HBM_SEI_MEM_BIST_FAIL,
+ HBM_SEI_DFI,
+ HBM_SEI_INV_TEMP_READ_OUT,
+ HBM_SEI_BIST_FAIL,
+};
+
+/* Masks for parsing hl_hbm_sei_headr fields */
+#define HBM_ECC_SERR_CNTR_MASK 0xFF
+#define HBM_ECC_DERR_CNTR_MASK 0xFF00
+#define HBM_RD_PARITY_CNTR_MASK 0xFF0000
+
+/* HBM index and MC index are known by the event_id */
+struct hl_hbm_sei_header {
+ union {
+ /* relevant only in case of HBM read error */
+ struct {
+ __u8 ecc_serr_cnt;
+ __u8 ecc_derr_cnt;
+ __u8 read_par_cnt;
+ __u8 reserved;
+ };
+ /* All other cases */
+ __le32 cnt;
+ };
+ __u8 sei_cause; /* enum hl_hbm_sei_cause */
+ __u8 mc_channel; /* range: 0-3 */
+ __u8 mc_pseudo_channel; /* range: 0-7 */
+ __u8 is_critical;
+};
+
+#define HBM_RD_ADDR_SID_SHIFT 0
+#define HBM_RD_ADDR_SID_MASK 0x1
+#define HBM_RD_ADDR_BG_SHIFT 1
+#define HBM_RD_ADDR_BG_MASK 0x6
+#define HBM_RD_ADDR_BA_SHIFT 3
+#define HBM_RD_ADDR_BA_MASK 0x18
+#define HBM_RD_ADDR_COL_SHIFT 5
+#define HBM_RD_ADDR_COL_MASK 0x7E0
+#define HBM_RD_ADDR_ROW_SHIFT 11
+#define HBM_RD_ADDR_ROW_MASK 0x3FFF800
+
+struct hbm_rd_addr {
+ union {
+ /* bit fields are only for FW use */
+ struct {
+ u32 dbg_rd_err_addr_sid:1;
+ u32 dbg_rd_err_addr_bg:2;
+ u32 dbg_rd_err_addr_ba:2;
+ u32 dbg_rd_err_addr_col:6;
+ u32 dbg_rd_err_addr_row:15;
+ u32 reserved:6;
+ };
+ __le32 rd_addr_val;
+ };
+};
+
+#define HBM_RD_ERR_BEAT_SHIFT 2
+/* dbg_rd_err_misc fields: */
+/* Read parity is calculated per DW on every beat */
+#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0
+#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3
+#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8
+#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300
+/* ECC is calculated per PC on every beat */
+#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16
+#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000
+#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24
+#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000
+
+struct hl_eq_hbm_sei_read_err_intr_info {
+ /* DFI_RD_ERR_REP_ADDR */
+ struct hbm_rd_addr dbg_rd_err_addr;
+ /* DFI_RD_ERR_REP_ERR */
+ union {
+ struct {
+ /* bit fields are only for FW use */
+ u32 dbg_rd_err_par:8;
+ u32 dbg_rd_err_par_data:8;
+ u32 dbg_rd_err_serr:4;
+ u32 dbg_rd_err_derr:4;
+ u32 reserved:8;
+ };
+ __le32 dbg_rd_err_misc;
+ };
+ /* DFI_RD_ERR_REP_DM */
+ __le32 dbg_rd_err_dm;
+ /* DFI_RD_ERR_REP_SYNDROME */
+ __le32 dbg_rd_err_syndrome;
+ /* DFI_RD_ERR_REP_DATA */
+ __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN];
+};
+
+struct hl_eq_hbm_sei_ca_par_intr_info {
+ /* 14 LSBs */
+ __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN];
+ /* 18 LSBs */
+ __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN];
+};
+
+#define WR_PAR_LAST_CMD_COL_SHIFT 0
+#define WR_PAR_LAST_CMD_COL_MASK 0x3F
+#define WR_PAR_LAST_CMD_BG_SHIFT 6
+#define WR_PAR_LAST_CMD_BG_MASK 0xC0
+#define WR_PAR_LAST_CMD_BA_SHIFT 8
+#define WR_PAR_LAST_CMD_BA_MASK 0x300
+#define WR_PAR_LAST_CMD_SID_SHIFT 10
+#define WR_PAR_LAST_CMD_SID_MASK 0x400
+
+/* Row address isn't latched */
+struct hbm_sei_wr_cmd_address {
+ /* DFI_DERR_LAST_CMD */
+ union {
+ struct {
+ /* bit fields are only for FW use */
+ u32 col:6;
+ u32 bg:2;
+ u32 ba:2;
+ u32 sid:1;
+ u32 reserved:21;
+ };
+ __le32 dbg_wr_cmd_addr;
+ };
+};
+
+struct hl_eq_hbm_sei_wr_par_intr_info {
+ /* entry 0: WR command address from the 1st cycle prior to the error
+ * entry 1: WR command address from the 2nd cycle prior to the error
+ * and so on...
+ */
+ struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN];
+ /* derr[0:1] - 1st HBM cycle DERR output
+ * derr[2:3] - 2nd HBM cycle DERR output
+ */
+ __u8 dbg_derr;
+ /* extend to reach 8B */
+ __u8 pad[3];
+};
+
+/*
+ * this struct represents the following sei causes:
+ * command parity, ECC double error, ECC single error, dfi error, cattrip,
+ * temperature read-out, read parity error and write parity error.
+ * some only use the header while some have extra data.
+ */
+struct hl_eq_hbm_sei_data {
+ struct hl_hbm_sei_header hdr;
+ union {
+ struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info;
+ struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info;
+ struct hl_eq_hbm_sei_read_err_intr_info read_err_info;
+ struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info;
+ };
+};
+
+/* Engine/farm arc interrupt type */
+enum hl_engine_arc_interrupt_type {
+ /* Qman/farm ARC DCCM QUEUE FULL interrupt type */
+ ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1
+};
+
+/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */
+struct hl_engine_arc_dccm_queue_full_irq {
+ /* Queue index value which caused DCCM QUEUE FULL */
+ __le32 queue_index;
+ __le32 pad;
+};
+
+/* Data structure specifies details of QM/FARM ARC interrupt */
+struct hl_eq_engine_arc_intr_data {
+ /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */
+ __le32 engine_id;
+ __le32 intr_type; /* enum hl_engine_arc_interrupt_type */
+ /* More info related to the interrupt e.g. queue index
+ * incase of DCCM_QUEUE_FULL interrupt.
+ */
+ __le64 payload;
+ __le64 pad[5];
+};
+
+#define ADDR_DEC_ADDRESS_COUNT_MAX 4
+
+/* Data structure specifies details of ADDR_DEC interrupt */
+struct hl_eq_addr_dec_intr_data {
+ struct hl_eq_intr_cause intr_cause;
+ __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX];
+ __u8 addr_cnt;
+ __u8 pad[7];
+};
+
+struct hl_eq_entry {
+ struct hl_eq_header hdr;
+ union {
+ __le64 data_placeholder;
+ struct hl_eq_ecc_data ecc_data;
+ struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */
+ struct hl_eq_sm_sei_data sm_sei_data;
+ struct cpucp_pkt_sync_err pkt_sync_err;
+ struct hl_eq_fw_alive fw_alive;
+ struct hl_eq_intr_cause intr_cause;
+ struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data;
+ struct hl_eq_razwi_info razwi_info;
+ struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause;
+ struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */
+ struct hl_eq_engine_arc_intr_data arc_data;
+ struct hl_eq_addr_dec_intr_data addr_dec;
+ __le64 data[7];
+ };
+};
+
+#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry)
+
+#define EQ_CTL_READY_SHIFT 31
+#define EQ_CTL_READY_MASK 0x80000000
+
+#define EQ_CTL_EVENT_TYPE_SHIFT 16
+#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000
+
+#define EQ_CTL_INDEX_SHIFT 0
+#define EQ_CTL_INDEX_MASK 0x0000FFFF
+
+enum pq_init_status {
+ PQ_INIT_STATUS_NA = 0,
+ PQ_INIT_STATUS_READY_FOR_CP,
+ PQ_INIT_STATUS_READY_FOR_HOST,
+ PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI,
+ PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR,
+ PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR
+};
+
+/*
+ * CpuCP Primary Queue Packets
+ *
+ * During normal operation, the host's kernel driver needs to send various
+ * messages to CpuCP, usually either to SET some value into a H/W periphery or
+ * to GET the current value of some H/W periphery. For example, SET the
+ * frequency of MME/TPC and GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by the
+ * host's driver itself, e.g. power management code. In either case, the
+ * communication from the host's driver to CpuCP will *always* be in
+ * synchronous mode, meaning that the host will send a single message and poll
+ * until the message was acknowledged and the results are ready (if results are
+ * needed).
+ *
+ * This means that only a single message can be sent at a time and the host's
+ * driver must wait for its result before sending the next message. Having said
+ * that, because these are control messages which are sent in a relatively low
+ * frequency, this limitation seems acceptable. It's important to note that
+ * in case of multiple devices, messages to different devices *can* be sent
+ * at the same time.
+ *
+ * The message, inputs/outputs (if relevant) and fence object will be located
+ * on the device DDR at an address that will be determined by the host's driver.
+ * During device initialization phase, the host will pass to CpuCP that address.
+ * Most of the message types will contain inputs/outputs inside the message
+ * itself. The common part of each message will contain the opcode of the
+ * message (its type) and a field representing a fence object.
+ *
+ * When the host's driver wishes to send a message to CPU CP, it will write the
+ * message contents to the device DDR, clear the fence object and then write to
+ * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU.
+ *
+ * Upon receiving the interrupt (#121), CpuCP will read the message from the
+ * DDR. In case the message is a SET operation, CpuCP will first perform the
+ * operation and then write to the fence object on the device DDR. In case the
+ * message is a GET operation, CpuCP will first fill the results section on the
+ * device DDR and then write to the fence object. If an error occurred, CpuCP
+ * will fill the rc field with the right error code.
+ *
+ * In the meantime, the host's driver will poll on the fence object. Once the
+ * host sees that the fence object is signaled, it will read the results from
+ * the device DDR (if relevant) and resume the code execution in the host's
+ * driver.
+ *
+ * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
+ * so the value being put by the host's driver matches the value read by CpuCP
+ *
+ * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
+ *
+ * Detailed description:
+ *
+ * CPUCP_PACKET_DISABLE_PCI_ACCESS -
+ * After receiving this packet the embedded CPU must NOT issue PCI
+ * transactions (read/write) towards the Host CPU. This also include
+ * sending MSI-X interrupts.
+ * This packet is usually sent before the device is moved to D3Hot state.
+ *
+ * CPUCP_PACKET_ENABLE_PCI_ACCESS -
+ * After receiving this packet the embedded CPU is allowed to issue PCI
+ * transactions towards the Host CPU, including sending MSI-X interrupts.
+ * This packet is usually send after the device is moved to D0 state.
+ *
+ * CPUCP_PACKET_TEMPERATURE_GET -
+ * Fetch the current temperature / Max / Max Hyst / Critical /
+ * Critical Hyst of a specified thermal sensor. The packet's
+ * arguments specify the desired sensor and the field to get.
+ *
+ * CPUCP_PACKET_VOLTAGE_GET -
+ * Fetch the voltage / Max / Min of a specified sensor. The packet's
+ * arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_CURRENT_GET -
+ * Fetch the current / Max / Min of a specified sensor. The packet's
+ * arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_FAN_SPEED_GET -
+ * Fetch the speed / Max / Min of a specified fan. The packet's
+ * arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_PWM_GET -
+ * Fetch the pwm value / mode of a specified pwm. The packet's
+ * arguments specify the sensor and type.
+ *
+ * CPUCP_PACKET_PWM_SET -
+ * Set the pwm value / mode of a specified pwm. The packet's
+ * arguments specify the sensor, type and value.
+ *
+ * CPUCP_PACKET_FREQUENCY_SET -
+ * Set the frequency of a specified PLL. The packet's arguments specify
+ * the PLL and the desired frequency. The actual frequency in the device
+ * might differ from the requested frequency.
+ *
+ * CPUCP_PACKET_FREQUENCY_GET -
+ * Fetch the frequency of a specified PLL. The packet's arguments specify
+ * the PLL.
+ *
+ * CPUCP_PACKET_LED_SET -
+ * Set the state of a specified led. The packet's arguments
+ * specify the led and the desired state.
+ *
+ * CPUCP_PACKET_I2C_WR -
+ * Write 32-bit value to I2C device. The packet's arguments specify the
+ * I2C bus, address and value.
+ *
+ * CPUCP_PACKET_I2C_RD -
+ * Read 32-bit value from I2C device. The packet's arguments specify the
+ * I2C bus and address.
+ *
+ * CPUCP_PACKET_INFO_GET -
+ * Fetch information from the device as specified in the packet's
+ * structure. The host's driver passes the max size it allows the CpuCP to
+ * write to the structure, to prevent data corruption in case of
+ * mismatched driver/FW versions.
+ *
+ * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
+ *
+ * CPUCP_PACKET_UNMASK_RAZWI_IRQ -
+ * Unmask the given IRQ. The IRQ number is specified in the value field.
+ * The packet is sent after receiving an interrupt and printing its
+ * relevant information.
+ *
+ * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
+ * Unmask the given IRQs. The IRQs numbers are specified in an array right
+ * after the cpucp_packet structure, where its first element is the array
+ * length. The packet is sent after a soft reset was done in order to
+ * handle any interrupts that were sent during the reset process.
+ *
+ * CPUCP_PACKET_TEST -
+ * Test packet for CpuCP connectivity. The CPU will put the fence value
+ * in the result field.
+ *
+ * CPUCP_PACKET_FREQUENCY_CURR_GET -
+ * Fetch the current frequency of a specified PLL. The packet's arguments
+ * specify the PLL.
+ *
+ * CPUCP_PACKET_MAX_POWER_GET -
+ * Fetch the maximal power of the device.
+ *
+ * CPUCP_PACKET_MAX_POWER_SET -
+ * Set the maximal power of the device. The packet's arguments specify
+ * the power.
+ *
+ * CPUCP_PACKET_EEPROM_DATA_GET -
+ * Get EEPROM data from the CpuCP kernel. The buffer is specified in the
+ * addr field. The CPU will put the returned data size in the result
+ * field. In addition, the host's driver passes the max size it allows the
+ * CpuCP to write to the structure, to prevent data corruption in case of
+ * mismatched driver/FW versions.
+ *
+ * CPUCP_PACKET_NIC_INFO_GET -
+ * Fetch information from the device regarding the NIC. the host's driver
+ * passes the max size it allows the CpuCP to write to the structure, to
+ * prevent data corruption in case of mismatched driver/FW versions.
+ *
+ * CPUCP_PACKET_TEMPERATURE_SET -
+ * Set the value of the offset property of a specified thermal sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ *
+ * CPUCP_PACKET_VOLTAGE_SET -
+ * Trigger the reset_history property of a specified voltage sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ *
+ * CPUCP_PACKET_CURRENT_SET -
+ * Trigger the reset_history property of a specified current sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ *
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
+ * Get throughput of PCIe.
+ * The packet's arguments specify the transaction direction (TX/RX).
+ * The window measurement is 10[msec], and the return value is in KB/sec.
+ *
+ * CPUCP_PACKET_PCIE_REPLAY_CNT_GET
+ * Replay count measures number of "replay" events, which is basicly
+ * number of retries done by PCIe.
+ *
+ * CPUCP_PACKET_TOTAL_ENERGY_GET -
+ * Total Energy is measurement of energy from the time FW Linux
+ * is loaded. It is calculated by multiplying the average power
+ * by time (passed from armcp start). The units are in MilliJouls.
+ *
+ * CPUCP_PACKET_PLL_INFO_GET -
+ * Fetch frequencies of PLL from the required PLL IP.
+ * The packet's arguments specify the device PLL type
+ * Pll type is the PLL from device pll_index enum.
+ * The result is composed of 4 outputs, each is 16-bit
+ * frequency in MHz.
+ *
+ * CPUCP_PACKET_POWER_GET -
+ * Fetch the present power consumption of the device (Current * Voltage).
+ *
+ * CPUCP_PACKET_NIC_PFC_SET -
+ * Enable/Disable the NIC PFC feature. The packet's arguments specify the
+ * NIC port, relevant lanes to configure and one bit indication for
+ * enable/disable.
+ *
+ * CPUCP_PACKET_NIC_FAULT_GET -
+ * Fetch the current indication for local/remote faults from the NIC MAC.
+ * The result is 32-bit value of the relevant register.
+ *
+ * CPUCP_PACKET_NIC_LPBK_SET -
+ * Enable/Disable the MAC loopback feature. The packet's arguments specify
+ * the NIC port, relevant lanes to configure and one bit indication for
+ * enable/disable.
+ *
+ * CPUCP_PACKET_NIC_MAC_INIT -
+ * Configure the NIC MAC channels. The packet's arguments specify the
+ * NIC port and the speed.
+ *
+ * CPUCP_PACKET_MSI_INFO_SET -
+ * set the index number for each supported msi type going from
+ * host to device
+ *
+ * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
+ * Fetch the un/correctable counters values from the NIC MAC.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_GET -
+ * Fetch various NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_CLR -
+ * Clear the various NIC MAC counters in the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
+ * Fetch all NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_IS_IDLE_CHECK -
+ * Check if the device is IDLE in regard to the DMA/compute engines
+ * and QMANs. The f/w will return a bitmask where each bit represents
+ * a different engine or QMAN according to enum cpucp_idle_mask.
+ * The bit will be 1 if the engine is NOT idle.
+ *
+ * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET -
+ * Fetch all HBM replaced-rows and prending to be replaced rows data.
+ *
+ * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS -
+ * Fetch status of HBM rows pending replacement and need a reboot to
+ * be replaced.
+ *
+ * CPUCP_PACKET_POWER_SET -
+ * Resets power history of device to 0
+ *
+ * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
+ * Packet to perform engine core ASID configuration
+ *
+ * CPUCP_PACKET_SEC_ATTEST_GET -
+ * Get the attestaion data that is collected during various stages of the
+ * boot sequence. the attestation data is also hashed with some unique
+ * number (nonce) provided by the host to prevent replay attacks.
+ * public key and certificate also provided as part of the FW response.
+ *
+ * CPUCP_PACKET_INFO_SIGNED_GET -
+ * Get the device information signed by the Trusted Platform device.
+ * device info data is also hashed with some unique number (nonce) provided
+ * by the host to prevent replay attacks. public key and certificate also
+ * provided as part of the FW response.
+ *
+ * CPUCP_PACKET_MONITOR_DUMP_GET -
+ * Get monitors registers dump from the CpuCP kernel.
+ * The CPU will put the registers dump in the a buffer allocated by the driver
+ * which address is passed via the CpuCp packet. In addition, the host's driver
+ * passes the max size it allows the CpuCP to write to the structure, to prevent
+ * data corruption in case of mismatched driver/FW versions.
+ * Obsolete.
+ *
+ * CPUCP_PACKET_GENERIC_PASSTHROUGH -
+ * Generic opcode for all firmware info that is only passed to host
+ * through the LKD, without getting parsed there.
+ *
+ * CPUCP_PACKET_ACTIVE_STATUS_SET -
+ * LKD sends FW indication whether device is free or in use, this indication is reported
+ * also to the BMC.
+ *
+ * CPUCP_PACKET_SOFT_RESET -
+ * Packet to perform soft-reset.
+ *
+ * CPUCP_PACKET_INTS_REGISTER -
+ * Packet to inform FW that queues have been established and LKD is ready to receive
+ * EQ events.
+ */
+
+enum cpucp_packet_id {
+ CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */
+ CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */
+ CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */
+ CPUCP_PACKET_VOLTAGE_GET, /* sysfs */
+ CPUCP_PACKET_CURRENT_GET, /* sysfs */
+ CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */
+ CPUCP_PACKET_PWM_GET, /* sysfs */
+ CPUCP_PACKET_PWM_SET, /* sysfs */
+ CPUCP_PACKET_FREQUENCY_SET, /* sysfs */
+ CPUCP_PACKET_FREQUENCY_GET, /* sysfs */
+ CPUCP_PACKET_LED_SET, /* debugfs */
+ CPUCP_PACKET_I2C_WR, /* debugfs */
+ CPUCP_PACKET_I2C_RD, /* debugfs */
+ CPUCP_PACKET_INFO_GET, /* IOCTL */
+ CPUCP_PACKET_FLASH_PROGRAM_REMOVED,
+ CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */
+ CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */
+ CPUCP_PACKET_TEST, /* internal */
+ CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */
+ CPUCP_PACKET_MAX_POWER_GET, /* sysfs */
+ CPUCP_PACKET_MAX_POWER_SET, /* sysfs */
+ CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */
+ CPUCP_PACKET_NIC_INFO_GET, /* internal */
+ CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */
+ CPUCP_PACKET_VOLTAGE_SET, /* sysfs */
+ CPUCP_PACKET_CURRENT_SET, /* sysfs */
+ CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */
+ CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */
+ CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */
+ CPUCP_PACKET_PLL_INFO_GET, /* internal */
+ CPUCP_PACKET_NIC_STATUS, /* internal */
+ CPUCP_PACKET_POWER_GET, /* internal */
+ CPUCP_PACKET_NIC_PFC_SET, /* internal */
+ CPUCP_PACKET_NIC_FAULT_GET, /* internal */
+ CPUCP_PACKET_NIC_LPBK_SET, /* internal */
+ CPUCP_PACKET_NIC_MAC_CFG, /* internal */
+ CPUCP_PACKET_MSI_INFO_SET, /* internal */
+ CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */
+ CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */
+ CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */
+ CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */
+ CPUCP_PACKET_IS_IDLE_CHECK, /* internal */
+ CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */
+ CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */
+ CPUCP_PACKET_POWER_SET, /* internal */
+ CPUCP_PACKET_RESERVED, /* not used */
+ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
+ CPUCP_PACKET_RESERVED2, /* not used */
+ CPUCP_PACKET_SEC_ATTEST_GET, /* internal */
+ CPUCP_PACKET_INFO_SIGNED_GET, /* internal */
+ CPUCP_PACKET_RESERVED4, /* not used */
+ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
+ CPUCP_PACKET_RESERVED5, /* not used */
+ CPUCP_PACKET_RESERVED6, /* not used */
+ CPUCP_PACKET_RESERVED7, /* not used */
+ CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */
+ CPUCP_PACKET_RESERVED8, /* not used */
+ CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
+ CPUCP_PACKET_RESERVED9, /* not used */
+ CPUCP_PACKET_RESERVED10, /* not used */
+ CPUCP_PACKET_RESERVED11, /* not used */
+ CPUCP_PACKET_RESERVED12, /* internal */
+ CPUCP_PACKET_RESERVED13, /* internal */
+ CPUCP_PACKET_SOFT_RESET, /* internal */
+ CPUCP_PACKET_INTS_REGISTER, /* internal */
+ CPUCP_PACKET_ID_MAX /* must be last */
+};
+
+#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
+
+#define CPUCP_PKT_CTL_RC_SHIFT 12
+#define CPUCP_PKT_CTL_RC_MASK 0x0000F000
+
+#define CPUCP_PKT_CTL_OPCODE_SHIFT 16
+#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
+
+#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0
+#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull
+#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16
+#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull
+#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32
+#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull
+#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48
+#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull
+
+#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0
+#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull
+#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16
+#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull
+
+#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0
+#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1
+#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull
+
+#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0
+#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1
+#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull
+
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull
+
+/* heartbeat status bits */
+#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0
+#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001
+
+struct cpucp_packet {
+ union {
+ __le64 value; /* For SET packets */
+ __le64 result; /* For GET packets */
+ __le64 addr; /* For PQ */
+ };
+
+ __le32 ctl;
+
+ __le32 fence; /* Signal to host that message is completed */
+
+ union {
+ struct {/* For temperature/current/voltage/fan/pwm get/set */
+ __le16 sensor_index;
+ __le16 type;
+ };
+
+ struct { /* For I2C read/write */
+ __u8 i2c_bus;
+ __u8 i2c_addr;
+ __u8 i2c_reg;
+ /*
+ * In legacy implemetations, i2c_len was not present,
+ * was unused and just added as pad.
+ * So if i2c_len is 0, it is treated as legacy
+ * and r/w 1 Byte, else if i2c_len is specified,
+ * its treated as new multibyte r/w support.
+ */
+ __u8 i2c_len;
+ };
+
+ struct {/* For PLL info fetch */
+ __le16 pll_type;
+ /* TODO pll_reg is kept temporary before removal */
+ __le16 pll_reg;
+ };
+
+ /* For any general request */
+ __le32 index;
+
+ /* For frequency get/set */
+ __le32 pll_index;
+
+ /* For led set */
+ __le32 led_index;
+
+ /* For get CpuCP info/EEPROM data/NIC info */
+ __le32 data_max_size;
+
+ /*
+ * For any general status bitmask. Shall be used whenever the
+ * result cannot be used to hold general purpose data.
+ */
+ __le32 status_mask;
+
+ /* random, used once number, for security packets */
+ __le32 nonce;
+ };
+
+ union {
+ /* For NIC requests */
+ __le32 port_index;
+
+ /* For Generic packet sub index */
+ __le32 pkt_subidx;
+ };
+};
+
+struct cpucp_unmask_irq_arr_packet {
+ struct cpucp_packet cpucp_pkt;
+ __le32 length;
+ __le32 irqs[];
+};
+
+struct cpucp_nic_status_packet {
+ struct cpucp_packet cpucp_pkt;
+ __le32 length;
+ __le32 data[];
+};
+
+struct cpucp_array_data_packet {
+ struct cpucp_packet cpucp_pkt;
+ __le32 length;
+ __le32 data[];
+};
+
+enum cpucp_led_index {
+ CPUCP_LED0_INDEX = 0,
+ CPUCP_LED1_INDEX,
+ CPUCP_LED2_INDEX,
+ CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX
+};
+
+/*
+ * enum cpucp_packet_rc - Error return code
+ * @cpucp_packet_success -> in case of success.
+ * @cpucp_packet_invalid -> this is to support first generation platforms.
+ * @cpucp_packet_fault -> in case of processing error like failing to
+ * get device binding or semaphore etc.
+ * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported.
+ * @cpucp_packet_invalid_params -> when checking parameter like length of buffer
+ * or attribute value etc.
+ * @cpucp_packet_rc_max -> It indicates size of enum so should be at last.
+ */
+enum cpucp_packet_rc {
+ cpucp_packet_success,
+ cpucp_packet_invalid,
+ cpucp_packet_fault,
+ cpucp_packet_invalid_pkt,
+ cpucp_packet_invalid_params,
+ cpucp_packet_rc_max
+};
+
+/*
+ * cpucp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum cpucp_temp_type {
+ cpucp_temp_input,
+ cpucp_temp_min = 4,
+ cpucp_temp_min_hyst,
+ cpucp_temp_max = 6,
+ cpucp_temp_max_hyst,
+ cpucp_temp_crit,
+ cpucp_temp_crit_hyst,
+ cpucp_temp_offset = 19,
+ cpucp_temp_lowest = 21,
+ cpucp_temp_highest = 22,
+ cpucp_temp_reset_history = 23,
+ cpucp_temp_warn = 24,
+ cpucp_temp_max_crit = 25,
+ cpucp_temp_max_warn = 26,
+};
+
+enum cpucp_in_attributes {
+ cpucp_in_input,
+ cpucp_in_min,
+ cpucp_in_max,
+ cpucp_in_lowest = 6,
+ cpucp_in_highest = 7,
+ cpucp_in_reset_history,
+ cpucp_in_intr_alarm_a,
+ cpucp_in_intr_alarm_b,
+};
+
+enum cpucp_curr_attributes {
+ cpucp_curr_input,
+ cpucp_curr_min,
+ cpucp_curr_max,
+ cpucp_curr_lowest = 6,
+ cpucp_curr_highest = 7,
+ cpucp_curr_reset_history
+};
+
+enum cpucp_fan_attributes {
+ cpucp_fan_input,
+ cpucp_fan_min = 2,
+ cpucp_fan_max
+};
+
+enum cpucp_pwm_attributes {
+ cpucp_pwm_input,
+ cpucp_pwm_enable
+};
+
+enum cpucp_pcie_throughput_attributes {
+ cpucp_pcie_throughput_tx,
+ cpucp_pcie_throughput_rx
+};
+
+/* TODO temporary kept before removal */
+enum cpucp_pll_reg_attributes {
+ cpucp_pll_nr_reg,
+ cpucp_pll_nf_reg,
+ cpucp_pll_od_reg,
+ cpucp_pll_div_factor_reg,
+ cpucp_pll_div_sel_reg
+};
+
+/* TODO temporary kept before removal */
+enum cpucp_pll_type_attributes {
+ cpucp_pll_cpu,
+ cpucp_pll_pci,
+};
+
+/*
+ * cpucp_power_type aligns with hwmon_power_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum cpucp_power_type {
+ CPUCP_POWER_INPUT = 8,
+ CPUCP_POWER_INPUT_HIGHEST = 9,
+ CPUCP_POWER_RESET_INPUT_HISTORY = 11
+};
+
+/*
+ * MSI type enumeration table for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES).
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum cpucp_msi_type {
+ CPUCP_EVENT_QUEUE_MSI_TYPE,
+ CPUCP_NIC_PORT1_MSI_TYPE,
+ CPUCP_NIC_PORT3_MSI_TYPE,
+ CPUCP_NIC_PORT5_MSI_TYPE,
+ CPUCP_NIC_PORT7_MSI_TYPE,
+ CPUCP_NIC_PORT9_MSI_TYPE,
+ CPUCP_EVENT_QUEUE_ERR_MSI_TYPE,
+ CPUCP_NUM_OF_MSI_TYPES
+};
+
+/*
+ * PLL enumeration table used for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table.
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum pll_index {
+ CPU_PLL = 0,
+ PCI_PLL = 1,
+ NIC_PLL = 2,
+ DMA_PLL = 3,
+ MESH_PLL = 4,
+ MME_PLL = 5,
+ TPC_PLL = 6,
+ IF_PLL = 7,
+ SRAM_PLL = 8,
+ NS_PLL = 9,
+ HBM_PLL = 10,
+ MSS_PLL = 11,
+ DDR_PLL = 12,
+ VID_PLL = 13,
+ BANK_PLL = 14,
+ MMU_PLL = 15,
+ IC_PLL = 16,
+ MC_PLL = 17,
+ EMMC_PLL = 18,
+ D2D_PLL = 19,
+ CS_PLL = 20,
+ C2C_PLL = 21,
+ NCH_PLL = 22,
+ C2M_PLL = 23,
+ PLL_MAX
+};
+
+enum rl_index {
+ TPC_RL = 0,
+ MME_RL,
+ EDMA_RL,
+};
+
+enum pvt_index {
+ PVT_SW,
+ PVT_SE,
+ PVT_NW,
+ PVT_NE
+};
+
+/* Event Queue Packets */
+
+struct eq_generic_event {
+ __le64 data[7];
+};
+
+/*
+ * CpuCP info
+ */
+
+#define CARD_NAME_MAX_LEN 16
+#define CPUCP_MAX_SENSORS 128
+#define CPUCP_MAX_NICS 128
+#define CPUCP_LANES_PER_NIC 4
+#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024
+#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC)
+#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64)
+#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64)
+#define CPUCP_HBM_ROW_REPLACE_MAX 32
+
+struct cpucp_sensor {
+ __le32 type;
+ __le32 flags;
+};
+
+/**
+ * struct cpucp_card_types - ASIC card type.
+ * @cpucp_card_type_pci: PCI card.
+ * @cpucp_card_type_pmc: PCI Mezzanine Card.
+ */
+enum cpucp_card_types {
+ cpucp_card_type_pci,
+ cpucp_card_type_pmc
+};
+
+#define CPUCP_SEC_CONF_ENABLED_SHIFT 0
+#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001
+
+#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1
+#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002
+
+#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2
+#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004
+
+/**
+ * struct cpucp_security_info - Security information.
+ * @config: configuration bit field
+ * @keys_num: number of stored keys
+ * @revoked_keys: revoked keys bit field
+ * @min_svn: minimal security version
+ */
+struct cpucp_security_info {
+ __u8 config;
+ __u8 keys_num;
+ __u8 revoked_keys;
+ __u8 min_svn;
+};
+
+/**
+ * struct cpucp_info - Info from CpuCP that is necessary to the host's driver
+ * @sensors: available sensors description.
+ * @kernel_version: CpuCP linux kernel version.
+ * @reserved: reserved field.
+ * @card_type: card configuration type.
+ * @card_location: in a server, each card has different connections topology
+ * depending on its location (relevant for PMC card type)
+ * @cpld_version: CPLD programmed F/W version.
+ * @infineon_version: Infineon main DC-DC version.
+ * @fuse_version: silicon production FUSE information.
+ * @thermal_version: thermald S/W version.
+ * @cpucp_version: CpuCP S/W version.
+ * @infineon_second_stage_version: Infineon 2nd stage DC-DC version.
+ * @dram_size: available DRAM size.
+ * @card_name: card name that will be displayed in HWMON subsystem on the host
+ * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance
+ * (0 = functional, 1 = binned)
+ * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance
+ * (0 = functional, 1 = binned), maximum 1 per dcore
+ * @sram_binning: Categorize SRAM functionality
+ * (0 = fully functional, 1 = lower-half is not functional,
+ * 2 = upper-half is not functional)
+ * @sec_info: security information
+ * @pll_map: Bit map of supported PLLs for current ASIC version.
+ * @mme_binning_mask: MME binning mask,
+ * bits [0:6] <==> dcore0 mme fma
+ * bits [7:13] <==> dcore1 mme fma
+ * bits [14:20] <==> dcore0 mme ima
+ * bits [21:27] <==> dcore1 mme ima
+ * For each group, if the 6th bit is set then first 5 bits
+ * represent the col's idx [0-31], otherwise these bits are
+ * ignored, and col idx 32 is binned. 7th bit is don't care.
+ * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
+ * (0 = functional 1 = binned)
+ * @memory_repair_flag: eFuse flag indicating memory repair
+ * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance
+ * (0 = functional 1 = binned)
+ * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
+ * (0 = functional 1 = binned)
+ * @interposer_version: Interposer version programmed in eFuse
+ * @substrate_version: Substrate version programmed in eFuse
+ * @eq_health_check_supported: eq health check feature supported in FW.
+ * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM.
+ * @fw_os_version: Firmware OS Version
+ */
+struct cpucp_info {
+ struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
+ __u8 kernel_version[VERSION_MAX_LEN];
+ __le32 reserved;
+ __le32 card_type;
+ __le32 card_location;
+ __le32 cpld_version;
+ __le32 infineon_version;
+ __u8 fuse_version[VERSION_MAX_LEN];
+ __u8 thermal_version[VERSION_MAX_LEN];
+ __u8 cpucp_version[VERSION_MAX_LEN];
+ __le32 infineon_second_stage_version;
+ __le64 dram_size;
+ char card_name[CARD_NAME_MAX_LEN];
+ __le64 tpc_binning_mask;
+ __le64 decoder_binning_mask;
+ __u8 sram_binning;
+ __u8 dram_binning_mask;
+ __u8 memory_repair_flag;
+ __u8 edma_binning_mask;
+ __u8 xbar_binning_mask;
+ __u8 interposer_version;
+ __u8 substrate_version;
+ __u8 eq_health_check_supported;
+ struct cpucp_security_info sec_info;
+ __le32 fw_hbm_region_size;
+ __u8 pll_map[PLL_MAP_LEN];
+ __le64 mme_binning_mask;
+ __u8 fw_os_version[VERSION_MAX_LEN];
+};
+
+struct cpucp_mac_addr {
+ __u8 mac_addr[ETH_ALEN];
+};
+
+enum cpucp_serdes_type {
+ TYPE_1_SERDES_TYPE,
+ TYPE_2_SERDES_TYPE,
+ HLS1_SERDES_TYPE,
+ HLS1H_SERDES_TYPE,
+ HLS2_SERDES_TYPE,
+ HLS2_TYPE_1_SERDES_TYPE,
+ MAX_NUM_SERDES_TYPE, /* number of types */
+ UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */
+};
+
+struct cpucp_nic_info {
+ struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
+ __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
+ __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN];
+ __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN];
+ __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
+ __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
+ __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
+ __le16 serdes_type; /* enum cpucp_serdes_type */
+ __le16 tx_swap_map[CPUCP_MAX_NICS];
+ __u8 reserved[6];
+};
+
+#define PAGE_DISCARD_MAX 64
+
+struct page_discard_info {
+ __u8 num_entries;
+ __u8 reserved[7];
+ __le32 mmu_page_idx[PAGE_DISCARD_MAX];
+};
+
+/*
+ * struct frac_val - fracture value represented by "integer.frac".
+ * @integer: the integer part of the fracture value;
+ * @frac: the fracture part of the fracture value.
+ */
+struct frac_val {
+ union {
+ struct {
+ __le16 integer;
+ __le16 frac;
+ };
+ __le32 val;
+ };
+};
+
+/*
+ * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp".
+ * @integer: the integer part of the SER value;
+ * @exp: the exponent part of the SER value.
+ */
+struct ser_val {
+ __le16 integer;
+ __le16 exp;
+};
+
+/*
+ * struct cpucp_nic_status - describes the status of a NIC port.
+ * @port: NIC port index.
+ * @bad_format_cnt: e.g. CRC.
+ * @responder_out_of_sequence_psn_cnt: e.g NAK.
+ * @high_ber_reinit_cnt: link reinit due to high BER.
+ * @correctable_err_cnt: e.g. bit-flip.
+ * @uncorrectable_err_cnt: e.g. MAC errors.
+ * @retraining_cnt: re-training counter.
+ * @up: is port up.
+ * @pcs_link: has PCS link.
+ * @phy_ready: is PHY ready.
+ * @auto_neg: is Autoneg enabled.
+ * @timeout_retransmission_cnt: timeout retransmission events.
+ * @high_ber_cnt: high ber events.
+ * @pre_fec_ser: pre FEC SER value.
+ * @post_fec_ser: post FEC SER value.
+ * @throughput: measured throughput.
+ * @latency: measured latency.
+ */
+struct cpucp_nic_status {
+ __le32 port;
+ __le32 bad_format_cnt;
+ __le32 responder_out_of_sequence_psn_cnt;
+ __le32 high_ber_reinit;
+ __le32 correctable_err_cnt;
+ __le32 uncorrectable_err_cnt;
+ __le32 retraining_cnt;
+ __u8 up;
+ __u8 pcs_link;
+ __u8 phy_ready;
+ __u8 auto_neg;
+ __le32 timeout_retransmission_cnt;
+ __le32 high_ber_cnt;
+ struct ser_val pre_fec_ser;
+ struct ser_val post_fec_ser;
+ struct frac_val bandwidth;
+ struct frac_val lat;
+};
+
+enum cpucp_hbm_row_replace_cause {
+ REPLACE_CAUSE_DOUBLE_ECC_ERR,
+ REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR,
+};
+
+struct cpucp_hbm_row_info {
+ __u8 hbm_idx;
+ __u8 pc;
+ __u8 sid;
+ __u8 bank_idx;
+ __le16 row_addr;
+ __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */
+ __u8 pad;
+};
+
+struct cpucp_hbm_row_replaced_rows_info {
+ __le16 num_replaced_rows;
+ __u8 pad[6];
+ struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
+};
+
+enum cpu_reset_status {
+ CPU_RST_STATUS_NA = 0,
+ CPU_RST_STATUS_SOFT_RST_DONE = 1,
+};
+
+#define SEC_PCR_DATA_BUF_SZ 256
+#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
+#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
+
+/*
+ * struct cpucp_sec_attest_info - attestation report of the boot
+ * @pcr_data: raw values of the PCR registers
+ * @pcr_num_reg: number of PCR registers in the pcr_data array
+ * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
+ * @nonce: number only used once. random number provided by host. this also
+ * passed to the quote command as a qualifying data.
+ * @pcr_quote_len: length of the attestation quote data (bytes)
+ * @pcr_quote: attestation report data structure
+ * @quote_sig_len: length of the attestation report signature (bytes)
+ * @quote_sig: signature structure of the attestation report
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key for the signed attestation
+ * (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the attestation signing key
+ */
+struct cpucp_sec_attest_info {
+ __u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
+ __u8 pcr_num_reg;
+ __u8 pcr_reg_len;
+ __le16 pad0;
+ __le32 nonce;
+ __le16 pcr_quote_len;
+ __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
+ __u8 quote_sig_len;
+ __u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
+ __le16 pub_data_len;
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __le16 certificate_len;
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
+/*
+ * struct cpucp_dev_info_signed - device information signed by a secured device
+ * @info: device information structure as defined above
+ * @nonce: number only used once. random number provided by host. this number is
+ * hashed and signed along with the device information.
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @info_sig: signature of the info + nonce data.
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key info signed info data
+ * (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the signing key
+ */
+struct cpucp_dev_info_signed {
+ struct cpucp_info info; /* assumed to be 64bit aligned */
+ __le32 nonce;
+ __le32 pad0;
+ __u8 info_sig_len;
+ __u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+ __le16 pub_data_len;
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __le16 certificate_len;
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
+#define DCORE_MON_REGS_SZ 512
+/*
+ * struct dcore_monitor_regs_data - DCORE monitor regs data.
+ * the structure follows sync manager block layout. Obsolete.
+ * @mon_pay_addrl: array of payload address low bits.
+ * @mon_pay_addrh: array of payload address high bits.
+ * @mon_pay_data: array of payload data.
+ * @mon_arm: array of monitor arm.
+ * @mon_status: array of monitor status.
+ */
+struct dcore_monitor_regs_data {
+ __le32 mon_pay_addrl[DCORE_MON_REGS_SZ];
+ __le32 mon_pay_addrh[DCORE_MON_REGS_SZ];
+ __le32 mon_pay_data[DCORE_MON_REGS_SZ];
+ __le32 mon_arm[DCORE_MON_REGS_SZ];
+ __le32 mon_status[DCORE_MON_REGS_SZ];
+};
+
+/* contains SM data for each SYNC_MNGR (Obsolete) */
+struct cpucp_monitor_dump {
+ struct dcore_monitor_regs_data sync_mngr_w_s;
+ struct dcore_monitor_regs_data sync_mngr_e_s;
+ struct dcore_monitor_regs_data sync_mngr_w_n;
+ struct dcore_monitor_regs_data sync_mngr_e_n;
+};
+
+/*
+ * The Type of the generic request (and other input arguments) will be fetched from user by reading
+ * from "pkt_subidx" field in struct cpucp_packet.
+ *
+ * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions.
+ */
+enum hl_passthrough_type {
+ HL_PASSTHROUGH_VERSIONS,
+};
+
+#endif /* CPUCP_IF_H */
diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
new file mode 100644
index 000000000000..93366d5621fd
--- /dev/null
+++ b/include/linux/habanalabs/hl_boot_if.h
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2018-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HL_BOOT_IF_H
+#define HL_BOOT_IF_H
+
+#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */
+#define HL_POWER9_HOST_MAGIC 0x1DA30009
+
+#define BOOT_FIT_SRAM_OFFSET 0x200000
+
+#define VERSION_MAX_LEN 128
+
+enum cpu_boot_err {
+ CPU_BOOT_ERR_DRAM_INIT_FAIL = 0,
+ CPU_BOOT_ERR_FIT_CORRUPTED = 1,
+ CPU_BOOT_ERR_TS_INIT_FAIL = 2,
+ CPU_BOOT_ERR_DRAM_SKIPPED = 3,
+ CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4,
+ CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5,
+ CPU_BOOT_ERR_NIC_FW_FAIL = 6,
+ CPU_BOOT_ERR_SECURITY_NOT_RDY = 7,
+ CPU_BOOT_ERR_SECURITY_FAIL = 8,
+ CPU_BOOT_ERR_EFUSE_FAIL = 9,
+ CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10,
+ CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11,
+ CPU_BOOT_ERR_PLL_FAIL = 12,
+ CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
+ CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
+ CPU_BOOT_ERR_BINNING_FAIL = 19,
+ CPU_BOOT_ERR_TPM_FAIL = 20,
+ CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
+ CPU_BOOT_ERR_EEPROM_FAIL = 22,
+ CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23,
+ CPU_BOOT_ERR_ENABLED = 31,
+ CPU_BOOT_ERR_SCND_EN = 63,
+ CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
+};
+
+/*
+ * Mask for fatal failures
+ * This mask contains all possible fatal failures, and a dynamic code
+ * will clear the non-relevant ones.
+ */
+#define CPU_BOOT_ERR_FATAL_MASK \
+ ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \
+ (1 << CPU_BOOT_ERR_PLL_FAIL) | \
+ (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \
+ (1 << CPU_BOOT_ERR_BINNING_FAIL) | \
+ (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \
+ (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \
+ (1 << CPU_BOOT_ERR_EEPROM_FAIL))
+
+/*
+ * CPU error bits in BOOT_ERROR registers
+ *
+ * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed.
+ * DRAM is not reliable to use.
+ *
+ * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the
+ * image provided by the host has failed.
+ *
+ * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed.
+ * Boot continues as usual, but keep in
+ * mind this is a warning.
+ *
+ * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped.
+ * Skipping DRAM initialization has been
+ * requested (e.g. strap, command, etc.)
+ * and FW skipped the DRAM initialization.
+ * Host can initialize the DRAM.
+ *
+ * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped.
+ * Meaning the BMC data might not be
+ * available until reset.
+ *
+ * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready.
+ * BMC has not provided the NIC data yet.
+ * Once provided this bit will be cleared.
+ *
+ * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed.
+ * The NIC FW loading and initialization
+ * failed. This means NICs are not usable.
+ *
+ * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been
+ * started, but is not ready yet - chip
+ * cannot be accessed.
+ *
+ * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed.
+ * The tasks are security init (root of
+ * trust), boot authentication (chain of
+ * trust), data packets authentication.
+ *
+ * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed.
+ * The PCI device ID might be wrong.
+ *
+ * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed.
+ * It mean that ppboot checksum
+ * verification for the preboot primary
+ * image has failed to match expected
+ * checksum. Trying to program image again
+ * might solve this.
+ *
+ * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed.
+ * It mean that ppboot checksum
+ * verification for the preboot secondary
+ * image has failed to match expected
+ * checksum. Trying to program image again
+ * might solve this.
+ *
+ * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one
+ * of the PLLs remains in REF_CLK
+ *
+ * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support
+ * should be contacted.
+ *
+ * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during
+ * the execution of ppboot or preboot.
+ * for example: stack overflow.
+ *
+ * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning
+ * malfunctioning components might still be
+ * in use.
+ *
+ * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed.
+ *
+ * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature
+ * sensor.
+ *
+ * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults
+ * are used.
+ *
+ * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm
+ * memories. Boot disabled until reset.
+ *
+ * CPU_BOOT_ERR0_ENABLED Error registers enabled.
+ * This is a main indication that the
+ * running FW populates the error
+ * registers. Meaning the error bits are
+ * not garbage, but actual error statuses.
+ */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL)
+#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY)
+#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL)
+#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL)
+#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL)
+#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL)
+#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL)
+#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
+#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
+#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
+#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
+#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
+#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL)
+#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
+#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
+#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
+
+enum cpu_boot_dev_sts {
+ CPU_BOOT_DEV_STS_SECURITY_EN = 0,
+ CPU_BOOT_DEV_STS_DEBUG_EN = 1,
+ CPU_BOOT_DEV_STS_WATCHDOG_EN = 2,
+ CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3,
+ CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4,
+ CPU_BOOT_DEV_STS_E2E_CRED_EN = 5,
+ CPU_BOOT_DEV_STS_HBM_CRED_EN = 6,
+ CPU_BOOT_DEV_STS_RL_EN = 7,
+ CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8,
+ CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9,
+ CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10,
+ CPU_BOOT_DEV_STS_PLL_INFO_EN = 11,
+ CPU_BOOT_DEV_STS_SP_SRAM_EN = 12,
+ CPU_BOOT_DEV_STS_CLK_GATE_EN = 13,
+ CPU_BOOT_DEV_STS_HBM_ECC_EN = 14,
+ CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15,
+ CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16,
+ CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17,
+ CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18,
+ CPU_BOOT_DEV_STS_DYN_PLL_EN = 19,
+ CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20,
+ CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21,
+ CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22,
+ CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23,
+ CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24,
+ CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25,
+ CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26,
+ CPU_BOOT_DEV_STS_ENABLED = 31,
+ CPU_BOOT_DEV_STS_SCND_EN = 63,
+ CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */
+};
+
+/*
+ * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers
+ *
+ * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled.
+ * This is an indication for security
+ * enabled in FW, which means that
+ * all conditions for security are met:
+ * device is indicated as security enabled,
+ * registers are protected, and device
+ * uses keys for image verification.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled.
+ * Enabled when JTAG or DEBUG is enabled
+ * in FW.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled.
+ * Watchdog is enabled in FW.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled.
+ * DRAM initialization has been done in FW.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled.
+ * If set, it means that during boot,
+ * FW waited for BMC data.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized.
+ * FW initialized E2E credits.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized.
+ * FW initialized HBM credits.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized.
+ * FW initialized rate limiter.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled.
+ * FW initialized SRAM scrambler.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled.
+ * FW initialized DRAM scrambler.
+ * Initialized in: u-boot
+ *
+ * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled.
+ * FW has the hard reset procedure
+ * implemented. This means that FW will
+ * perform hard reset procedure on
+ * receiving the halt-machine event.
+ * Initialized in: preboot, u-boot, linux
+ *
+ * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available
+ * for use.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled.
+ * FW initialized Clock Gating.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled.
+ * FW handles HBM ECC indications.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd
+ * is set to the PI counter.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication
+ * protocol is enabled.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled.
+ * This bit if set, means the iATU has been
+ * configured and is ready for use.
+ * Initialized in: ppboot
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and
+ * any access to them is done via the FW.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled.
+ * FW sends to host a bitmap of supported
+ * PLLs.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from
+ * previleged entity. FW sets this status
+ * bit for host. If this bit is set then
+ * GIC can not be accessed from host.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running
+ * index for each new event sent to host.
+ * This is used as a method in host to
+ * identify that the waiting event in
+ * queue is actually a new event which
+ * was not served before.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to
+ * prevent IRQs overriding each other.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
+ * NIC STAT and XPCS91 access is restricted
+ * and is done via FW only.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
+ * NIC STAT get all is supported.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
+ * F/W checks if the device is idle by reading defined set
+ * of registers. It returns a bitmask of all the engines,
+ * where a bit is set if the engine is not idle.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_MAP_HWMON_EN
+ * If set, means f/w supports proprietary
+ * HWMON enum mapping to cpucp enums.
+ * Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
+ * This is a main indication that the
+ * running FW populates the device status
+ * register. Meaning the device status
+ * bits are not garbage, but actual
+ * statuses.
+ * Initialized in: preboot
+ *
+ */
+#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN)
+#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN)
+#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN)
+#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN)
+#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN)
+#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN)
+#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN)
+#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN)
+#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN)
+#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN)
+#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN)
+#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN)
+#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN)
+#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN)
+#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN)
+#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN)
+#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN)
+#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN)
+#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN)
+#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN)
+#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN)
+#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN)
+#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN)
+#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN)
+#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED)
+#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED)
+
+enum cpu_boot_status {
+ CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
+ CPU_BOOT_STATUS_IN_WFE = 1,
+ CPU_BOOT_STATUS_DRAM_RDY = 2,
+ CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+ CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */
+ CPU_BOOT_STATUS_IN_PREBOOT = 5,
+ CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */
+ CPU_BOOT_STATUS_IN_UBOOT = 7,
+ CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */
+ /* U-Boot console prompt activated, commands are not processed */
+ CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+ /* Finished NICs init, reported after DRAM and NICs */
+ CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+ CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+ /* Last boot loader progress status, ready to receive commands */
+ CPU_BOOT_STATUS_READY_TO_BOOT = 15,
+ /* Internal Boot finished, ready for boot-fit */
+ CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
+ /* Internal Security has been initialized, device can be accessed */
+ CPU_BOOT_STATUS_SECURITY_READY = 17,
+ /* FW component is preparing to shutdown and communication with host is not available */
+ CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18,
+};
+
+enum kmd_msg {
+ KMD_MSG_NA = 0,
+ KMD_MSG_GOTO_WFE,
+ KMD_MSG_FIT_RDY,
+ KMD_MSG_SKIP_BMC,
+ RESERVED,
+ KMD_MSG_RST_DEV,
+ KMD_MSG_LAST
+};
+
+enum cpu_msg_status {
+ CPU_MSG_CLR = 0,
+ CPU_MSG_OK,
+ CPU_MSG_ERR,
+};
+
+/* communication registers mapping - consider ABI when changing */
+struct cpu_dyn_regs {
+ __le32 cpu_pq_base_addr_low;
+ __le32 cpu_pq_base_addr_high;
+ __le32 cpu_pq_length;
+ __le32 cpu_pq_init_status;
+ __le32 cpu_eq_base_addr_low;
+ __le32 cpu_eq_base_addr_high;
+ __le32 cpu_eq_length;
+ __le32 cpu_eq_ci;
+ __le32 cpu_cq_base_addr_low;
+ __le32 cpu_cq_base_addr_high;
+ __le32 cpu_cq_length;
+ __le32 cpu_pf_pq_pi;
+ __le32 cpu_boot_dev_sts0;
+ __le32 cpu_boot_dev_sts1;
+ __le32 cpu_boot_err0;
+ __le32 cpu_boot_err1;
+ __le32 cpu_boot_status;
+ __le32 fw_upd_sts;
+ __le32 fw_upd_cmd;
+ __le32 fw_upd_pending_sts;
+ __le32 fuse_ver_offset;
+ __le32 preboot_ver_offset;
+ __le32 uboot_ver_offset;
+ __le32 hw_state;
+ __le32 kmd_msg_to_cpu;
+ __le32 cpu_cmd_status_to_host;
+ __le32 gic_host_pi_upd_irq;
+ __le32 gic_tpc_qm_irq_ctrl;
+ __le32 gic_mme_qm_irq_ctrl;
+ __le32 gic_dma_qm_irq_ctrl;
+ __le32 gic_nic_qm_irq_ctrl;
+ __le32 gic_dma_core_irq_ctrl;
+ __le32 gic_host_halt_irq;
+ __le32 gic_host_ints_irq;
+ __le32 gic_host_soft_rst_irq;
+ __le32 gic_rot_qm_irq_ctrl;
+ __le32 cpu_rst_status;
+ __le32 eng_arc_irq_ctrl;
+ __le32 reserved1[20]; /* reserve for future use */
+};
+
+/* TODO: remove the desc magic after the code is updated to use message */
+/* HCDM - Habana Communications Descriptor Magic */
+#define HL_COMMS_DESC_MAGIC 0x4843444D
+#define HL_COMMS_DESC_VER 3
+
+/* HCMv - Habana Communications Message + header version */
+#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00
+#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00
+#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF
+
+#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \
+ ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
+#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC
+#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1)
+#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2)
+#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3)
+
+#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3
+
+#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \
+ (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \
+ HL_COMMS_MSG_MAGIC_VALUE)
+
+#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \
+ (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \
+ ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
+
+#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \
+ (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \
+ HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver)))
+
+enum comms_msg_type {
+ HL_COMMS_DESC_TYPE = 0,
+ HL_COMMS_RESET_CAUSE_TYPE = 1,
+ HL_COMMS_FW_CFG_SKIP_TYPE = 2,
+ HL_COMMS_BINNING_CONF_TYPE = 3,
+};
+
+/*
+ * Binning information shared between LKD and FW
+ * @tpc_mask_l - TPC binning information lower 64 bit
+ * @dec_mask - Decoder binning information
+ * @dram_mask - DRAM binning information
+ * @edma_mask - EDMA binning information
+ * @mme_mask_l - MME binning information lower 32
+ * @mme_mask_h - MME binning information upper 32
+ * @rot_mask - Rotator binning information
+ * @xbar_mask - xBAR binning information
+ * @reserved - reserved field for future binning info w/o ABI change
+ * @tpc_mask_h - TPC binning information upper 64 bit
+ * @nic_mask - NIC binning information
+ */
+struct lkd_fw_binning_info {
+ __le64 tpc_mask_l;
+ __le32 dec_mask;
+ __le32 dram_mask;
+ __le32 edma_mask;
+ __le32 mme_mask_l;
+ __le32 mme_mask_h;
+ __le32 rot_mask;
+ __le32 xbar_mask;
+ __le32 reserved0;
+ __le64 tpc_mask_h;
+ __le64 nic_mask;
+ __le32 reserved1[8];
+};
+
+/* TODO: remove this struct after the code is updated to use message */
+/* this is the comms descriptor header - meta data */
+struct comms_desc_header {
+ __le32 magic; /* magic for validation */
+ __le32 crc32; /* CRC32 of the descriptor w/o header */
+ __le16 size; /* size of the descriptor w/o header */
+ __u8 version; /* descriptor version */
+ __u8 reserved[5]; /* pad to 64 bit */
+};
+
+/* this is the comms message header - meta data */
+struct comms_msg_header {
+ __le32 magic; /* magic for validation */
+ __le32 crc32; /* CRC32 of the message w/o header */
+ __le16 size; /* size of the message w/o header */
+ __u8 version; /* message payload version */
+ __u8 type; /* message type */
+ __u8 reserved[4]; /* pad to 64 bit */
+};
+
+enum lkd_fw_ascii_msg_lvls {
+ LKD_FW_ASCII_MSG_ERR = 0,
+ LKD_FW_ASCII_MSG_WRN = 1,
+ LKD_FW_ASCII_MSG_INF = 2,
+ LKD_FW_ASCII_MSG_DBG = 3,
+};
+
+#define LKD_FW_ASCII_MSG_MAX_LEN 128
+#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */
+
+struct lkd_fw_ascii_msg {
+ __u8 valid;
+ __u8 msg_lvl;
+ __u8 reserved[6];
+ char msg[LKD_FW_ASCII_MSG_MAX_LEN];
+};
+
+/* this is the main FW descriptor - consider ABI when changing */
+struct lkd_fw_comms_desc {
+ struct comms_desc_header header;
+ struct cpu_dyn_regs cpu_dyn_regs;
+ char fuse_ver[VERSION_MAX_LEN];
+ char cur_fw_ver[VERSION_MAX_LEN];
+ /* can be used for 1 more version w/o ABI change */
+ char reserved0[VERSION_MAX_LEN];
+ __le64 img_addr; /* address for next FW component load */
+ struct lkd_fw_binning_info binning_info;
+ struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
+ __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */
+ char reserved1[4];
+};
+
+enum comms_reset_cause {
+ HL_RESET_CAUSE_UNKNOWN = 0,
+ HL_RESET_CAUSE_HEARTBEAT = 1,
+ HL_RESET_CAUSE_TDR = 2,
+};
+
+/* TODO: remove define after struct name is aligned on all projects */
+#define lkd_msg_comms lkd_fw_comms_msg
+
+/* this is the comms message descriptor */
+struct lkd_fw_comms_msg {
+ struct comms_msg_header header;
+ /* union for future expantions of new messages */
+ union {
+ struct {
+ struct cpu_dyn_regs cpu_dyn_regs;
+ char fuse_ver[VERSION_MAX_LEN];
+ char cur_fw_ver[VERSION_MAX_LEN];
+ /* can be used for 1 more version w/o ABI change */
+ char reserved0[VERSION_MAX_LEN];
+ /* address for next FW component load */
+ __le64 img_addr;
+ struct lkd_fw_binning_info binning_info;
+ struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
+ /* reserved memory size [MB] for FW/SVE */
+ __le32 rsvd_mem_size_mb;
+ char reserved1[4];
+ };
+ struct {
+ __u8 reset_cause;
+ };
+ struct {
+ __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
+ };
+ struct lkd_fw_binning_info binning_conf;
+ };
+};
+
+/*
+ * LKD commands:
+ *
+ * COMMS_NOOP Used to clear the command register and no actual
+ * command is send.
+ *
+ * COMMS_CLR_STS Clear status command - FW should clear the
+ * status register. Used for synchronization
+ * between the commands as part of the race free
+ * protocol.
+ *
+ * COMMS_RST_STATE Reset the current communication state which is
+ * kept by FW for proper responses.
+ * Should be used in the beginning of the
+ * communication cycle to clean any leftovers from
+ * previous communication attempts.
+ *
+ * COMMS_PREP_DESC Prepare descriptor for setting up the
+ * communication and other dynamic data:
+ * struct lkd_fw_comms_desc.
+ * This command has a parameter stating the next FW
+ * component size, so the FW can actually prepare a
+ * space for it and in the status response provide
+ * the descriptor offset. The Offset of the next FW
+ * data component is a part of the descriptor
+ * structure.
+ *
+ * COMMS_DATA_RDY The FW data has been uploaded and is ready for
+ * validation.
+ *
+ * COMMS_EXEC Execute the next FW component.
+ *
+ * COMMS_RST_DEV Reset the device.
+ *
+ * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure
+ * devices.
+ *
+ * COMMS_SKIP_BMC Perform actions required for BMC-less servers.
+ * Do not wait for BMC response.
+ *
+ * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory
+ * space is allocated in a ELBI access only
+ * address range.
+ *
+ */
+enum comms_cmd {
+ COMMS_NOOP = 0,
+ COMMS_CLR_STS = 1,
+ COMMS_RST_STATE = 2,
+ COMMS_PREP_DESC = 3,
+ COMMS_DATA_RDY = 4,
+ COMMS_EXEC = 5,
+ COMMS_RST_DEV = 6,
+ COMMS_GOTO_WFE = 7,
+ COMMS_SKIP_BMC = 8,
+ COMMS_PREP_DESC_ELBI = 10,
+ COMMS_INVLD_LAST
+};
+
+#define COMMS_COMMAND_SIZE_SHIFT 0
+#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF
+#define COMMS_COMMAND_CMD_SHIFT 27
+#define COMMS_COMMAND_CMD_MASK 0xF8000000
+
+/*
+ * LKD command to FW register structure
+ * @size - FW component size
+ * @cmd - command from enum comms_cmd
+ */
+struct comms_command {
+ union { /* bit fields are only for FW use */
+ struct {
+ u32 size :25; /* 32MB max. */
+ u32 reserved :2;
+ enum comms_cmd cmd :5; /* 32 commands */
+ };
+ __le32 val;
+ };
+};
+
+/*
+ * FW status
+ *
+ * COMMS_STS_NOOP Used to clear the status register and no actual
+ * status is provided.
+ *
+ * COMMS_STS_ACK Command has been received and recognized.
+ *
+ * COMMS_STS_OK Command execution has finished successfully.
+ *
+ * COMMS_STS_ERR Command execution was unsuccessful and resulted
+ * in error.
+ *
+ * COMMS_STS_VALID_ERR FW validation has failed.
+ *
+ * COMMS_STS_TIMEOUT_ERR Command execution has timed out.
+ */
+enum comms_sts {
+ COMMS_STS_NOOP = 0,
+ COMMS_STS_ACK = 1,
+ COMMS_STS_OK = 2,
+ COMMS_STS_ERR = 3,
+ COMMS_STS_VALID_ERR = 4,
+ COMMS_STS_TIMEOUT_ERR = 5,
+ COMMS_STS_INVLD_LAST
+};
+
+/* RAM types for FW components loading - defines the base address */
+enum comms_ram_types {
+ COMMS_SRAM = 0,
+ COMMS_DRAM = 1,
+};
+
+#define COMMS_STATUS_OFFSET_SHIFT 0
+#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF
+#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2
+#define COMMS_STATUS_RAM_TYPE_SHIFT 26
+#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000
+#define COMMS_STATUS_STATUS_SHIFT 28
+#define COMMS_STATUS_STATUS_MASK 0xF0000000
+
+/*
+ * FW status to LKD register structure
+ * @offset - an offset from the base of the ram_type shifted right by
+ * 2 bits (always aligned to 32 bits).
+ * Allows a maximum addressable offset of 256MB from RAM base.
+ * Example: for real offset in RAM of 0x800000 (8MB), the value
+ * in offset field is (0x800000 >> 2) = 0x200000.
+ * @ram_type - the RAM type that should be used for offset from
+ * enum comms_ram_types
+ * @status - status from enum comms_sts
+ */
+struct comms_status {
+ union { /* bit fields are only for FW use */
+ struct {
+ u32 offset :26;
+ enum comms_ram_types ram_type :2;
+ enum comms_sts status :4; /* 16 statuses */
+ };
+ __le32 val;
+ };
+};
+
+#define NAME_MAX_LEN 32 /* bytes */
+struct hl_module_data {
+ __u8 name[NAME_MAX_LEN];
+ __u8 version[VERSION_MAX_LEN];
+};
+
+/**
+ * struct hl_component_versions - versions associated with hl component.
+ * @struct_size: size of all the struct (including dynamic size of modules).
+ * @modules_offset: offset of the modules field in this struct.
+ * @component: version of the component itself.
+ * @fw_os: Firmware OS Version.
+ * @comp_name: Name of the component.
+ * @modules_counter: number of set bits in modules_mask.
+ * @reserved: reserved for future use.
+ * @modules: versions of the component's modules. Elborated explanation in
+ * struct cpucp_versions.
+ */
+struct hl_component_versions {
+ __le16 struct_size;
+ __le16 modules_offset;
+ __u8 component[VERSION_MAX_LEN];
+ __u8 fw_os[VERSION_MAX_LEN];
+ __u8 comp_name[NAME_MAX_LEN];
+ __u8 modules_counter;
+ __u8 reserved[3];
+ struct hl_module_data modules[];
+};
+
+/* Max size of fit size */
+#define HL_FW_VERSIONS_FIT_SIZE 4096
+
+#endif /* HL_BOOT_IF_H */
diff --git a/include/linux/hid-roccat.h b/include/linux/hid-roccat.h
index 3214fb0815fc..753654fff07f 100644
--- a/include/linux/hid-roccat.h
+++ b/include/linux/hid-roccat.h
@@ -16,7 +16,7 @@
#ifdef __KERNEL__
-int roccat_connect(struct class *klass, struct hid_device *hid,
+int roccat_connect(const struct class *klass, struct hid_device *hid,
int report_size);
void roccat_disconnect(int minor);
int roccat_report_event(int minor, u8 const *data);
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 13b1e65fbdcc..6730ee900ee1 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -21,6 +21,10 @@
#define HID_USAGE_SENSOR_ALS 0x200041
#define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0
#define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1
+#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2
+#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3
+#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4
+#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5
/* PROX (200011) */
#define HID_USAGE_SENSOR_PROX 0x200011
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 39e21e3815ad..b12cb1c8e682 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -341,6 +341,29 @@ struct hid_item {
*/
#define MAX_USBHID_BOOT_QUIRKS 4
+/**
+ * DOC: HID quirks
+ * | @HID_QUIRK_NOTOUCH:
+ * | @HID_QUIRK_IGNORE: ignore this device
+ * | @HID_QUIRK_NOGET:
+ * | @HID_QUIRK_HIDDEV_FORCE:
+ * | @HID_QUIRK_BADPAD:
+ * | @HID_QUIRK_MULTI_INPUT:
+ * | @HID_QUIRK_HIDINPUT_FORCE:
+ * | @HID_QUIRK_ALWAYS_POLL:
+ * | @HID_QUIRK_INPUT_PER_APP:
+ * | @HID_QUIRK_X_INVERT:
+ * | @HID_QUIRK_Y_INVERT:
+ * | @HID_QUIRK_SKIP_OUTPUT_REPORTS:
+ * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID:
+ * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP:
+ * | @HID_QUIRK_HAVE_SPECIAL_DRIVER:
+ * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE:
+ * | @HID_QUIRK_FULLSPEED_INTERVAL:
+ * | @HID_QUIRK_NO_INIT_REPORTS:
+ * | @HID_QUIRK_NO_IGNORE:
+ * | @HID_QUIRK_NO_INPUT_SYNC:
+ */
/* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */
#define HID_QUIRK_NOTOUCH BIT(1)
#define HID_QUIRK_IGNORE BIT(2)
@@ -360,6 +383,7 @@ struct hid_item {
#define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18)
#define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19)
#define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20)
+#define HID_QUIRK_NOINVERT BIT(21)
#define HID_QUIRK_FULLSPEED_INTERVAL BIT(28)
#define HID_QUIRK_NO_INIT_REPORTS BIT(29)
#define HID_QUIRK_NO_IGNORE BIT(30)
@@ -555,9 +579,9 @@ struct hid_input {
struct hid_report *report;
struct input_dev *input;
const char *name;
- bool registered;
struct list_head reports; /* the list of reports */
unsigned int application; /* application usage for this input */
+ bool registered;
};
enum hid_type {
@@ -655,14 +679,17 @@ struct hid_device { /* device report descriptor */
struct list_head debug_list;
spinlock_t debug_list_lock;
wait_queue_head_t debug_wait;
+ struct kref ref;
unsigned int id; /* system unique id */
-#ifdef CONFIG_BPF
+#ifdef CONFIG_HID_BPF
struct hid_bpf bpf; /* hid-bpf data */
-#endif /* CONFIG_BPF */
+#endif /* CONFIG_HID_BPF */
};
+void hiddev_free(struct kref *ref);
+
#define to_hid_device(pdev) \
container_of(pdev, struct hid_device, dev)
@@ -809,11 +836,11 @@ struct hid_driver {
void (*feature_mapping)(struct hid_device *hdev,
struct hid_field *field,
struct hid_usage *usage);
-#ifdef CONFIG_PM
+
int (*suspend)(struct hid_device *hdev, pm_message_t message);
int (*resume)(struct hid_device *hdev);
int (*reset_resume)(struct hid_device *hdev);
-#endif
+
/* private: */
struct device_driver driver;
};
@@ -885,7 +912,7 @@ extern bool hid_ignore(struct hid_device *);
extern int hid_add_device(struct hid_device *);
extern void hid_destroy_device(struct hid_device *);
-extern struct bus_type hid_bus_type;
+extern const struct bus_type hid_bus_type;
extern int __must_check __hid_register_driver(struct hid_driver *,
struct module *, const char *mod_name);
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index e9afb61e6ee0..7118ac28d468 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -77,17 +77,6 @@ enum hid_bpf_attach_flags {
int hid_bpf_device_event(struct hid_bpf_ctx *ctx);
int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx);
-/* Following functions are kfunc that we export to BPF programs */
-/* available everywhere in HID-BPF */
-__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz);
-
-/* only available in syscall */
-int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags);
-int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
- enum hid_report_type rtype, enum hid_class_request reqtype);
-struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id);
-void hid_bpf_release_context(struct hid_bpf_ctx *ctx);
-
/*
* Below is HID internal
*/
@@ -115,7 +104,7 @@ struct hid_bpf_ops {
size_t len, enum hid_report_type rtype,
enum hid_class_request reqtype);
struct module *owner;
- struct bus_type *bus_type;
+ const struct bus_type *bus_type;
};
extern struct hid_bpf_ops *hid_bpf_ops;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 68da30625a6c..00341b56d291 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -440,6 +440,140 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len)
}
/**
+ * memcpy_from_folio - Copy a range of bytes from a folio.
+ * @to: The memory to copy to.
+ * @folio: The folio to read from.
+ * @offset: The first byte in the folio to read.
+ * @len: The number of bytes to copy.
+ */
+static inline void memcpy_from_folio(char *to, struct folio *folio,
+ size_t offset, size_t len)
+{
+ VM_BUG_ON(offset + len > folio_size(folio));
+
+ do {
+ const char *from = kmap_local_folio(folio, offset);
+ size_t chunk = len;
+
+ if (folio_test_highmem(folio) &&
+ chunk > PAGE_SIZE - offset_in_page(offset))
+ chunk = PAGE_SIZE - offset_in_page(offset);
+ memcpy(to, from, chunk);
+ kunmap_local(from);
+
+ to += chunk;
+ offset += chunk;
+ len -= chunk;
+ } while (len > 0);
+}
+
+/**
+ * memcpy_to_folio - Copy a range of bytes to a folio.
+ * @folio: The folio to write to.
+ * @offset: The first byte in the folio to store to.
+ * @from: The memory to copy from.
+ * @len: The number of bytes to copy.
+ */
+static inline void memcpy_to_folio(struct folio *folio, size_t offset,
+ const char *from, size_t len)
+{
+ VM_BUG_ON(offset + len > folio_size(folio));
+
+ do {
+ char *to = kmap_local_folio(folio, offset);
+ size_t chunk = len;
+
+ if (folio_test_highmem(folio) &&
+ chunk > PAGE_SIZE - offset_in_page(offset))
+ chunk = PAGE_SIZE - offset_in_page(offset);
+ memcpy(to, from, chunk);
+ kunmap_local(to);
+
+ from += chunk;
+ offset += chunk;
+ len -= chunk;
+ } while (len > 0);
+
+ flush_dcache_folio(folio);
+}
+
+/**
+ * folio_zero_tail - Zero the tail of a folio.
+ * @folio: The folio to zero.
+ * @offset: The byte offset in the folio to start zeroing at.
+ * @kaddr: The address the folio is currently mapped to.
+ *
+ * If you have already used kmap_local_folio() to map a folio, written
+ * some data to it and now need to zero the end of the folio (and flush
+ * the dcache), you can use this function. If you do not have the
+ * folio kmapped (eg the folio has been partially populated by DMA),
+ * use folio_zero_range() or folio_zero_segment() instead.
+ *
+ * Return: An address which can be passed to kunmap_local().
+ */
+static inline __must_check void *folio_zero_tail(struct folio *folio,
+ size_t offset, void *kaddr)
+{
+ size_t len = folio_size(folio) - offset;
+
+ if (folio_test_highmem(folio)) {
+ size_t max = PAGE_SIZE - offset_in_page(offset);
+
+ while (len > max) {
+ memset(kaddr, 0, max);
+ kunmap_local(kaddr);
+ len -= max;
+ offset += max;
+ max = PAGE_SIZE;
+ kaddr = kmap_local_folio(folio, offset);
+ }
+ }
+
+ memset(kaddr, 0, len);
+ flush_dcache_folio(folio);
+
+ return kaddr;
+}
+
+/**
+ * folio_fill_tail - Copy some data to a folio and pad with zeroes.
+ * @folio: The destination folio.
+ * @offset: The offset into @folio at which to start copying.
+ * @from: The data to copy.
+ * @len: How many bytes of data to copy.
+ *
+ * This function is most useful for filesystems which support inline data.
+ * When they want to copy data from the inode into the page cache, this
+ * function does everything for them. It supports large folios even on
+ * HIGHMEM configurations.
+ */
+static inline void folio_fill_tail(struct folio *folio, size_t offset,
+ const char *from, size_t len)
+{
+ char *to = kmap_local_folio(folio, offset);
+
+ VM_BUG_ON(offset + len > folio_size(folio));
+
+ if (folio_test_highmem(folio)) {
+ size_t max = PAGE_SIZE - offset_in_page(offset);
+
+ while (len > max) {
+ memcpy(to, from, max);
+ kunmap_local(to);
+ len -= max;
+ from += max;
+ offset += max;
+ max = PAGE_SIZE;
+ to = kmap_local_folio(folio, offset);
+ }
+ }
+
+ memcpy(to, from, len);
+ to = folio_zero_tail(folio, offset + len, to + len);
+ kunmap_local(to);
+}
+
+/**
* memcpy_from_file_folio - Copy some bytes from a file folio.
* @to: The destination buffer.
* @folio: The folio to copy from.
@@ -507,10 +641,24 @@ static inline void folio_zero_range(struct folio *folio,
zero_user_segments(&folio->page, start, start + length, 0, 0);
}
-static inline void unmap_and_put_page(struct page *page, void *addr)
+/**
+ * folio_release_kmap - Unmap a folio and drop a refcount.
+ * @folio: The folio to release.
+ * @addr: The address previously returned by a call to kmap_local_folio().
+ *
+ * It is common, eg in directory handling to kmap a folio. This function
+ * unmaps the folio and drops the refcount that was being held to keep the
+ * folio alive while we accessed it.
+ */
+static inline void folio_release_kmap(struct folio *folio, void *addr)
{
kunmap_local(addr);
- put_page(page);
+ folio_put(folio);
+}
+
+static inline void unmap_and_put_page(struct page *page, void *addr)
+{
+ folio_release_kmap(page_folio(page), addr);
}
#endif /* _LINUX_HIGHMEM_H */
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index a7d54d4d41fd..9d7754ad5e9b 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -43,6 +43,7 @@
#define QM_MB_CMD_CQC_BT 0x5
#define QM_MB_CMD_SQC_VFT_V2 0x6
#define QM_MB_CMD_STOP_QP 0x8
+#define QM_MB_CMD_FLUSH_QM 0x9
#define QM_MB_CMD_SRC 0xc
#define QM_MB_CMD_DST 0xd
@@ -104,21 +105,17 @@
enum qm_stop_reason {
QM_NORMAL,
QM_SOFT_RESET,
- QM_FLR,
+ QM_DOWN,
};
enum qm_state {
- QM_INIT = 0,
- QM_START,
- QM_CLOSE,
+ QM_WORK = 0,
QM_STOP,
};
enum qp_state {
- QP_INIT = 1,
- QP_START,
+ QP_START = 1,
QP_STOP,
- QP_CLOSE,
};
enum qm_hw_ver {
@@ -144,15 +141,33 @@ enum qm_vf_state {
QM_NOT_READY,
};
+enum qm_misc_ctl_bits {
+ QM_DRIVER_REMOVING = 0x0,
+ QM_RST_SCHED,
+ QM_RESETTING,
+ QM_MODULE_PARAM,
+};
+
enum qm_cap_bits {
QM_SUPPORT_DB_ISOLATION = 0x0,
QM_SUPPORT_FUNC_QOS,
QM_SUPPORT_STOP_QP,
+ QM_SUPPORT_STOP_FUNC,
QM_SUPPORT_MB_COMMAND,
QM_SUPPORT_SVA_PREFETCH,
QM_SUPPORT_RPM,
};
+struct qm_dev_alg {
+ u64 alg_msk;
+ const char *alg;
+};
+
+struct qm_dev_dfx {
+ u32 dev_state;
+ u32 dev_timeout;
+};
+
struct dfx_diff_registers {
u32 *regs;
u32 reg_offset;
@@ -181,6 +196,7 @@ struct qm_debug {
struct dentry *debug_root;
struct dentry *qm_d;
struct debugfs_file files[DEBUG_FILE_NUM];
+ struct qm_dev_dfx dev_dfx;
unsigned int *qm_last_words;
/* ACC engines recoreding last regs */
unsigned int *last_words;
@@ -258,6 +274,16 @@ struct hisi_qm_cap_info {
u32 v3_val;
};
+struct hisi_qm_cap_record {
+ u32 type;
+ u32 cap_val;
+};
+
+struct hisi_qm_cap_tables {
+ struct hisi_qm_cap_record *qm_cap_table;
+ struct hisi_qm_cap_record *dev_cap_table;
+};
+
struct hisi_qm_list {
struct mutex lock;
struct list_head list;
@@ -269,6 +295,7 @@ struct hisi_qm_poll_data {
struct hisi_qm *qm;
struct work_struct work;
u16 *qp_finish_id;
+ u16 eqe_num;
};
/**
@@ -285,6 +312,18 @@ struct qm_err_isolate {
struct list_head qm_hw_errs;
};
+struct qm_rsv_buf {
+ struct qm_sqc *sqc;
+ struct qm_cqc *cqc;
+ struct qm_eqc *eqc;
+ struct qm_aeqc *aeqc;
+ dma_addr_t sqc_dma;
+ dma_addr_t cqc_dma;
+ dma_addr_t eqc_dma;
+ dma_addr_t aeqc_dma;
+ struct qm_dma qcdma;
+};
+
struct hisi_qm {
enum qm_hw_ver ver;
enum qm_fun_type fun_type;
@@ -317,6 +356,7 @@ struct hisi_qm {
dma_addr_t cqc_dma;
dma_addr_t eqe_dma;
dma_addr_t aeqe_dma;
+ struct qm_rsv_buf xqc_buf;
struct hisi_qm_status status;
const struct hisi_qm_err_ini *err_ini;
@@ -344,7 +384,6 @@ struct hisi_qm {
struct work_struct rst_work;
struct work_struct cmd_process;
- const char *algs;
bool use_sva;
resource_size_t phys_base;
@@ -355,6 +394,8 @@ struct hisi_qm {
u32 mb_qos;
u32 type_rate;
struct qm_err_isolate isolate_data;
+
+ struct hisi_qm_cap_tables cap_tables;
};
struct hisi_qp_status {
@@ -471,12 +512,26 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
mutex_init(&qm_list->lock);
}
+static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_add_tail(&qm->list, &qm_list->list);
+ mutex_unlock(&qm_list->lock);
+}
+
+static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_del(&qm->list);
+ mutex_unlock(&qm_list->lock);
+}
+
int hisi_qm_init(struct hisi_qm *qm);
void hisi_qm_uninit(struct hisi_qm *qm);
int hisi_qm_start(struct hisi_qm *qm);
int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
-int hisi_qm_stop_qp(struct hisi_qp *qp);
+void hisi_qm_stop_qp(struct hisi_qp *qp);
int hisi_qp_send(struct hisi_qp *qp, const void *msg);
void hisi_qm_debug_init(struct hisi_qm *qm);
void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
@@ -516,8 +571,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard);
+void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard);
int hisi_qm_resume(struct device *dev);
int hisi_qm_suspend(struct device *dev);
void hisi_qm_pm_uninit(struct hisi_qm *qm);
@@ -528,6 +583,8 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
u32 hisi_qm_get_hw_info(struct hisi_qm *qm,
const struct hisi_qm_cap_info *info_table,
u32 index, bool is_read);
+int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs,
+ u32 dev_algs_size);
/* Used by VFIO ACC live migration driver */
struct pci_driver *hisi_sec_get_pf_driver(void);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0ee140176f10..aa1e65ccb615 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -13,16 +13,12 @@
#define _LINUX_HRTIMER_H
#include <linux/hrtimer_defs.h>
-#include <linux/rbtree.h>
+#include <linux/hrtimer_types.h>
#include <linux/init.h>
#include <linux/list.h>
-#include <linux/percpu.h>
-#include <linux/seqlock.h>
+#include <linux/percpu-defs.h>
+#include <linux/rbtree.h>
#include <linux/timer.h>
-#include <linux/timerqueue.h>
-
-struct hrtimer_clock_base;
-struct hrtimer_cpu_base;
/*
* Mode arguments of xxx_hrtimer functions:
@@ -60,14 +56,6 @@ enum hrtimer_mode {
};
/*
- * Return values for the callback function
- */
-enum hrtimer_restart {
- HRTIMER_NORESTART, /* Timer is not restarted */
- HRTIMER_RESTART, /* Timer must be restarted */
-};
-
-/*
* Values to track state of the timer
*
* Possible states:
@@ -95,38 +83,6 @@ enum hrtimer_restart {
#define HRTIMER_STATE_ENQUEUED 0x01
/**
- * struct hrtimer - the basic hrtimer structure
- * @node: timerqueue node, which also manages node.expires,
- * the absolute expiry time in the hrtimers internal
- * representation. The time is related to the clock on
- * which the timer is based. Is setup by adding
- * slack to the _softexpires value. For non range timers
- * identical to _softexpires.
- * @_softexpires: the absolute earliest expiry time of the hrtimer.
- * The time which was given as expiry time when the timer
- * was armed.
- * @function: timer expiry callback function
- * @base: pointer to the timer base (per cpu and per clock)
- * @state: state information (See bit values above)
- * @is_rel: Set if the timer was armed relative
- * @is_soft: Set if hrtimer will be expired in soft interrupt context.
- * @is_hard: Set if hrtimer will be expired in hard interrupt context
- * even on RT.
- *
- * The hrtimer structure must be initialized by hrtimer_init()
- */
-struct hrtimer {
- struct timerqueue_node node;
- ktime_t _softexpires;
- enum hrtimer_restart (*function)(struct hrtimer *);
- struct hrtimer_clock_base *base;
- u8 state;
- u8 is_rel;
- u8 is_soft;
- u8 is_hard;
-};
-
-/**
* struct hrtimer_sleeper - simple sleeper structure
* @timer: embedded timer structure
* @task: task to wake up
@@ -138,105 +94,6 @@ struct hrtimer_sleeper {
struct task_struct *task;
};
-#ifdef CONFIG_64BIT
-# define __hrtimer_clock_base_align ____cacheline_aligned
-#else
-# define __hrtimer_clock_base_align
-#endif
-
-/**
- * struct hrtimer_clock_base - the timer base for a specific clock
- * @cpu_base: per cpu clock base
- * @index: clock type index for per_cpu support when moving a
- * timer to a base on another cpu.
- * @clockid: clock id for per_cpu support
- * @seq: seqcount around __run_hrtimer
- * @running: pointer to the currently running hrtimer
- * @active: red black tree root node for the active timers
- * @get_time: function to retrieve the current time of the clock
- * @offset: offset of this clock to the monotonic base
- */
-struct hrtimer_clock_base {
- struct hrtimer_cpu_base *cpu_base;
- unsigned int index;
- clockid_t clockid;
- seqcount_raw_spinlock_t seq;
- struct hrtimer *running;
- struct timerqueue_head active;
- ktime_t (*get_time)(void);
- ktime_t offset;
-} __hrtimer_clock_base_align;
-
-enum hrtimer_base_type {
- HRTIMER_BASE_MONOTONIC,
- HRTIMER_BASE_REALTIME,
- HRTIMER_BASE_BOOTTIME,
- HRTIMER_BASE_TAI,
- HRTIMER_BASE_MONOTONIC_SOFT,
- HRTIMER_BASE_REALTIME_SOFT,
- HRTIMER_BASE_BOOTTIME_SOFT,
- HRTIMER_BASE_TAI_SOFT,
- HRTIMER_MAX_CLOCK_BASES,
-};
-
-/**
- * struct hrtimer_cpu_base - the per cpu clock bases
- * @lock: lock protecting the base and associated clock bases
- * and timers
- * @cpu: cpu number
- * @active_bases: Bitfield to mark bases with active timers
- * @clock_was_set_seq: Sequence counter of clock was set events
- * @hres_active: State of high resolution mode
- * @in_hrtirq: hrtimer_interrupt() is currently executing
- * @hang_detected: The last hrtimer interrupt detected a hang
- * @softirq_activated: displays, if the softirq is raised - update of softirq
- * related settings is not required then.
- * @nr_events: Total number of hrtimer interrupt events
- * @nr_retries: Total number of hrtimer interrupt retries
- * @nr_hangs: Total number of hrtimer interrupt hangs
- * @max_hang_time: Maximum time spent in hrtimer_interrupt
- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
- * expired
- * @timer_waiters: A hrtimer_cancel() invocation waits for the timer
- * callback to finish.
- * @expires_next: absolute time of the next event, is required for remote
- * hrtimer enqueue; it is the total first expiry time (hard
- * and soft hrtimer are taken into account)
- * @next_timer: Pointer to the first expiring timer
- * @softirq_expires_next: Time to check, if soft queues needs also to be expired
- * @softirq_next_timer: Pointer to the first expiring softirq based timer
- * @clock_base: array of clock bases for this cpu
- *
- * Note: next_timer is just an optimization for __remove_hrtimer().
- * Do not dereference the pointer because it is not reliable on
- * cross cpu removals.
- */
-struct hrtimer_cpu_base {
- raw_spinlock_t lock;
- unsigned int cpu;
- unsigned int active_bases;
- unsigned int clock_was_set_seq;
- unsigned int hres_active : 1,
- in_hrtirq : 1,
- hang_detected : 1,
- softirq_activated : 1;
-#ifdef CONFIG_HIGH_RES_TIMERS
- unsigned int nr_events;
- unsigned short nr_retries;
- unsigned short nr_hangs;
- unsigned int max_hang_time;
-#endif
-#ifdef CONFIG_PREEMPT_RT
- spinlock_t softirq_expiry_lock;
- atomic_t timer_waiters;
-#endif
- ktime_t expires_next;
- struct hrtimer *next_timer;
- ktime_t softirq_expires_next;
- struct hrtimer *softirq_next_timer;
- struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
-} ____cacheline_aligned;
-
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = time;
@@ -485,20 +342,12 @@ extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
/**
- * hrtimer_forward_now - forward the timer expiry so it expires after now
+ * hrtimer_forward_now() - forward the timer expiry so it expires after now
* @timer: hrtimer to forward
* @interval: the interval to forward
*
- * Forward the timer expiry so it will expire after the current time
- * of the hrtimer clock base. Returns the number of overruns.
- *
- * Can be safely called from the callback function of @timer. If
- * called from other contexts @timer must neither be enqueued nor
- * running the callback and the caller needs to take care of
- * serialization.
- *
- * Note: This only updates the timer expiry value and does not requeue
- * the timer.
+ * It is a variant of hrtimer_forward(). The timer will expire after the current
+ * time of the hrtimer clock base. See hrtimer_forward() for details.
*/
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
@@ -531,9 +380,9 @@ extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
#else
-#define hrtimers_dead_cpu NULL
+#define hrtimers_cpu_dying NULL
#endif
#endif
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 2d3e3c5fb946..c3b4b7ed7c16 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -3,6 +3,8 @@
#define _LINUX_HRTIMER_DEFS_H
#include <linux/ktime.h>
+#include <linux/timerqueue.h>
+#include <linux/seqlock.h>
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -24,4 +26,106 @@
#endif
+#ifdef CONFIG_64BIT
+# define __hrtimer_clock_base_align ____cacheline_aligned
+#else
+# define __hrtimer_clock_base_align
+#endif
+
+/**
+ * struct hrtimer_clock_base - the timer base for a specific clock
+ * @cpu_base: per cpu clock base
+ * @index: clock type index for per_cpu support when moving a
+ * timer to a base on another cpu.
+ * @clockid: clock id for per_cpu support
+ * @seq: seqcount around __run_hrtimer
+ * @running: pointer to the currently running hrtimer
+ * @active: red black tree root node for the active timers
+ * @get_time: function to retrieve the current time of the clock
+ * @offset: offset of this clock to the monotonic base
+ */
+struct hrtimer_clock_base {
+ struct hrtimer_cpu_base *cpu_base;
+ unsigned int index;
+ clockid_t clockid;
+ seqcount_raw_spinlock_t seq;
+ struct hrtimer *running;
+ struct timerqueue_head active;
+ ktime_t (*get_time)(void);
+ ktime_t offset;
+} __hrtimer_clock_base_align;
+
+enum hrtimer_base_type {
+ HRTIMER_BASE_MONOTONIC,
+ HRTIMER_BASE_REALTIME,
+ HRTIMER_BASE_BOOTTIME,
+ HRTIMER_BASE_TAI,
+ HRTIMER_BASE_MONOTONIC_SOFT,
+ HRTIMER_BASE_REALTIME_SOFT,
+ HRTIMER_BASE_BOOTTIME_SOFT,
+ HRTIMER_BASE_TAI_SOFT,
+ HRTIMER_MAX_CLOCK_BASES,
+};
+
+/**
+ * struct hrtimer_cpu_base - the per cpu clock bases
+ * @lock: lock protecting the base and associated clock bases
+ * and timers
+ * @cpu: cpu number
+ * @active_bases: Bitfield to mark bases with active timers
+ * @clock_was_set_seq: Sequence counter of clock was set events
+ * @hres_active: State of high resolution mode
+ * @in_hrtirq: hrtimer_interrupt() is currently executing
+ * @hang_detected: The last hrtimer interrupt detected a hang
+ * @softirq_activated: displays, if the softirq is raised - update of softirq
+ * related settings is not required then.
+ * @nr_events: Total number of hrtimer interrupt events
+ * @nr_retries: Total number of hrtimer interrupt retries
+ * @nr_hangs: Total number of hrtimer interrupt hangs
+ * @max_hang_time: Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ * expired
+ * @online: CPU is online from an hrtimers point of view
+ * @timer_waiters: A hrtimer_cancel() invocation waits for the timer
+ * callback to finish.
+ * @expires_next: absolute time of the next event, is required for remote
+ * hrtimer enqueue; it is the total first expiry time (hard
+ * and soft hrtimer are taken into account)
+ * @next_timer: Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer: Pointer to the first expiring softirq based timer
+ * @clock_base: array of clock bases for this cpu
+ *
+ * Note: next_timer is just an optimization for __remove_hrtimer().
+ * Do not dereference the pointer because it is not reliable on
+ * cross cpu removals.
+ */
+struct hrtimer_cpu_base {
+ raw_spinlock_t lock;
+ unsigned int cpu;
+ unsigned int active_bases;
+ unsigned int clock_was_set_seq;
+ unsigned int hres_active : 1,
+ in_hrtirq : 1,
+ hang_detected : 1,
+ softirq_activated : 1,
+ online : 1;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ unsigned int nr_events;
+ unsigned short nr_retries;
+ unsigned short nr_hangs;
+ unsigned int max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+ spinlock_t softirq_expiry_lock;
+ atomic_t timer_waiters;
+#endif
+ ktime_t expires_next;
+ struct hrtimer *next_timer;
+ ktime_t softirq_expires_next;
+ struct hrtimer *softirq_next_timer;
+ struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+} ____cacheline_aligned;
+
+
#endif
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
new file mode 100644
index 000000000000..ad66a3081735
--- /dev/null
+++ b/include/linux/hrtimer_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HRTIMER_TYPES_H
+#define _LINUX_HRTIMER_TYPES_H
+
+#include <linux/types.h>
+#include <linux/timerqueue_types.h>
+
+struct hrtimer_clock_base;
+
+/*
+ * Return values for the callback function
+ */
+enum hrtimer_restart {
+ HRTIMER_NORESTART, /* Timer is not restarted */
+ HRTIMER_RESTART, /* Timer must be restarted */
+};
+
+/**
+ * struct hrtimer - the basic hrtimer structure
+ * @node: timerqueue node, which also manages node.expires,
+ * the absolute expiry time in the hrtimers internal
+ * representation. The time is related to the clock on
+ * which the timer is based. Is setup by adding
+ * slack to the _softexpires value. For non range timers
+ * identical to _softexpires.
+ * @_softexpires: the absolute earliest expiry time of the hrtimer.
+ * The time which was given as expiry time when the timer
+ * was armed.
+ * @function: timer expiry callback function
+ * @base: pointer to the timer base (per cpu and per clock)
+ * @state: state information (See bit values above)
+ * @is_rel: Set if the timer was armed relative
+ * @is_soft: Set if hrtimer will be expired in soft interrupt context.
+ * @is_hard: Set if hrtimer will be expired in hard interrupt context
+ * even on RT.
+ *
+ * The hrtimer structure must be initialized by hrtimer_init()
+ */
+struct hrtimer {
+ struct timerqueue_node node;
+ ktime_t _softexpires;
+ enum hrtimer_restart (*function)(struct hrtimer *);
+ struct hrtimer_clock_base *base;
+ u8 state;
+ u8 is_rel;
+ u8 is_soft;
+ u8 is_hard;
+};
+
+#endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e718dbe928ba..de0c89105076 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -67,6 +67,26 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
+/*
+ * Mask of all large folio orders supported for anonymous THP; all orders up to
+ * and including PMD_ORDER, except order-0 (which is not "huge") and order-1
+ * (which is a limitation of the THP implementation).
+ */
+#define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))
+
+/*
+ * Mask of all large folio orders supported for file THP.
+ */
+#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER))
+
+/*
+ * Mask of all large folio orders supported for THP.
+ */
+#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
+
+#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \
+ (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order)))
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
@@ -77,45 +97,105 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
extern unsigned long transparent_hugepage_flags;
+extern unsigned long huge_anon_orders_always;
+extern unsigned long huge_anon_orders_madvise;
+extern unsigned long huge_anon_orders_inherit;
+
+static inline bool hugepage_global_enabled(void)
+{
+ return transparent_hugepage_flags &
+ ((1<<TRANSPARENT_HUGEPAGE_FLAG) |
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG));
+}
+
+static inline bool hugepage_global_always(void)
+{
+ return transparent_hugepage_flags &
+ (1<<TRANSPARENT_HUGEPAGE_FLAG);
+}
+
+static inline bool hugepage_flags_enabled(void)
+{
+ /*
+ * We cover both the anon and the file-backed case here; we must return
+ * true if globally enabled, even when all anon sizes are set to never.
+ * So we don't need to look at huge_anon_orders_inherit.
+ */
+ return hugepage_global_enabled() ||
+ huge_anon_orders_always ||
+ huge_anon_orders_madvise;
+}
+
+static inline int highest_order(unsigned long orders)
+{
+ return fls_long(orders) - 1;
+}
-#define hugepage_flags_enabled() \
- (transparent_hugepage_flags & \
- ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
-#define hugepage_flags_always() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_FLAG))
+static inline int next_order(unsigned long *orders, int prev)
+{
+ *orders &= ~BIT(prev);
+ return highest_order(*orders);
+}
/*
* Do the below checks:
* - For file vma, check if the linear page offset of vma is
- * HPAGE_PMD_NR aligned within the file. The hugepage is
- * guaranteed to be hugepage-aligned within the file, but we must
- * check that the PMD-aligned addresses in the VMA map to
- * PMD-aligned offsets within the file, else the hugepage will
- * not be PMD-mappable.
- * - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE
+ * order-aligned within the file. The hugepage is
+ * guaranteed to be order-aligned within the file, but we must
+ * check that the order-aligned addresses in the VMA map to
+ * order-aligned offsets within the file, else the hugepage will
+ * not be mappable.
+ * - For all vmas, check if the haddr is in an aligned hugepage
* area.
*/
-static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long addr)
+static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
+ unsigned long addr, int order)
{
+ unsigned long hpage_size = PAGE_SIZE << order;
unsigned long haddr;
/* Don't have to check pgoff for anonymous vma */
if (!vma_is_anonymous(vma)) {
if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
- HPAGE_PMD_NR))
+ hpage_size >> PAGE_SHIFT))
return false;
}
- haddr = addr & HPAGE_PMD_MASK;
+ haddr = ALIGN_DOWN(addr, hpage_size);
- if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end)
return false;
return true;
}
+/*
+ * Filter the bitfield of input orders to the ones suitable for use in the vma.
+ * See thp_vma_suitable_order().
+ * All orders that pass the checks are returned as a bitfield.
+ */
+static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long orders)
+{
+ int order;
+
+ /*
+ * Iterate over orders, highest to lowest, removing orders that don't
+ * meet alignment requirements from the set. Exit loop at first order
+ * that meets requirements, since all lower orders must also meet
+ * requirements.
+ */
+
+ order = highest_order(orders);
+
+ while (orders) {
+ if (thp_vma_suitable_order(vma, addr, order))
+ break;
+ order = next_order(&orders, order);
+ }
+
+ return orders;
+}
+
static inline bool file_thp_enabled(struct vm_area_struct *vma)
{
struct inode *inode;
@@ -126,12 +206,55 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
inode = vma->vm_file->f_inode;
return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) &&
- (vma->vm_flags & VM_EXEC) &&
!inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}
-bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
- bool smaps, bool in_pf, bool enforce_sysfs);
+unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
+ unsigned long vm_flags, bool smaps,
+ bool in_pf, bool enforce_sysfs,
+ unsigned long orders);
+
+/**
+ * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
+ * @vma: the vm area to check
+ * @vm_flags: use these vm_flags instead of vma->vm_flags
+ * @smaps: whether answer will be used for smaps file
+ * @in_pf: whether answer will be used by page fault handler
+ * @enforce_sysfs: whether sysfs config should be taken into account
+ * @orders: bitfield of all orders to consider
+ *
+ * Calculates the intersection of the requested hugepage orders and the allowed
+ * hugepage orders for the provided vma. Permitted orders are encoded as a set
+ * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3
+ * corresponds to order-3, etc). Order-0 is never considered a hugepage order.
+ *
+ * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage
+ * orders are allowed.
+ */
+static inline
+unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
+ unsigned long vm_flags, bool smaps,
+ bool in_pf, bool enforce_sysfs,
+ unsigned long orders)
+{
+ /* Optimization to check if required orders are enabled early. */
+ if (enforce_sysfs && vma_is_anonymous(vma)) {
+ unsigned long mask = READ_ONCE(huge_anon_orders_always);
+
+ if (vm_flags & VM_HUGEPAGE)
+ mask |= READ_ONCE(huge_anon_orders_madvise);
+ if (hugepage_global_always() ||
+ ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
+ mask |= READ_ONCE(huge_anon_orders_inherit);
+
+ orders &= mask;
+ if (!orders)
+ return 0;
+ }
+
+ return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf,
+ enforce_sysfs, orders);
+}
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
@@ -140,14 +263,13 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
-void prep_transhuge_page(struct page *page);
-void free_transhuge_page(struct page *page);
-
+void folio_prep_large_rmappable(struct folio *folio);
bool can_split_folio(struct folio *folio, int *pextra_pins);
-int split_huge_page_to_list(struct page *page, struct list_head *list);
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order);
static inline int split_huge_page(struct page *page)
{
- return split_huge_page_to_list(page, NULL);
+ return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio);
@@ -269,20 +391,27 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
return false;
}
-static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long addr)
+static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
+ unsigned long addr, int order)
{
return false;
}
-static inline bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags, bool smaps,
- bool in_pf, bool enforce_sysfs)
+static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long orders)
{
- return false;
+ return 0;
+}
+
+static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
+ unsigned long vm_flags, bool smaps,
+ bool in_pf, bool enforce_sysfs,
+ unsigned long orders)
+{
+ return 0;
}
-static inline void prep_transhuge_page(struct page *page) {}
+static inline void folio_prep_large_rmappable(struct folio *folio) {}
#define transparent_hugepage_flags 0UL
@@ -294,7 +423,8 @@ can_split_folio(struct folio *folio, int *pextra_pins)
return false;
}
static inline int
-split_huge_page_to_list(struct page *page, struct list_head *list)
+split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order)
{
return 0;
}
@@ -391,17 +521,20 @@ static inline bool thp_migration_supported(void)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline int split_folio_to_list(struct folio *folio,
- struct list_head *list)
+static inline int split_folio_to_list_to_order(struct folio *folio,
+ struct list_head *list, int new_order)
{
- return split_huge_page_to_list(&folio->page, list);
+ return split_huge_page_to_list_to_order(&folio->page, list, new_order);
}
-static inline int split_folio(struct folio *folio)
+static inline int split_folio_to_order(struct folio *folio, int new_order)
{
- return split_folio_to_list(folio, NULL);
+ return split_folio_to_list_to_order(folio, NULL, new_order);
}
+#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
+#define split_folio(f) split_folio_to_order(f, 0)
+
/*
* archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
* limitations in the implementation like arm64 MTE can override this to
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ca3c8e10f24a..77b30a8c6076 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -26,9 +26,11 @@ typedef struct { unsigned long pd; } hugepd_t;
#define __hugepd(x) ((hugepd_t) { (x) })
#endif
+void free_huge_folio(struct folio *folio);
+
#ifdef CONFIG_HUGETLB_PAGE
-#include <linux/mempolicy.h>
+#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>
@@ -58,6 +60,7 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
+ struct rw_semaphore rw_sema;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On private mappings, the counter to uncharge reservations is stored
@@ -131,14 +134,12 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags);
-long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
- struct page **, unsigned long *, unsigned long *,
- long, unsigned int, int *);
+ unsigned long address, unsigned int flags,
+ unsigned int *page_mask);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *,
zap_flags_t);
-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct page *ref_page, zap_flags_t zap_flags);
@@ -167,7 +168,6 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
void folio_putback_active_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
-void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
@@ -178,7 +178,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
extern int sysctl_hugetlb_shm_group;
-extern struct list_head huge_boot_pages;
+extern struct list_head huge_boot_pages[MAX_NUMNODES];
/* arch callbacks */
@@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
+extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
+ unsigned long *begin, unsigned long *end);
+extern void __hugetlb_zap_end(struct vm_area_struct *vma,
+ struct zap_details *details);
+
+static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ if (is_vm_hugetlb_page(vma))
+ __hugetlb_zap_begin(vma, start, end);
+}
+
+static inline void hugetlb_zap_end(struct vm_area_struct *vma,
+ struct zap_details *details)
+{
+ if (is_vm_hugetlb_page(vma))
+ __hugetlb_zap_end(vma, details);
+}
+
void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
@@ -261,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long cp_flags);
bool is_hugetlb_entry_migration(pte_t pte);
+bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -297,19 +317,23 @@ static inline void adjust_range_if_pmd_sharing_possible(
{
}
-static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
+static inline void hugetlb_zap_begin(
+ struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
{
- BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
}
-static inline long follow_hugetlb_page(struct mm_struct *mm,
- struct vm_area_struct *vma, struct page **pages,
- unsigned long *position, unsigned long *nr_pages,
- long i, unsigned int flags, int *nonblocking)
+static inline void hugetlb_zap_end(
+ struct vm_area_struct *vma,
+ struct zap_details *details)
{
- BUG();
- return 0;
+}
+
+static inline struct page *hugetlb_follow_page_mask(
+ struct vm_area_struct *vma, unsigned long address, unsigned int flags,
+ unsigned int *page_mask)
+{
+ BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
}
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
@@ -450,7 +474,7 @@ static inline long hugetlb_change_protection(
return 0;
}
-static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
@@ -521,7 +545,6 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
}
struct hugetlbfs_inode_info {
- struct shared_policy policy;
struct inode vfs_inode;
unsigned int seals;
};
@@ -725,8 +748,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
-struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
- unsigned long address);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -808,7 +829,7 @@ static inline unsigned huge_page_shift(struct hstate *h)
static inline bool hstate_is_gigantic(struct hstate *h)
{
- return huge_page_order(h) > MAX_ORDER;
+ return huge_page_order(h) > MAX_PAGE_ORDER;
}
static inline unsigned int pages_per_huge_page(const struct hstate *h)
@@ -821,6 +842,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
return huge_page_size(h) / 512;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return filemap_lock_folio(mapping, idx << huge_page_order(h));
+}
+
#include <asm/hugetlb.h>
#ifndef is_hugepage_only_range
@@ -851,11 +878,6 @@ static inline struct hstate *folio_hstate(struct folio *folio)
return size_to_hstate(folio_size(folio));
}
-static inline struct hstate *page_hstate(struct page *page)
-{
- return folio_hstate(page_folio(page));
-}
-
static inline unsigned hstate_index_to_shift(unsigned index)
{
return hstates[index].order + PAGE_SHIFT;
@@ -998,7 +1020,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t pte)
{
- set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif
@@ -1007,6 +1031,11 @@ void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif
+/*
+ * Check if a given raw @page in a hugepage is HWPOISON.
+ */
+bool is_raw_hwpoison_page_in_hugepage(struct page *page);
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
@@ -1015,6 +1044,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return NULL;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return NULL;
+}
+
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
@@ -1035,13 +1070,6 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
return NULL;
}
-static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
- struct vm_area_struct *vma,
- unsigned long address)
-{
- return NULL;
-}
-
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
return 0;
@@ -1067,11 +1095,6 @@ static inline struct hstate *folio_hstate(struct folio *folio)
return NULL;
}
-static inline struct hstate *page_hstate(struct page *page)
-{
- return NULL;
-}
-
static inline struct hstate *size_to_hstate(unsigned long size)
{
return NULL;
@@ -1187,7 +1210,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+ pte_t *ptep, pte_t pte, unsigned long sz)
{
}
@@ -1245,6 +1268,8 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}
+bool __vma_private_lock(struct vm_area_struct *vma);
+
/*
* Safe version of huge_pte_offset() to check the locks. See comments
* above huge_pte_offset().
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 3d82d91f49ac..e5d64b8b59c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -22,13 +22,6 @@ struct resv_map;
struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB
-/*
- * Minimum page order trackable by hugetlb cgroup.
- * At least 3 pages are necessary for all the tracking information.
- * The second tail page contains all of the hugetlb-specific fields.
- */
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
-
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
@@ -68,8 +61,6 @@ static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
{
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
- if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
- return NULL;
if (rsvd)
return folio->_hugetlb_cgroup_rsvd;
else
@@ -91,8 +82,6 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg, bool rsvd)
{
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
- if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
- return;
if (rsvd)
folio->_hugetlb_cgroup_rsvd = h_cg;
else
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7fbb45911273..db199d653dd1 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp);
extern int dbg_release_bp_slot(struct perf_event *bp);
extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp);
-int arch_reserve_bp_slot(struct perf_event *bp);
-void arch_release_bp_slot(struct perf_event *bp);
-void arch_unregister_hw_breakpoint(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 8a3115516a1b..136e9842120e 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng);
extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs);
+extern long hwrng_yield(struct hwrng *rng);
#endif /* LINUX_HWRANDOM_H_ */
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 8cd6a6b33593..edf96f249eb5 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -141,6 +141,7 @@ enum hwmon_in_attributes {
hwmon_in_rated_min,
hwmon_in_rated_max,
hwmon_in_beep,
+ hwmon_in_fault,
};
#define HWMON_I_ENABLE BIT(hwmon_in_enable)
@@ -162,6 +163,7 @@ enum hwmon_in_attributes {
#define HWMON_I_RATED_MIN BIT(hwmon_in_rated_min)
#define HWMON_I_RATED_MAX BIT(hwmon_in_rated_max)
#define HWMON_I_BEEP BIT(hwmon_in_beep)
+#define HWMON_I_FAULT BIT(hwmon_in_fault)
enum hwmon_curr_attributes {
hwmon_curr_enable,
@@ -293,6 +295,8 @@ enum hwmon_humidity_attributes {
hwmon_humidity_fault,
hwmon_humidity_rated_min,
hwmon_humidity_rated_max,
+ hwmon_humidity_min_alarm,
+ hwmon_humidity_max_alarm,
};
#define HWMON_H_ENABLE BIT(hwmon_humidity_enable)
@@ -306,6 +310,8 @@ enum hwmon_humidity_attributes {
#define HWMON_H_FAULT BIT(hwmon_humidity_fault)
#define HWMON_H_RATED_MIN BIT(hwmon_humidity_rated_min)
#define HWMON_H_RATED_MAX BIT(hwmon_humidity_rated_max)
+#define HWMON_H_MIN_ALARM BIT(hwmon_humidity_min_alarm)
+#define HWMON_H_MAX_ALARM BIT(hwmon_humidity_max_alarm)
enum hwmon_fan_attributes {
hwmon_fan_enable,
@@ -425,12 +431,12 @@ struct hwmon_channel_info {
const u32 *config;
};
-#define HWMON_CHANNEL_INFO(stype, ...) \
- (&(struct hwmon_channel_info) { \
- .type = hwmon_##stype, \
- .config = (u32 []) { \
- __VA_ARGS__, 0 \
- } \
+#define HWMON_CHANNEL_INFO(stype, ...) \
+ (&(const struct hwmon_channel_info) { \
+ .type = hwmon_##stype, \
+ .config = (const u32 []) { \
+ __VA_ARGS__, 0 \
+ } \
})
/**
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3ac3974b3c78..96ceb4095425 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -164,8 +164,28 @@ struct hv_ring_buffer {
u8 buffer[];
} __packed;
+
+/*
+ * If the requested ring buffer size is at least 8 times the size of the
+ * header, steal space from the ring buffer for the header. Otherwise, add
+ * space for the header so that is doesn't take too much of the ring buffer
+ * space.
+ *
+ * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a
+ * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring
+ * buffer size (such as 128 Kbytes) and so end up making a nearly twice as
+ * large allocation that will be almost half wasted. As a contrasting example,
+ * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the
+ * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring.
+ * In this latter case, we must add 64 Kbytes for the header and not worry
+ * about what's wasted.
+ */
+#define VMBUS_HEADER_ADJ(payload_sz) \
+ ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \
+ 0 : sizeof(struct hv_ring_buffer))
+
/* Calculate the proper size of a ringbuffer, it must be page-aligned */
-#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \
+#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \
(payload_sz))
struct hv_ring_buffer_info {
@@ -348,7 +368,7 @@ struct vmtransfer_page_packet_header {
u8 sender_owns_set;
u8 reserved;
u32 range_cnt;
- struct vmtransfer_page_range ranges[1];
+ struct vmtransfer_page_range ranges[];
} __packed;
struct vmgpadl_packet_header {
@@ -665,8 +685,8 @@ struct vmbus_channel_initiate_contact {
u64 interrupt_page;
struct {
u8 msg_sint;
- u8 padding1[3];
- u32 padding2;
+ u8 msg_vtl;
+ u8 reserved[6];
};
};
u64 monitor_page1;
@@ -812,6 +832,7 @@ struct vmbus_gpadl {
u32 gpadl_handle;
u32 size;
void *buffer;
+ bool decrypted;
};
struct vmbus_channel {
diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h
new file mode 100644
index 000000000000..4d5da161c225
--- /dev/null
+++ b/include/linux/i2c-atr.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * I2C Address Translator
+ *
+ * Copyright (c) 2019,2022 Luca Ceresoli <luca@lucaceresoli.net>
+ * Copyright (c) 2022,2023 Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
+ *
+ * Based on i2c-mux.h
+ */
+
+#ifndef _LINUX_I2C_ATR_H
+#define _LINUX_I2C_ATR_H
+
+#include <linux/i2c.h>
+#include <linux/types.h>
+
+struct device;
+struct fwnode_handle;
+struct i2c_atr;
+
+/**
+ * struct i2c_atr_ops - Callbacks from ATR to the device driver.
+ * @attach_client: Notify the driver of a new device connected on a child
+ * bus, with the alias assigned to it. The driver must
+ * configure the hardware to use the alias.
+ * @detach_client: Notify the driver of a device getting disconnected. The
+ * driver must configure the hardware to stop using the
+ * alias.
+ *
+ * All these functions return 0 on success, a negative error code otherwise.
+ */
+struct i2c_atr_ops {
+ int (*attach_client)(struct i2c_atr *atr, u32 chan_id,
+ const struct i2c_client *client, u16 alias);
+ void (*detach_client)(struct i2c_atr *atr, u32 chan_id,
+ const struct i2c_client *client);
+};
+
+/**
+ * i2c_atr_new() - Allocate and initialize an I2C ATR helper.
+ * @parent: The parent (upstream) adapter
+ * @dev: The device acting as an ATR
+ * @ops: Driver-specific callbacks
+ * @max_adapters: Maximum number of child adapters
+ *
+ * The new ATR helper is connected to the parent adapter but has no child
+ * adapters. Call i2c_atr_add_adapter() to add some.
+ *
+ * Call i2c_atr_delete() to remove.
+ *
+ * Return: pointer to the new ATR helper object, or ERR_PTR
+ */
+struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev,
+ const struct i2c_atr_ops *ops, int max_adapters);
+
+/**
+ * i2c_atr_delete - Delete an I2C ATR helper.
+ * @atr: I2C ATR helper to be deleted.
+ *
+ * Precondition: all the adapters added with i2c_atr_add_adapter() must be
+ * removed by calling i2c_atr_del_adapter().
+ */
+void i2c_atr_delete(struct i2c_atr *atr);
+
+/**
+ * i2c_atr_add_adapter - Create a child ("downstream") I2C bus.
+ * @atr: The I2C ATR
+ * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is
+ * passed to the callbacks in `struct i2c_atr_ops`.
+ * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL
+ * to use the i2c-atr device as the parent.
+ * @bus_handle: The fwnode handle that points to the adapter's i2c
+ * peripherals, or NULL.
+ *
+ * After calling this function a new i2c bus will appear. Adding and removing
+ * devices on the downstream bus will result in calls to the
+ * &i2c_atr_ops->attach_client and &i2c_atr_ops->detach_client callbacks for the
+ * driver to assign an alias to the device.
+ *
+ * The adapter's fwnode is set to @bus_handle, or if @bus_handle is NULL the
+ * function looks for a child node whose 'reg' property matches the chan_id
+ * under the i2c-atr device's 'i2c-atr' node.
+ *
+ * Call i2c_atr_del_adapter() to remove the adapter.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id,
+ struct device *adapter_parent,
+ struct fwnode_handle *bus_handle);
+
+/**
+ * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by
+ * i2c_atr_add_adapter(). If no I2C bus has been added
+ * this function is a no-op.
+ * @atr: The I2C ATR
+ * @chan_id: Index of the adapter to be removed (0 .. max_adapters-1)
+ */
+void i2c_atr_del_adapter(struct i2c_atr *atr, u32 chan_id);
+
+/**
+ * i2c_atr_set_driver_data - Set private driver data to the i2c-atr instance.
+ * @atr: The I2C ATR
+ * @data: Pointer to the data to store
+ */
+void i2c_atr_set_driver_data(struct i2c_atr *atr, void *data);
+
+/**
+ * i2c_atr_get_driver_data - Get the stored drive data.
+ * @atr: The I2C ATR
+ *
+ * Return: Pointer to the stored data
+ */
+void *i2c_atr_get_driver_data(struct i2c_atr *atr);
+
+#endif /* _LINUX_I2C_ATR_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 3430cc2b05a6..5e6cd43a6dbd 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -23,9 +23,9 @@
#include <linux/swab.h> /* for swab16 */
#include <uapi/linux/i2c.h>
-extern struct bus_type i2c_bus_type;
-extern struct device_type i2c_adapter_type;
-extern struct device_type i2c_client_type;
+extern const struct bus_type i2c_bus_type;
+extern const struct device_type i2c_adapter_type;
+extern const struct device_type i2c_client_type;
/* --- General options ------------------------------------------------ */
@@ -237,7 +237,6 @@ enum i2c_driver_flags {
* struct i2c_driver - represent an I2C device driver
* @class: What kind of i2c device we instantiate (for detect)
* @probe: Callback for device binding
- * @probe_new: Transitional callback for device binding - do not use
* @remove: Callback for device unbinding
* @shutdown: Callback for device shutdown
* @alert: Alert callback, for example for the SMBus alert protocol
@@ -272,16 +271,8 @@ enum i2c_driver_flags {
struct i2c_driver {
unsigned int class;
- union {
/* Standard driver model interfaces */
- int (*probe)(struct i2c_client *client);
- /*
- * Legacy callback that was part of a conversion of .probe().
- * Today it has the same semantic as .probe(). Don't use for new
- * code.
- */
- int (*probe_new)(struct i2c_client *client);
- };
+ int (*probe)(struct i2c_client *client);
void (*remove)(struct i2c_client *client);
@@ -755,6 +746,8 @@ struct i2c_adapter {
struct irq_domain *host_notify_domain;
struct regulator *bus_regulator;
+
+ struct dentry *debugfs;
};
#define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
@@ -859,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap)
/* i2c adapter classes (bitmask) */
#define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */
-#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */
#define I2C_CLASS_SPD (1<<7) /* Memory modules */
/* Warn users that the adapter doesn't support classes anymore */
#define I2C_CLASS_DEPRECATED (1<<8)
@@ -939,7 +931,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
{
- return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0);
+ return (msg->addr << 1) | (msg->flags & I2C_M_RD);
}
u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold);
diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index 90fa83464f00..e119f11948ef 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -54,6 +54,7 @@ enum i3c_hdr_mode {
* struct i3c_priv_xfer - I3C SDR private transfer
* @rnw: encodes the transfer direction. true for a read, false for a write
* @len: transfer length in bytes of the transfer
+ * @actual_len: actual length in bytes are transferred by the controller
* @data: input/output buffer
* @data.in: input buffer. Must point to a DMA-able buffer
* @data.out: output buffer. Must point to a DMA-able buffer
@@ -62,6 +63,7 @@ enum i3c_hdr_mode {
struct i3c_priv_xfer {
u8 rnw;
u16 len;
+ u16 actual_len;
union {
void *in;
const void *out;
@@ -96,7 +98,7 @@ enum i3c_dcr {
/**
* struct i3c_device_info - I3C device information
- * @pid: Provisional ID
+ * @pid: Provisioned ID
* @bcr: Bus Characteristic Register
* @dcr: Device Characteristic Register
* @static_addr: static/I2C address
diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 0b52da4f2346..0ca27dd86956 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -24,6 +24,12 @@
struct i2c_client;
+/* notifier actions. notifier call data is the struct i3c_bus */
+enum {
+ I3C_NOTIFY_BUS_ADD,
+ I3C_NOTIFY_BUS_REMOVE,
+};
+
struct i3c_master_controller;
struct i3c_bus;
struct i3c_device;
@@ -70,7 +76,6 @@ struct i2c_dev_boardinfo {
/**
* struct i2c_dev_desc - I2C device descriptor
* @common: common part of the I2C device descriptor
- * @boardinfo: pointer to the boardinfo attached to this I2C device
* @dev: I2C device object registered to the I2C framework
* @addr: I2C device address
* @lvr: LVR (Legacy Virtual Register) needed by the I3C core to know about
@@ -129,6 +134,7 @@ struct i3c_ibi_slot {
* rejected by the master
* @num_slots: number of IBI slots reserved for this device
* @enabled: reflect the IBI status
+ * @wq: workqueue used to execute IBI handlers.
* @handler: IBI handler specified at i3c_device_request_ibi() call time. This
* handler will be called from the controller workqueue, and as such
* is allowed to sleep (though it is recommended to process the IBI
@@ -151,6 +157,7 @@ struct i3c_device_ibi_info {
unsigned int max_payload_len;
unsigned int num_slots;
unsigned int enabled;
+ struct workqueue_struct *wq;
void (*handler)(struct i3c_device *dev,
const struct i3c_ibi_payload *payload);
};
@@ -166,7 +173,7 @@ struct i3c_device_ibi_info {
* assigned a dynamic address by the master. Will be used during
* bus initialization to assign it a specific dynamic address
* before starting DAA (Dynamic Address Assignment)
- * @pid: I3C Provisional ID exposed by the device. This is a unique identifier
+ * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier
* that may be used to attach boardinfo to i3c_dev_desc when the device
* does not have a static address
* @of_node: optional DT node in case the device has been described in the DT
@@ -426,6 +433,8 @@ struct i3c_bus {
* for a future IBI
* This method is mandatory only if ->request_ibi is not
* NULL.
+ * @enable_hotjoin: enable hot join event detect.
+ * @disable_hotjoin: disable hot join event detect.
*/
struct i3c_master_controller_ops {
int (*bus_init)(struct i3c_master_controller *master);
@@ -452,6 +461,8 @@ struct i3c_master_controller_ops {
int (*disable_ibi)(struct i3c_dev_desc *dev);
void (*recycle_ibi_slot)(struct i3c_dev_desc *dev,
struct i3c_ibi_slot *slot);
+ int (*enable_hotjoin)(struct i3c_master_controller *master);
+ int (*disable_hotjoin)(struct i3c_master_controller *master);
};
/**
@@ -465,11 +476,12 @@ struct i3c_master_controller_ops {
* @ops: master operations. See &struct i3c_master_controller_ops
* @secondary: true if the master is a secondary master
* @init_done: true when the bus initialization is done
+ * @hotjoin: true if the master support hotjoin
* @boardinfo.i3c: list of I3C boardinfo objects
* @boardinfo.i2c: list of I2C boardinfo objects
* @boardinfo: board-level information attached to devices connected on the bus
* @bus: I3C bus exposed by this master
- * @wq: workqueue used to execute IBI handlers. Can also be used by master
+ * @wq: workqueue which can be used by master
* drivers if they need to postpone operations that need to take place
* in a thread context. Typical examples are Hot Join processing which
* requires taking the bus lock in maintenance, which in turn, can only
@@ -487,6 +499,7 @@ struct i3c_master_controller {
const struct i3c_master_controller_ops *ops;
unsigned int secondary : 1;
unsigned int init_done : 1;
+ unsigned int hotjoin: 1;
struct {
struct list_head i3c;
struct list_head i2c;
@@ -543,6 +556,8 @@ int i3c_master_register(struct i3c_master_controller *master,
const struct i3c_master_controller_ops *ops,
bool secondary);
void i3c_master_unregister(struct i3c_master_controller *master);
+int i3c_master_enable_hotjoin(struct i3c_master_controller *master);
+int i3c_master_disable_hotjoin(struct i3c_master_controller *master);
/**
* i3c_dev_get_master_data() - get master private data attached to an I3C
@@ -652,4 +667,9 @@ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot);
struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev);
+void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data),
+ void *data);
+int i3c_register_notifier(struct notifier_block *nb);
+int i3c_unregister_notifier(struct notifier_block *nb);
+
#endif /* I3C_MASTER_H */
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index db0f4fcfdaf4..e3b3b0fa2a8f 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -85,12 +85,10 @@ extern void icmpv6_param_prob_reason(struct sk_buff *skb,
struct flowi6;
struct in6_addr;
-extern void icmpv6_flow_init(struct sock *sk,
- struct flowi6 *fl6,
- u8 type,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif);
+
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int oif);
static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
diff --git a/include/linux/idr.h b/include/linux/idr.h
index a0dce14090a9..da5f5fa4a3a6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -200,7 +200,7 @@ static inline void idr_preload_end(void)
*/
#define idr_for_each_entry_ul(idr, entry, tmp, id) \
for (tmp = 0, id = 0; \
- tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \
+ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \
tmp = id, ++id)
/**
@@ -224,10 +224,12 @@ static inline void idr_preload_end(void)
* @id: Entry ID.
*
* Continue to iterate over entries, continuing after the current position.
+ * After normal termination @entry is left with the value NULL. This
+ * is convenient for a "not found" value.
*/
#define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \
for (tmp = id; \
- tmp <= id && ((entry) = idr_get_next_ul(idr, &(id))) != NULL; \
+ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \
tmp = id, ++id)
/*
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4b998090898e..3385a2cc5b09 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -9,7 +9,7 @@
* Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
* Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018 - 2023 Intel Corporation
+ * Copyright (c) 2018 - 2024 Intel Corporation
*/
#ifndef LINUX_IEEE80211_H
@@ -172,11 +172,11 @@
#define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1)
-/* PV1 Layout 11ah 9.8.3.1 */
+/* PV1 Layout IEEE 802.11-2020 9.8.3.1 */
#define IEEE80211_PV1_FCTL_VERS 0x0003
#define IEEE80211_PV1_FCTL_FTYPE 0x001c
#define IEEE80211_PV1_FCTL_STYPE 0x00e0
-#define IEEE80211_PV1_FCTL_TODS 0x0100
+#define IEEE80211_PV1_FCTL_FROMDS 0x0100
#define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200
#define IEEE80211_PV1_FCTL_PM 0x0400
#define IEEE80211_PV1_FCTL_MOREDATA 0x0800
@@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2)
return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1);
}
+static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2)
+{
+ return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1);
+}
+
static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2)
{
return (sn1 + sn2) & IEEE80211_SN_MASK;
@@ -307,6 +312,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
#define IEEE80211_TRIGGER_TYPE_BQRP 0x6
#define IEEE80211_TRIGGER_TYPE_NFRP 0x7
+/* UL-bandwidth within common_info of trigger frame */
+#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000
+#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0
+#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1
+#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2
+#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3
+
struct ieee80211_hdr {
__le16 frame_control;
__le16 duration_id;
@@ -801,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr)
hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG);
}
+static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr)
+{
+ return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ);
+}
+
struct ieee80211s_hdr {
u8 flags;
u8 ttl;
@@ -836,9 +853,14 @@ enum ieee80211_preq_target_flags {
};
/**
- * struct ieee80211_quiet_ie
+ * struct ieee80211_quiet_ie - Quiet element
+ * @count: Quiet Count
+ * @period: Quiet Period
+ * @duration: Quiet Duration
+ * @offset: Quiet Offset
*
- * This structure refers to "Quiet information element"
+ * This structure represents the payload of the "Quiet element" as
+ * described in IEEE Std 802.11-2020 section 9.4.2.22.
*/
struct ieee80211_quiet_ie {
u8 count;
@@ -848,9 +870,15 @@ struct ieee80211_quiet_ie {
} __packed;
/**
- * struct ieee80211_msrment_ie
+ * struct ieee80211_msrment_ie - Measurement element
+ * @token: Measurement Token
+ * @mode: Measurement Report Mode
+ * @type: Measurement Type
+ * @request: Measurement Request or Measurement Report
*
- * This structure refers to "Measurement Request/Report information element"
+ * This structure represents the payload of both the "Measurement
+ * Request element" and the "Measurement Report element" as described
+ * in IEEE Std 802.11-2020 sections 9.4.2.20 and 9.4.2.21.
*/
struct ieee80211_msrment_ie {
u8 token;
@@ -860,9 +888,14 @@ struct ieee80211_msrment_ie {
} __packed;
/**
- * struct ieee80211_channel_sw_ie
+ * struct ieee80211_channel_sw_ie - Channel Switch Announcement element
+ * @mode: Channel Switch Mode
+ * @new_ch_num: New Channel Number
+ * @count: Channel Switch Count
*
- * This structure refers to "Channel Switch Announcement information element"
+ * This structure represents the payload of the "Channel Switch
+ * Announcement element" as described in IEEE Std 802.11-2020 section
+ * 9.4.2.18.
*/
struct ieee80211_channel_sw_ie {
u8 mode;
@@ -871,9 +904,14 @@ struct ieee80211_channel_sw_ie {
} __packed;
/**
- * struct ieee80211_ext_chansw_ie
+ * struct ieee80211_ext_chansw_ie - Extended Channel Switch Announcement element
+ * @mode: Channel Switch Mode
+ * @new_operating_class: New Operating Class
+ * @new_ch_num: New Channel Number
+ * @count: Channel Switch Count
*
- * This structure represents the "Extended Channel Switch Announcement element"
+ * This structure represents the "Extended Channel Switch Announcement
+ * element" as described in IEEE Std 802.11-2020 section 9.4.2.52.
*/
struct ieee80211_ext_chansw_ie {
u8 mode;
@@ -894,8 +932,14 @@ struct ieee80211_sec_chan_offs_ie {
/**
* struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE
+ * @mesh_ttl: Time To Live
+ * @mesh_flags: Flags
+ * @mesh_reason: Reason Code
+ * @mesh_pre_value: Precedence Value
*
- * This structure represents the "Mesh Channel Switch Paramters element"
+ * This structure represents the payload of the "Mesh Channel Switch
+ * Parameters element" as described in IEEE Std 802.11-2020 section
+ * 9.4.2.102.
*/
struct ieee80211_mesh_chansw_params_ie {
u8 mesh_ttl;
@@ -906,6 +950,13 @@ struct ieee80211_mesh_chansw_params_ie {
/**
* struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE
+ * @new_channel_width: New Channel Width
+ * @new_center_freq_seg0: New Channel Center Frequency Segment 0
+ * @new_center_freq_seg1: New Channel Center Frequency Segment 1
+ *
+ * This structure represents the payload of the "Wide Bandwidth
+ * Channel Switch element" as described in IEEE Std 802.11-2020
+ * section 9.4.2.160.
*/
struct ieee80211_wide_bw_chansw_ie {
u8 new_channel_width;
@@ -913,22 +964,42 @@ struct ieee80211_wide_bw_chansw_ie {
} __packed;
/**
- * struct ieee80211_tim
+ * struct ieee80211_tim_ie - Traffic Indication Map information element
+ * @dtim_count: DTIM Count
+ * @dtim_period: DTIM Period
+ * @bitmap_ctrl: Bitmap Control
+ * @required_octet: "Syntatic sugar" to force the struct size to the
+ * minimum valid size when carried in a non-S1G PPDU
+ * @virtual_map: Partial Virtual Bitmap
*
- * This structure refers to "Traffic Indication Map information element"
+ * This structure represents the payload of the "TIM element" as
+ * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this
+ * definition is only applicable when the element is carried in a
+ * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap
+ * Control and Partial Virtual Bitmap may not be present.
*/
struct ieee80211_tim_ie {
u8 dtim_count;
u8 dtim_period;
u8 bitmap_ctrl;
- /* variable size: 1 - 251 bytes */
- u8 virtual_map[1];
+ union {
+ u8 required_octet;
+ DECLARE_FLEX_ARRAY(u8, virtual_map);
+ };
} __packed;
/**
- * struct ieee80211_meshconf_ie
+ * struct ieee80211_meshconf_ie - Mesh Configuration element
+ * @meshconf_psel: Active Path Selection Protocol Identifier
+ * @meshconf_pmetric: Active Path Selection Metric Identifier
+ * @meshconf_congest: Congestion Control Mode Identifier
+ * @meshconf_synch: Synchronization Method Identifier
+ * @meshconf_auth: Authentication Protocol Identifier
+ * @meshconf_form: Mesh Formation Info
+ * @meshconf_cap: Mesh Capability (see &enum mesh_config_capab_flags)
*
- * This structure refers to "Mesh Configuration information element"
+ * This structure represents the payload of the "Mesh Configuration
+ * element" as described in IEEE Std 802.11-2020 section 9.4.2.97.
*/
struct ieee80211_meshconf_ie {
u8 meshconf_psel;
@@ -950,6 +1021,9 @@ struct ieee80211_meshconf_ie {
* is ongoing
* @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has
* neighbors in deep sleep mode
+ *
+ * Enumerates the "Mesh Capability" as described in IEEE Std
+ * 802.11-2020 section 9.4.2.97.7.
*/
enum mesh_config_capab_flags {
IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS = 0x01,
@@ -960,7 +1034,7 @@ enum mesh_config_capab_flags {
#define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1
-/**
+/*
* mesh channel switch parameters element's flag indicator
*
*/
@@ -969,9 +1043,17 @@ enum mesh_config_capab_flags {
#define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2)
/**
- * struct ieee80211_rann_ie
+ * struct ieee80211_rann_ie - RANN (root announcement) element
+ * @rann_flags: Flags
+ * @rann_hopcount: Hop Count
+ * @rann_ttl: Element TTL
+ * @rann_addr: Root Mesh STA Address
+ * @rann_seq: HWMP Sequence Number
+ * @rann_interval: Interval
+ * @rann_metric: Metric
*
- * This structure refers to "Root Announcement information element"
+ * This structure represents the payload of the "RANN element" as
+ * described in IEEE Std 802.11-2020 section 9.4.2.111.
*/
struct ieee80211_rann_ie {
u8 rann_flags;
@@ -993,7 +1075,7 @@ enum ieee80211_ht_chanwidth_values {
};
/**
- * enum ieee80211_opmode_bits - VHT operating mode field bits
+ * enum ieee80211_vht_opmode_bits - VHT operating mode field bits
* @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask
* @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width
* @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width
@@ -1042,9 +1124,12 @@ enum ieee80211_s1g_chanwidth {
#define WLAN_USER_POSITION_LEN 16
/**
- * struct ieee80211_tpc_report_ie
+ * struct ieee80211_tpc_report_ie - TPC Report element
+ * @tx_power: Transmit Power
+ * @link_margin: Link Margin
*
- * This structure refers to "TPC Report element"
+ * This structure represents the payload of the "TPC Report element" as
+ * described in IEEE Std 802.11-2020 section 9.4.2.16.
*/
struct ieee80211_tpc_report_ie {
u8 tx_power;
@@ -1062,9 +1147,14 @@ struct ieee80211_addba_ext_ie {
} __packed;
/**
- * struct ieee80211_s1g_bcn_compat_ie
+ * struct ieee80211_s1g_bcn_compat_ie - S1G Beacon Compatibility element
+ * @compat_info: Compatibility Information
+ * @beacon_int: Beacon Interval
+ * @tsf_completion: TSF Completion
*
- * S1G Beacon Compatibility element
+ * This structure represents the payload of the "S1G Beacon
+ * Compatibility element" as described in IEEE Std 802.11-2020 section
+ * 9.4.2.196.
*/
struct ieee80211_s1g_bcn_compat_ie {
__le16 compat_info;
@@ -1073,9 +1163,15 @@ struct ieee80211_s1g_bcn_compat_ie {
} __packed;
/**
- * struct ieee80211_s1g_oper_ie
+ * struct ieee80211_s1g_oper_ie - S1G Operation element
+ * @ch_width: S1G Operation Information Channel Width
+ * @oper_class: S1G Operation Information Operating Class
+ * @primary_ch: S1G Operation Information Primary Channel Number
+ * @oper_ch: S1G Operation Information Channel Center Frequency
+ * @basic_mcs_nss: Basic S1G-MCS and NSS Set
*
- * S1G Operation element
+ * This structure represents the payload of the "S1G Operation
+ * element" as described in IEEE Std 802.11-2020 section 9.4.2.212.
*/
struct ieee80211_s1g_oper_ie {
u8 ch_width;
@@ -1086,9 +1182,13 @@ struct ieee80211_s1g_oper_ie {
} __packed;
/**
- * struct ieee80211_aid_response_ie
+ * struct ieee80211_aid_response_ie - AID Response element
+ * @aid: AID/Group AID
+ * @switch_count: AID Switch Count
+ * @response_int: AID Response Interval
*
- * AID Response element
+ * This structure represents the payload of the "AID Response element"
+ * as described in IEEE Std 802.11-2020 section 9.4.2.194.
*/
struct ieee80211_aid_response_ie {
__le16 aid;
@@ -1163,6 +1263,30 @@ struct ieee80211_twt_setup {
u8 params[];
} __packed;
+#define IEEE80211_TTLM_MAX_CNT 2
+#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03
+#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04
+#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08
+#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10
+#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20
+
+#define IEEE80211_TTLM_DIRECTION_DOWN 0
+#define IEEE80211_TTLM_DIRECTION_UP 1
+#define IEEE80211_TTLM_DIRECTION_BOTH 2
+
+/**
+ * struct ieee80211_ttlm_elem - TID-To-Link Mapping element
+ *
+ * Defined in section 9.4.2.314 in P802.11be_D4
+ *
+ * @control: the first part of control field
+ * @optional: the second part of control field
+ */
+struct ieee80211_ttlm_elem {
+ u8 control;
+ u8 optional[];
+} __packed;
+
struct ieee80211_mgmt {
__le16 frame_control;
__le16 duration;
@@ -1340,6 +1464,20 @@ struct ieee80211_mgmt {
u8 max_tod_error;
u8 max_toa_error;
} __packed wnm_timing_msr;
+ struct {
+ u8 action_code;
+ u8 dialog_token;
+ u8 variable[];
+ } __packed ttlm_req;
+ struct {
+ u8 action_code;
+ u8 dialog_token;
+ u8 status_code;
+ u8 variable[];
+ } __packed ttlm_res;
+ struct {
+ u8 action_code;
+ } __packed ttlm_tear_down;
} u;
} __packed action;
DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */
@@ -1489,7 +1627,7 @@ struct ieee80211_tdls_data {
/*
* Peer-to-Peer IE attribute related definitions.
*/
-/**
+/*
* enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute.
*/
enum ieee80211_p2p_attr_id {
@@ -1539,11 +1677,17 @@ struct ieee80211_p2p_noa_attr {
#define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F
/**
- * struct ieee80211_bar - HT Block Ack Request
+ * struct ieee80211_bar - Block Ack Request frame format
+ * @frame_control: Frame Control
+ * @duration: Duration
+ * @ra: RA
+ * @ta: TA
+ * @control: BAR Control
+ * @start_seq_num: Starting Sequence Number (see Figure 9-37)
*
- * This structure refers to "HT BlockAckReq" as
- * described in 802.11n draft section 7.2.1.7.1
- */
+ * This structure represents the "BlockAckReq frame format"
+ * as described in IEEE Std 802.11-2020 section 9.3.1.7.
+*/
struct ieee80211_bar {
__le16 frame_control;
__le16 duration;
@@ -1563,13 +1707,17 @@ struct ieee80211_bar {
#define IEEE80211_HT_MCS_MASK_LEN 10
/**
- * struct ieee80211_mcs_info - MCS information
+ * struct ieee80211_mcs_info - Supported MCS Set field
* @rx_mask: RX mask
* @rx_highest: highest supported RX rate. If set represents
* the highest supported RX data rate in units of 1 Mbps.
* If this field is 0 this value should not be used to
* consider the highest RX data rate supported.
* @tx_params: TX parameters
+ * @reserved: Reserved bits
+ *
+ * This structure represents the "Supported MCS Set field" as
+ * described in IEEE Std 802.11-2020 section 9.4.2.55.4.
*/
struct ieee80211_mcs_info {
u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN];
@@ -1588,6 +1736,8 @@ struct ieee80211_mcs_info {
#define IEEE80211_HT_MCS_TX_MAX_STREAMS 4
#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10
+#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3)))
+
/*
* 802.11n D5.0 20.3.5 / 20.6 says:
* - indices 0 to 7 and 32 are single spatial stream
@@ -1600,10 +1750,16 @@ struct ieee80211_mcs_info {
(IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8)
/**
- * struct ieee80211_ht_cap - HT capabilities
+ * struct ieee80211_ht_cap - HT capabilities element
+ * @cap_info: HT Capability Information
+ * @ampdu_params_info: A-MPDU Parameters
+ * @mcs: Supported MCS Set
+ * @extended_ht_cap_info: HT Extended Capabilities
+ * @tx_BF_cap_info: Transmit Beamforming Capabilities
+ * @antenna_selection_info: ASEL Capability
*
- * This structure is the "HT capabilities element" as
- * described in 802.11n D5.0 7.3.2.57
+ * This structure represents the payload of the "HT Capabilities
+ * element" as described in IEEE Std 802.11-2020 section 9.4.2.55.
*/
struct ieee80211_ht_cap {
__le16 cap_info;
@@ -1691,9 +1847,14 @@ enum ieee80211_min_mpdu_spacing {
/**
* struct ieee80211_ht_operation - HT operation IE
+ * @primary_chan: Primary Channel
+ * @ht_param: HT Operation Information parameters
+ * @operation_mode: HT Operation Information operation mode
+ * @stbc_param: HT Operation Information STBC params
+ * @basic_set: Basic HT-MCS Set
*
- * This structure is the "HT operation element" as
- * described in 802.11n-2009 7.3.2.57
+ * This structure represents the payload of the "HT Operation
+ * element" as described in IEEE Std 802.11-2020 section 9.4.2.56.
*/
struct ieee80211_ht_operation {
u8 primary_chan;
@@ -1862,9 +2023,12 @@ struct ieee80211_vht_operation {
/**
* struct ieee80211_he_cap_elem - HE capabilities element
+ * @mac_cap_info: HE MAC Capabilities Information
+ * @phy_cap_info: HE PHY Capabilities Information
*
- * This structure is the "HE capabilities element" fixed fields as
- * described in P802.11ax_D4.0 section 9.4.2.242.2 and 9.4.2.242.3
+ * This structure represents the fixed fields of the payload of the
+ * "HE capabilities element" as described in IEEE Std 802.11ax-2021
+ * sections 9.4.2.248.2 and 9.4.2.248.3.
*/
struct ieee80211_he_cap_elem {
u8 mac_cap_info[6];
@@ -1923,35 +2087,45 @@ struct ieee80211_he_mcs_nss_supp {
} __packed;
/**
- * struct ieee80211_he_operation - HE capabilities element
+ * struct ieee80211_he_operation - HE Operation element
+ * @he_oper_params: HE Operation Parameters + BSS Color Information
+ * @he_mcs_nss_set: Basic HE-MCS And NSS Set
+ * @optional: Optional fields VHT Operation Information, Max Co-Hosted
+ * BSSID Indicator, and 6 GHz Operation Information
*
- * This structure is the "HE operation element" fields as
- * described in P802.11ax_D4.0 section 9.4.2.243
+ * This structure represents the payload of the "HE Operation
+ * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.249.
*/
struct ieee80211_he_operation {
__le32 he_oper_params;
__le16 he_mcs_nss_set;
- /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */
u8 optional[];
} __packed;
/**
- * struct ieee80211_he_spr - HE spatial reuse element
+ * struct ieee80211_he_spr - Spatial Reuse Parameter Set element
+ * @he_sr_control: SR Control
+ * @optional: Optional fields Non-SRG OBSS PD Max Offset, SRG OBSS PD
+ * Min Offset, SRG OBSS PD Max Offset, SRG BSS Color
+ * Bitmap, and SRG Partial BSSID Bitmap
*
- * This structure is the "HE spatial reuse element" element as
- * described in P802.11ax_D4.0 section 9.4.2.241
+ * This structure represents the payload of the "Spatial Reuse
+ * Parameter Set element" as described in IEEE Std 802.11ax-2021
+ * section 9.4.2.252.
*/
struct ieee80211_he_spr {
u8 he_sr_control;
- /* Optional 0 to 19 bytes: depends on @he_sr_control */
u8 optional[];
} __packed;
/**
* struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
+ * @aifsn: ACI/AIFSN
+ * @ecw_min_max: ECWmin/ECWmax
+ * @mu_edca_timer: MU EDCA Timer
*
- * This structure is the "MU AC Parameter Record" fields as
- * described in P802.11ax_D4.0 section 9.4.2.245
+ * This structure represents the "MU AC Parameter Record" as described
+ * in IEEE Std 802.11ax-2021 section 9.4.2.251, Figure 9-788p.
*/
struct ieee80211_he_mu_edca_param_ac_rec {
u8 aifsn;
@@ -1961,9 +2135,14 @@ struct ieee80211_he_mu_edca_param_ac_rec {
/**
* struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
+ * @mu_qos_info: QoS Info
+ * @ac_be: MU AC_BE Parameter Record
+ * @ac_bk: MU AC_BK Parameter Record
+ * @ac_vi: MU AC_VI Parameter Record
+ * @ac_vo: MU AC_VO Parameter Record
*
- * This structure is the "MU EDCA Parameter Set element" fields as
- * described in P802.11ax_D4.0 section 9.4.2.245
+ * This structure represents the payload of the "MU EDCA Parameter Set
+ * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.251.
*/
struct ieee80211_mu_edca_param_set {
u8 mu_qos_info;
@@ -2177,9 +2356,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
* enum ieee80211_ap_reg_power - regulatory power for a Access Point
*
* @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode
- * @IEEE80211_REG_LPI: Indoor Access Point
- * @IEEE80211_REG_SP: Standard power Access Point
- * @IEEE80211_REG_VLP: Very low power Access Point
+ * @IEEE80211_REG_LPI_AP: Indoor Access Point
+ * @IEEE80211_REG_SP_AP: Standard power Access Point
+ * @IEEE80211_REG_VLP_AP: Very low power Access Point
* @IEEE80211_REG_AP_POWER_AFTER_LAST: internal
* @IEEE80211_REG_AP_POWER_MAX: maximum value
*/
@@ -2565,9 +2744,10 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len)
#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0
#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1
+#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2
/**
- * ieee80211_he_6ghz_oper - HE 6 GHz operation Information field
+ * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field
* @primary: primary channel
* @control: control flags
* @ccfs0: channel center frequency segment 0
@@ -2614,9 +2794,13 @@ enum ieee80211_tx_power_intrpt_type {
};
/**
- * struct ieee80211_tx_pwr_env
+ * struct ieee80211_tx_pwr_env - Transmit Power Envelope
+ * @tx_power_info: Transmit Power Information field
+ * @tx_power: Maximum Transmit Power field
*
- * This structure represents the "Transmit Power Envelope element"
+ * This structure represents the payload of the "Transmit Power
+ * Envelope element" as described in IEEE Std 802.11ax-2021 section
+ * 9.4.2.161
*/
struct ieee80211_tx_pwr_env {
u8 tx_power_info;
@@ -2671,12 +2855,14 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
static inline const struct ieee80211_he_6ghz_oper *
ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
{
- const u8 *ret = (const void *)&he_oper->optional;
+ const u8 *ret;
u32 he_oper_params;
if (!he_oper)
return NULL;
+ ret = (const void *)&he_oper->optional;
+
he_oper_params = le32_to_cpu(he_oper->he_oper_params);
if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO))
@@ -2874,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
#define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40
#define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30
+#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40
#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78
#define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80
@@ -3013,6 +3202,44 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len)
return len >= needed;
}
+/* must validate ieee80211_eht_oper_size_ok() first */
+static inline u16
+ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper)
+{
+ const struct ieee80211_eht_operation_info *info =
+ (const void *)eht_oper->optional;
+
+ if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT))
+ return 0;
+
+ if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT))
+ return 0;
+
+ return get_unaligned_le16(info->optional);
+}
+
+#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1)
+
+struct ieee80211_bandwidth_indication {
+ u8 params;
+ struct ieee80211_eht_operation_info info;
+} __packed;
+
+static inline bool
+ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len)
+{
+ const struct ieee80211_bandwidth_indication *bwi = (const void *)data;
+
+ if (len < sizeof(*bwi))
+ return false;
+
+ if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT &&
+ len < sizeof(*bwi) + 2)
+ return false;
+
+ return true;
+}
+
#define LISTEN_INT_USF GENMASK(15, 14)
#define LISTEN_INT_UI GENMASK(13, 0)
@@ -3173,6 +3400,8 @@ enum ieee80211_statuscode {
WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109,
WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126,
WLAN_STATUS_SAE_PK = 127,
+ WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133,
+ WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134,
};
@@ -3470,6 +3699,8 @@ enum ieee80211_eid_ext {
WLAN_EID_EXT_EHT_OPERATION = 106,
WLAN_EID_EXT_EHT_MULTI_LINK = 107,
WLAN_EID_EXT_EHT_CAPABILITY = 108,
+ WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109,
+ WLAN_EID_EXT_BANDWIDTH_INDICATION = 135,
};
/* Action category code */
@@ -3496,6 +3727,7 @@ enum ieee80211_category {
WLAN_CATEGORY_UNPROT_DMG = 20,
WLAN_CATEGORY_VHT = 21,
WLAN_CATEGORY_S1G = 22,
+ WLAN_CATEGORY_PROTECTED_EHT = 37,
WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
};
@@ -3559,6 +3791,13 @@ enum ieee80211_unprotected_wnm_actioncode {
WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1,
};
+/* Protected EHT action codes */
+enum ieee80211_protected_eht_actioncode {
+ WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0,
+ WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1,
+ WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2,
+};
+
/* Security key length */
enum ieee80211_key_len {
WLAN_KEY_LEN_WEP40 = 5,
@@ -4237,6 +4476,36 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
}
/**
+ * ieee80211_is_protected_dual_of_public_action - check if skb contains a
+ * protected dual of public action management frame
+ * @skb: the skb containing the frame, length will be checked
+ *
+ * Return: true if the skb contains a protected dual of public action
+ * management frame, false otherwise.
+ */
+static inline bool
+ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb)
+{
+ u8 action;
+
+ if (!ieee80211_is_public_action((void *)skb->data, skb->len) ||
+ skb->len < IEEE80211_MIN_ACTION_SIZE + 1)
+ return false;
+
+ action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE);
+
+ return action != WLAN_PUB_ACTION_20_40_BSS_COEX &&
+ action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN &&
+ action != WLAN_PUB_ACTION_MSMT_PILOT &&
+ action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES &&
+ action != WLAN_PUB_ACTION_LOC_TRACK_NOTI &&
+ action != WLAN_PUB_ACTION_FTM_REQUEST &&
+ action != WLAN_PUB_ACTION_FTM_RESPONSE &&
+ action != WLAN_PUB_ACTION_FILS_DISCOVERY &&
+ action != WLAN_PUB_ACTION_VENDOR_SPECIFIC;
+}
+
+/**
* _ieee80211_is_group_privacy_action - check if frame is a group addressed
* privacy action frame
* @hdr: the frame
@@ -4307,12 +4576,11 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
/**
* ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet)
* @skb: the skb containing the frame, length will not be checked
- * @hdr_size: the size of the ieee80211_hdr that starts at skb->data
*
* This function assumes the frame is a data frame, and that the network header
* is in the correct place.
*/
-static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size)
+static inline int ieee80211_get_tdls_action(struct sk_buff *skb)
{
if (!skb_is_nonlinear(skb) &&
skb->len > (skb_network_offset(skb) + 2)) {
@@ -4478,7 +4746,7 @@ static inline bool for_each_element_completed(const struct element *element,
return (const u8 *)element == (const u8 *)data + datalen;
}
-/**
+/*
* RSNX Capabilities:
* bits 0-3: Field length (n-1)
*/
@@ -4630,6 +4898,10 @@ struct ieee80211_multi_link_elem {
#define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f
#define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010
#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2
+#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3
#define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80
#define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000
@@ -4693,18 +4965,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
}
/**
+ * ieee80211_mle_get_link_id - returns the link ID
+ * @data: the basic multi link element
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the BSS link ID can't be found, -1 will be returned
+ */
+static inline int ieee80211_mle_get_link_id(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /* common points now at the beginning of ieee80211_mle_basic_common_info */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID))
+ return -1;
+
+ return *common;
+}
+
+/**
* ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count
- * @mle: the basic multi link element
+ * @data: pointer to the basic multi link element
*
* The element is assumed to be of the correct type (BASIC) and big enough,
* this must be checked using ieee80211_mle_type_ok().
*
* If the BSS parameter change count value can't be found (the presence bit
- * for it is clear), 0 will be returned.
+ * for it is clear), -1 will be returned.
*/
-static inline u8
-ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle)
+static inline int
+ieee80211_mle_get_bss_param_ch_cnt(const u8 *data)
{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
u16 control = le16_to_cpu(mle->control);
const u8 *common = mle->variable;
@@ -4712,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle)
common += sizeof(struct ieee80211_mle_basic_common_info);
if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))
- return 0;
+ return -1;
if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
common += 1;
@@ -4721,7 +5018,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle)
}
/**
- * ieee80211_mle_get_eml_sync_delay - returns the medium sync delay
+ * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay
* @data: pointer to the multi link EHT IE
*
* The element is assumed to be of the correct type (BASIC) and big enough,
@@ -4782,6 +5079,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data)
}
/**
+ * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations.
+ * @data: pointer to the multi link EHT IE
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the MLD capabilities and operations field is not present, 0 will be
+ * returned.
+ */
+static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /*
+ * common points now at the beginning of
+ * ieee80211_mle_basic_common_info
+ */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
+ return 0;
+
+ if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)
+ common += 2;
+
+ return get_unaligned_le16(common);
+}
+
+/**
+ * ieee80211_mle_get_mld_id - returns the MLD ID
+ * @data: pointer to the multi link element
+ *
+ * The element is assumed to be of the correct type (BASIC) and big enough,
+ * this must be checked using ieee80211_mle_type_ok().
+ *
+ * If the MLD ID is not present, 0 will be returned.
+ */
+static inline u8 ieee80211_mle_get_mld_id(const u8 *data)
+{
+ const struct ieee80211_multi_link_elem *mle = (const void *)data;
+ u16 control = le16_to_cpu(mle->control);
+ const u8 *common = mle->variable;
+
+ /*
+ * common points now at the beginning of
+ * ieee80211_mle_basic_common_info
+ */
+ common += sizeof(struct ieee80211_mle_basic_common_info);
+
+ if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID))
+ return 0;
+
+ if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)
+ common += 1;
+ if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)
+ common += 2;
+ if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)
+ common += 2;
+
+ return *common;
+}
+
+/**
* ieee80211_mle_size_ok - validate multi-link element size
* @data: pointer to the element data
* @len: length of the containing element
@@ -5006,6 +5378,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data,
fixed + prof->sta_info_len - 1 <= len;
}
+static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len)
+{
+ const struct ieee80211_ttlm_elem *t2l = (const void *)data;
+ u8 control, fixed = sizeof(*t2l), elem_len = 0;
+
+ if (len < fixed)
+ return false;
+
+ control = t2l->control;
+
+ if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT)
+ elem_len += 2;
+ if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT)
+ elem_len += 3;
+
+ if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) {
+ u8 bm_size;
+
+ elem_len += 1;
+ if (len < fixed + elem_len)
+ return false;
+
+ if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE)
+ bm_size = 1;
+ else
+ bm_size = 2;
+
+ elem_len += hweight8(t2l->optional[0]) * bm_size;
+ }
+
+ return len >= fixed + elem_len;
+}
+
#define for_each_mle_subelement(_elem, _data, _len) \
if (ieee80211_mle_size_ok(_data, _len)) \
for_each_element(_elem, \
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 1ed52441972f..10a1e81434cb 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -53,6 +53,10 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
case ARPHRD_NONE:
case ARPHRD_RAWIP:
case ARPHRD_PIMREG:
+ /* PPP adds its l2 header automatically in ppp_start_xmit().
+ * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init().
+ */
+ case ARPHRD_PPP:
return false;
default:
return true;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 8de6b6e67829..cdc684e04a2f 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -162,8 +162,8 @@ struct team_option {
bool per_port;
unsigned int array_size; /* != 0 means the option is array */
enum team_option_type type;
- int (*init)(struct team *team, struct team_option_inst_info *info);
- int (*getter)(struct team *team, struct team_gsetter_ctx *ctx);
+ void (*init)(struct team *team, struct team_option_inst_info *info);
+ void (*getter)(struct team *team, struct team_gsetter_ctx *ctx);
int (*setter)(struct team *team, struct team_gsetter_ctx *ctx);
};
@@ -189,6 +189,8 @@ struct team {
struct net_device *dev; /* associated netdevice */
struct team_pcpu_stats __percpu *pcpu_stats;
+ const struct header_ops *header_ops_cache;
+
struct mutex lock; /* used for overall locking, e.g. port lists write */
/*
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 2a7660843444..043d442994b0 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -27,44 +27,54 @@ struct tun_xdp_hdr {
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
+
static inline bool tun_is_xdp_frame(void *ptr)
{
- return (unsigned long)ptr & TUN_XDP_FLAG;
+ return (unsigned long)ptr & TUN_XDP_FLAG;
}
+
static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
{
- return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
+ return (void *)((unsigned long)xdp | TUN_XDP_FLAG);
}
+
static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
{
- return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+ return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
}
+
void tun_ptr_free(void *ptr);
#else
#include <linux/err.h>
#include <linux/errno.h>
struct file;
struct socket;
+
static inline struct socket *tun_get_socket(struct file *f)
{
return ERR_PTR(-EINVAL);
}
+
static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
{
return ERR_PTR(-EINVAL);
}
+
static inline bool tun_is_xdp_frame(void *ptr)
{
return false;
}
+
static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp)
{
return NULL;
}
+
static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr)
{
return NULL;
}
+
static inline void tun_ptr_free(void *ptr)
{
}
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 6ba71957851e..c1645c86eed9 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -408,7 +408,7 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
* @mac_len: MAC header length including outer vlan headers
*
* Inserts the VLAN tag into @skb as part of the payload at offset mac_len
- * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ * Returns a VLAN tagged skb. This might change skb->head.
*
* Following the skb_unshare() example, in case of error, the calling function
* doesn't have to worry about freeing the original skb.
@@ -437,7 +437,7 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
* @vlan_tci: VLAN TCI to insert
*
* Inserts the VLAN tag into @skb as part of the payload
- * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ * Returns a VLAN tagged skb. This might change skb->head.
*
* Following the skb_unshare() example, in case of error, the calling function
* doesn't have to worry about freeing the original skb.
@@ -457,7 +457,7 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
* @vlan_tci: VLAN TCI to insert
*
* Inserts the VLAN tag into @skb as part of the payload
- * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ * Returns a VLAN tagged skb. This might change skb->head.
*
* Following the skb_unshare() example, in case of error, the calling function
* doesn't have to worry about freeing the original skb.
@@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb);
if (!eth_type_vlan(veth->h_vlan_proto))
- return -EINVAL;
+ return -ENODATA;
*vlan_tci = ntohs(veth->h_vlan_TCI);
return 0;
@@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
return 0;
} else {
*vlan_tci = 0;
- return -EINVAL;
+ return -ENODATA;
}
}
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index ebf4349a53af..5171231f70a8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -39,7 +39,7 @@ struct ip_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
struct rcu_head rcu;
- __be32 sl_addr[];
+ __be32 sl_addr[] __counted_by(sl_max);
};
#define IP_SFBLOCK 10 /* allocate this many at once */
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 7852f6c9a714..719cf9cc6e1a 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -8,6 +8,8 @@
#ifndef __AD_SIGMA_DELTA_H__
#define __AD_SIGMA_DELTA_H__
+#include <linux/iio/iio.h>
+
enum ad_sigma_delta_mode {
AD_SD_MODE_CONTINUOUS = 0,
AD_SD_MODE_SINGLE = 1,
@@ -99,7 +101,7 @@ struct ad_sigma_delta {
* 'rx_buf' is up to 32 bits per sample + 64 bit timestamp,
* rounded to 16 bytes to take into account padding.
*/
- uint8_t tx_buf[4] ____cacheline_aligned;
+ uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN);
uint8_t rx_buf[16] __aligned(8);
};
diff --git a/include/linux/iio/adc/adi-axi-adc.h b/include/linux/iio/adc/adi-axi-adc.h
deleted file mode 100644
index 52620e5b8052..000000000000
--- a/include/linux/iio/adc/adi-axi-adc.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Analog Devices Generic AXI ADC IP core driver/library
- * Link: https://wiki.analog.com/resources/fpga/docs/axi_adc_ip
- *
- * Copyright 2012-2020 Analog Devices Inc.
- */
-#ifndef __ADI_AXI_ADC_H__
-#define __ADI_AXI_ADC_H__
-
-struct device;
-struct iio_chan_spec;
-
-/**
- * struct adi_axi_adc_chip_info - Chip specific information
- * @name Chip name
- * @id Chip ID (usually product ID)
- * @channels Channel specifications of type @struct iio_chan_spec
- * @num_channels Number of @channels
- * @scale_table Supported scales by the chip; tuples of 2 ints
- * @num_scales Number of scales in the table
- * @max_rate Maximum sampling rate supported by the device
- */
-struct adi_axi_adc_chip_info {
- const char *name;
- unsigned int id;
-
- const struct iio_chan_spec *channels;
- unsigned int num_channels;
-
- const unsigned int (*scale_table)[2];
- int num_scales;
-
- unsigned long max_rate;
-};
-
-/**
- * struct adi_axi_adc_conv - data of the ADC attached to the AXI ADC
- * @chip_info chip info details for the client ADC
- * @preenable_setup op to run in the client before enabling the AXI ADC
- * @reg_access IIO debugfs_reg_access hook for the client ADC
- * @read_raw IIO read_raw hook for the client ADC
- * @write_raw IIO write_raw hook for the client ADC
- */
-struct adi_axi_adc_conv {
- const struct adi_axi_adc_chip_info *chip_info;
-
- int (*preenable_setup)(struct adi_axi_adc_conv *conv);
- int (*reg_access)(struct adi_axi_adc_conv *conv, unsigned int reg,
- unsigned int writeval, unsigned int *readval);
- int (*read_raw)(struct adi_axi_adc_conv *conv,
- struct iio_chan_spec const *chan,
- int *val, int *val2, long mask);
- int (*write_raw)(struct adi_axi_adc_conv *conv,
- struct iio_chan_spec const *chan,
- int val, int val2, long mask);
-};
-
-struct adi_axi_adc_conv *devm_adi_axi_adc_conv_register(struct device *dev,
- size_t sizeof_priv);
-
-void *adi_axi_adc_conv_priv(struct adi_axi_adc_conv *conv);
-
-#endif
diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
new file mode 100644
index 000000000000..a6d79381866e
--- /dev/null
+++ b/include/linux/iio/backend.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _IIO_BACKEND_H_
+#define _IIO_BACKEND_H_
+
+#include <linux/types.h>
+
+struct fwnode_handle;
+struct iio_backend;
+struct device;
+struct iio_dev;
+
+enum iio_backend_data_type {
+ IIO_BACKEND_TWOS_COMPLEMENT,
+ IIO_BACKEND_OFFSET_BINARY,
+ IIO_BACKEND_DATA_TYPE_MAX
+};
+
+/**
+ * struct iio_backend_data_fmt - Backend data format
+ * @type: Data type.
+ * @sign_extend: Bool to tell if the data is sign extended.
+ * @enable: Enable/Disable the data format module. If disabled,
+ * not formatting will happen.
+ */
+struct iio_backend_data_fmt {
+ enum iio_backend_data_type type;
+ bool sign_extend;
+ bool enable;
+};
+
+/**
+ * struct iio_backend_ops - operations structure for an iio_backend
+ * @enable: Enable backend.
+ * @disable: Disable backend.
+ * @chan_enable: Enable one channel.
+ * @chan_disable: Disable one channel.
+ * @data_format_set: Configure the data format for a specific channel.
+ * @request_buffer: Request an IIO buffer.
+ * @free_buffer: Free an IIO buffer.
+ **/
+struct iio_backend_ops {
+ int (*enable)(struct iio_backend *back);
+ void (*disable)(struct iio_backend *back);
+ int (*chan_enable)(struct iio_backend *back, unsigned int chan);
+ int (*chan_disable)(struct iio_backend *back, unsigned int chan);
+ int (*data_format_set)(struct iio_backend *back, unsigned int chan,
+ const struct iio_backend_data_fmt *data);
+ struct iio_buffer *(*request_buffer)(struct iio_backend *back,
+ struct iio_dev *indio_dev);
+ void (*free_buffer)(struct iio_backend *back,
+ struct iio_buffer *buffer);
+};
+
+int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan);
+int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan);
+int devm_iio_backend_enable(struct device *dev, struct iio_backend *back);
+int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan,
+ const struct iio_backend_data_fmt *data);
+int devm_iio_backend_request_buffer(struct device *dev,
+ struct iio_backend *back,
+ struct iio_dev *indio_dev);
+
+void *iio_backend_get_priv(const struct iio_backend *conv);
+struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name);
+struct iio_backend *
+__devm_iio_backend_get_from_fwnode_lookup(struct device *dev,
+ struct fwnode_handle *fwnode);
+
+int devm_iio_backend_register(struct device *dev,
+ const struct iio_backend_ops *ops, void *priv);
+
+#endif
diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h
index 6564bdcdac66..18d3702fa95d 100644
--- a/include/linux/iio/buffer-dma.h
+++ b/include/linux/iio/buffer-dma.h
@@ -19,14 +19,12 @@ struct device;
/**
* enum iio_block_state - State of a struct iio_dma_buffer_block
- * @IIO_BLOCK_STATE_DEQUEUED: Block is not queued
* @IIO_BLOCK_STATE_QUEUED: Block is on the incoming queue
* @IIO_BLOCK_STATE_ACTIVE: Block is currently being processed by the DMA
* @IIO_BLOCK_STATE_DONE: Block is on the outgoing queue
* @IIO_BLOCK_STATE_DEAD: Block has been marked as to be freed
*/
enum iio_block_state {
- IIO_BLOCK_STATE_DEQUEUED,
IIO_BLOCK_STATE_QUEUED,
IIO_BLOCK_STATE_ACTIVE,
IIO_BLOCK_STATE_DONE,
@@ -73,12 +71,15 @@ struct iio_dma_buffer_block {
* @active_block: Block being used in read()
* @pos: Read offset in the active block
* @block_size: Size of each block
+ * @next_dequeue: index of next block that will be dequeued
*/
struct iio_dma_buffer_queue_fileio {
struct iio_dma_buffer_block *blocks[2];
struct iio_dma_buffer_block *active_block;
size_t pos;
size_t block_size;
+
+ unsigned int next_dequeue;
};
/**
@@ -93,7 +94,6 @@ struct iio_dma_buffer_queue_fileio {
* list and typically also a list of active blocks in the part that handles
* the DMA controller
* @incoming: List of buffers on the incoming queue
- * @outgoing: List of buffers on the outgoing queue
* @active: Whether the buffer is currently active
* @fileio: FileIO state
*/
@@ -105,7 +105,6 @@ struct iio_dma_buffer_queue {
struct mutex lock;
spinlock_t list_lock;
struct list_head incoming;
- struct list_head outgoing;
bool active;
diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h
index 5c355be89814..cbb8ba957fad 100644
--- a/include/linux/iio/buffer-dmaengine.h
+++ b/include/linux/iio/buffer-dmaengine.h
@@ -10,6 +10,9 @@
struct iio_dev;
struct device;
+struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev,
+ const char *channel);
+void iio_dmaengine_buffer_free(struct iio_buffer *buffer);
int devm_iio_dmaengine_buffer_setup(struct device *dev,
struct iio_dev *indio_dev,
const char *channel);
diff --git a/include/linux/iio/common/inv_sensors_timestamp.h b/include/linux/iio/common/inv_sensors_timestamp.h
new file mode 100644
index 000000000000..a47d304d1ba7
--- /dev/null
+++ b/include/linux/iio/common/inv_sensors_timestamp.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Invensense, Inc.
+ */
+
+#ifndef INV_SENSORS_TIMESTAMP_H_
+#define INV_SENSORS_TIMESTAMP_H_
+
+/**
+ * struct inv_sensors_timestamp_chip - chip internal properties
+ * @clock_period: internal clock period in ns
+ * @jitter: acceptable jitter in per-mille
+ * @init_period: chip initial period at reset in ns
+ */
+struct inv_sensors_timestamp_chip {
+ uint32_t clock_period;
+ uint32_t jitter;
+ uint32_t init_period;
+};
+
+/**
+ * struct inv_sensors_timestamp_interval - timestamps interval
+ * @lo: interval lower bound
+ * @up: interval upper bound
+ */
+struct inv_sensors_timestamp_interval {
+ int64_t lo;
+ int64_t up;
+};
+
+/**
+ * struct inv_sensors_timestamp_acc - accumulator for computing an estimation
+ * @val: current estimation of the value, the mean of all values
+ * @idx: current index of the next free place in values table
+ * @values: table of all measured values, use for computing the mean
+ */
+struct inv_sensors_timestamp_acc {
+ uint32_t val;
+ size_t idx;
+ uint32_t values[32];
+};
+
+/**
+ * struct inv_sensors_timestamp - timestamp management states
+ * @chip: chip internal characteristics
+ * @min_period: minimal acceptable clock period
+ * @max_period: maximal acceptable clock period
+ * @it: interrupts interval timestamps
+ * @timestamp: store last timestamp for computing next data timestamp
+ * @mult: current internal period multiplier
+ * @new_mult: new set internal period multiplier (not yet effective)
+ * @period: measured current period of the sensor
+ * @chip_period: accumulator for computing internal chip period
+ */
+struct inv_sensors_timestamp {
+ struct inv_sensors_timestamp_chip chip;
+ uint32_t min_period;
+ uint32_t max_period;
+ struct inv_sensors_timestamp_interval it;
+ int64_t timestamp;
+ uint32_t mult;
+ uint32_t new_mult;
+ uint32_t period;
+ struct inv_sensors_timestamp_acc chip_period;
+};
+
+void inv_sensors_timestamp_init(struct inv_sensors_timestamp *ts,
+ const struct inv_sensors_timestamp_chip *chip);
+
+int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts,
+ uint32_t period, bool fifo);
+
+void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts,
+ uint32_t fifo_period, size_t fifo_nb,
+ size_t sensor_nb, int64_t timestamp);
+
+static inline int64_t inv_sensors_timestamp_pop(struct inv_sensors_timestamp *ts)
+{
+ ts->timestamp += ts->period;
+ return ts->timestamp;
+}
+
+void inv_sensors_timestamp_apply_odr(struct inv_sensors_timestamp *ts,
+ uint32_t fifo_period, size_t fifo_nb,
+ unsigned int fifo_no);
+
+static inline void inv_sensors_timestamp_reset(struct inv_sensors_timestamp *ts)
+{
+ const struct inv_sensors_timestamp_interval interval_init = {0LL, 0LL};
+
+ ts->it = interval_init;
+ ts->timestamp = 0;
+}
+
+#endif
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 607c3a89a647..f9ae5cdd884f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -258,9 +258,9 @@ struct st_sensor_data {
bool hw_irq_trigger;
s64 hw_timestamp;
- char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
-
struct mutex odr_lock;
+
+ char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN);
};
#ifdef CONFIG_IIO_BUFFER
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 6802596b017c..e9910b41d48e 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -201,8 +201,9 @@ struct iio_dev
* @chan: The channel being queried.
* @val: Value read back.
*
- * Note raw reads from iio channels are in adc counts and hence
- * scale will need to be applied if standard units required.
+ * Note, if standard units are required, raw reads from iio channels
+ * need the offset (default 0) and scale (default 1) to be applied
+ * as (raw + offset) * scale.
*/
int iio_read_channel_raw(struct iio_channel *chan,
int *val);
@@ -212,8 +213,9 @@ int iio_read_channel_raw(struct iio_channel *chan,
* @chan: The channel being queried.
* @val: Value read back.
*
- * Note raw reads from iio channels are in adc counts and hence
- * scale will need to be applied if standard units required.
+ * Note, if standard units are required, raw reads from iio channels
+ * need the offset (default 0) and scale (default 1) to be applied
+ * as (raw + offset) * scale.
*
* In opposit to the normal iio_read_channel_raw this function
* returns the average of multiple reads.
@@ -281,8 +283,9 @@ int iio_read_channel_attribute(struct iio_channel *chan, int *val,
* @chan: The channel being queried.
* @val: Value being written.
*
- * Note raw writes to iio channels are in dac counts and hence
- * scale will need to be applied if standard units required.
+ * Note that for raw writes to iio channels, if the value provided is
+ * in standard units, the affect of the scale and offset must be removed
+ * as (value / scale) - offset.
*/
int iio_write_channel_raw(struct iio_channel *chan, int val);
@@ -292,12 +295,25 @@ int iio_write_channel_raw(struct iio_channel *chan, int val);
* @chan: The channel being queried.
* @val: Value read back.
*
- * Note raw reads from iio channels are in adc counts and hence
- * scale will need to be applied if standard units are required.
+ * Note, if standard units are required, raw reads from iio channels
+ * need the offset (default 0) and scale (default 1) to be applied
+ * as (raw + offset) * scale.
*/
int iio_read_max_channel_raw(struct iio_channel *chan, int *val);
/**
+ * iio_read_min_channel_raw() - read minimum available raw value from a given
+ * channel, i.e. the minimum possible value.
+ * @chan: The channel being queried.
+ * @val: Value read back.
+ *
+ * Note, if standard units are required, raw reads from iio channels
+ * need the offset (default 0) and scale (default 1) to be applied
+ * as (raw + offset) * scale.
+ */
+int iio_read_min_channel_raw(struct iio_channel *chan, int *val);
+
+/**
* iio_read_avail_channel_raw() - read available raw values from a given channel
* @chan: The channel being queried.
* @vals: Available values read back.
@@ -308,8 +324,9 @@ int iio_read_max_channel_raw(struct iio_channel *chan, int *val);
* For ranges, three vals are always returned; min, step and max.
* For lists, all the possible values are enumerated.
*
- * Note raw available values from iio channels are in adc counts and
- * hence scale will need to be applied if standard units are required.
+ * Note, if standard units are required, raw available values from iio
+ * channels need the offset (default 0) and scale (default 1) to be applied
+ * as (raw + offset) * scale.
*/
int iio_read_avail_channel_raw(struct iio_channel *chan,
const int **vals, int *length);
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 202e55b0a28b..e370a7bb3300 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/cdev.h>
+#include <linux/cleanup.h>
#include <linux/slab.h>
#include <linux/iio/types.h>
/* IIO TODO LIST */
@@ -427,18 +428,14 @@ struct iio_trigger; /* forward declaration */
* @write_event_config: set if the event is enabled.
* @read_event_value: read a configuration value associated with the event.
* @write_event_value: write a configuration value for the event.
+ * @read_event_label: function to request label name for a specified label,
+ * for better event identification.
* @validate_trigger: function to validate the trigger when the
* current trigger gets changed.
* @update_scan_mode: function to configure device and scan buffer when
* channels have changed
* @debugfs_reg_access: function to read or write register value of device
- * @of_xlate: function pointer to obtain channel specifier index.
- * When #iio-cells is greater than '0', the driver could
- * provide a custom of_xlate function that reads the
- * *args* and returns the appropriate index in registered
- * IIO channels array.
* @fwnode_xlate: fwnode based function pointer to obtain channel specifier index.
- * Functionally the same as @of_xlate.
* @hwfifo_set_watermark: function pointer to set the current hardware
* fifo watermark level; see hwfifo_* entries in
* Documentation/ABI/testing/sysfs-bus-iio for details on
@@ -511,6 +508,12 @@ struct iio_info {
enum iio_event_direction dir,
enum iio_event_info info, int val, int val2);
+ int (*read_event_label)(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ enum iio_event_type type,
+ enum iio_event_direction dir,
+ char *label);
+
int (*validate_trigger)(struct iio_dev *indio_dev,
struct iio_trigger *trig);
int (*update_scan_mode)(struct iio_dev *indio_dev,
@@ -556,7 +559,9 @@ struct iio_buffer_setup_ops {
* and owner
* @buffer: [DRIVER] any buffer present
* @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux
- * @available_scan_masks: [DRIVER] optional array of allowed bitmasks
+ * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the
+ * array in order of preference, the most preferred
+ * masks first.
* @masklength: [INTERN] the length of the mask established from
* channels
* @active_scan_mask: [INTERN] union of all scan masks requested by buffers
@@ -634,10 +639,37 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev,
int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp);
int iio_device_claim_direct_mode(struct iio_dev *indio_dev);
void iio_device_release_direct_mode(struct iio_dev *indio_dev);
+
+/*
+ * This autocleanup logic is normally used via
+ * iio_device_claim_direct_scoped().
+ */
+DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T),
+ iio_device_release_direct_mode(_T))
+
+DEFINE_GUARD_COND(iio_claim_direct, _try, ({
+ struct iio_dev *dev;
+ int d = iio_device_claim_direct_mode(_T);
+
+ if (d < 0)
+ dev = NULL;
+ else
+ dev = _T;
+ dev;
+ }))
+
+/**
+ * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct.
+ * @fail: What to do on failure to claim device.
+ * @iio_dev: Pointer to the IIO devices structure
+ */
+#define iio_device_claim_direct_scoped(fail, iio_dev) \
+ scoped_cond_guard(iio_claim_direct_try, fail, iio_dev)
+
int iio_device_claim_buffer_mode(struct iio_dev *indio_dev);
void iio_device_release_buffer_mode(struct iio_dev *indio_dev);
-extern struct bus_type iio_bus_type;
+extern const struct bus_type iio_bus_type;
/**
* iio_device_put() - reference counted deallocation of struct device
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index dc9ea299e088..8898966bc0f0 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -11,6 +11,7 @@
#include <linux/spi/spi.h>
#include <linux/interrupt.h>
+#include <linux/iio/iio.h>
#include <linux/iio/types.h>
#define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
@@ -131,7 +132,7 @@ struct adis {
unsigned long irq_flag;
void *buffer;
- u8 tx[10] ____cacheline_aligned;
+ u8 tx[10] __aligned(IIO_DMA_MINALIGN);
u8 rx[4];
};
diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h
index eff1e6b2595c..0f7fe7b522e3 100644
--- a/include/linux/iio/sw_device.h
+++ b/include/linux/iio/sw_device.h
@@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt);
struct iio_sw_device *iio_sw_device_create(const char *, const char *);
void iio_sw_device_destroy(struct iio_sw_device *);
-int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt);
-void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt);
-
static inline
void iio_swd_group_init_type_name(struct iio_sw_device *d,
const char *name,
diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h
index 47de2443e984..bc77f88df303 100644
--- a/include/linux/iio/sw_trigger.h
+++ b/include/linux/iio/sw_trigger.h
@@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt);
struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *);
void iio_sw_trigger_destroy(struct iio_sw_trigger *);
-int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt);
-void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt);
-
static inline
void iio_swt_group_init_type_name(struct iio_sw_trigger *t,
const char *name,
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 82faa98c719a..d89982c98368 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -19,6 +19,8 @@ enum iio_event_info {
IIO_EV_INFO_TIMEOUT,
IIO_EV_INFO_RESET_TIMEOUT,
IIO_EV_INFO_TAP2_MIN_DELAY,
+ IIO_EV_INFO_RUNNING_PERIOD,
+ IIO_EV_INFO_RUNNING_COUNT,
};
#define IIO_VAL_INT 1
@@ -66,6 +68,7 @@ enum iio_chan_info_enum {
IIO_CHAN_INFO_THERMOCOUPLE_TYPE,
IIO_CHAN_INFO_CALIBAMBIENT,
IIO_CHAN_INFO_ZEROPOINT,
+ IIO_CHAN_INFO_TROUGH,
};
#endif /* _IIO_TYPES_H_ */
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 86b57757c7b1..0bae61a15b60 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -16,23 +16,6 @@ struct linux_binprm;
#ifdef CONFIG_IMA
extern enum hash_algo ima_get_current_hash_algo(void);
-extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask);
-extern void ima_post_create_tmpfile(struct mnt_idmap *idmap,
- struct inode *inode);
-extern void ima_file_free(struct file *file);
-extern int ima_file_mmap(struct file *file, unsigned long reqprot,
- unsigned long prot, unsigned long flags);
-extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
-extern int ima_load_data(enum kernel_load_data_id id, bool contents);
-extern int ima_post_load_data(char *buf, loff_t size,
- enum kernel_load_data_id id, char *description);
-extern int ima_read_file(struct file *file, enum kernel_read_file_id id,
- bool contents);
-extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
- enum kernel_read_file_id id);
-extern void ima_post_path_mknod(struct mnt_idmap *idmap,
- struct dentry *dentry);
extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
@@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void)
return HASH_ALGO__LAST;
}
-static inline int ima_bprm_check(struct linux_binprm *bprm)
-{
- return 0;
-}
-
-static inline int ima_file_check(struct file *file, int mask)
-{
- return 0;
-}
-
-static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap,
- struct inode *inode)
-{
-}
-
-static inline void ima_file_free(struct file *file)
-{
- return;
-}
-
-static inline int ima_file_mmap(struct file *file, unsigned long reqprot,
- unsigned long prot, unsigned long flags)
-{
- return 0;
-}
-
-static inline int ima_file_mprotect(struct vm_area_struct *vma,
- unsigned long prot)
-{
- return 0;
-}
-
-static inline int ima_load_data(enum kernel_load_data_id id, bool contents)
-{
- return 0;
-}
-
-static inline int ima_post_load_data(char *buf, loff_t size,
- enum kernel_load_data_id id,
- char *description)
-{
- return 0;
-}
-
-static inline int ima_read_file(struct file *file, enum kernel_read_file_id id,
- bool contents)
-{
- return 0;
-}
-
-static inline int ima_post_read_file(struct file *file, void *buf, loff_t size,
- enum kernel_read_file_id id)
-{
- return 0;
-}
-
-static inline void ima_post_path_mknod(struct mnt_idmap *idmap,
- struct dentry *dentry)
-{
- return;
-}
-
static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size)
{
return -EOPNOTSUPP;
@@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image)
{}
#endif
-#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS
-extern void ima_post_key_create_or_update(struct key *keyring,
- struct key *key,
- const void *payload, size_t plen,
- unsigned long flags, bool create);
-#else
-static inline void ima_post_key_create_or_update(struct key *keyring,
- struct key *key,
- const void *payload,
- size_t plen,
- unsigned long flags,
- bool create) {}
-#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */
-
#ifdef CONFIG_IMA_APPRAISE
extern bool is_ima_appraise_enabled(void);
-extern void ima_inode_post_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry);
-extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
- const void *xattr_value, size_t xattr_value_len);
-extern int ima_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl);
-static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return ima_inode_set_acl(idmap, dentry, acl_name, NULL);
-}
-extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name);
#else
static inline bool is_ima_appraise_enabled(void)
{
return 0;
}
-
-static inline void ima_inode_post_setattr(struct mnt_idmap *idmap,
- struct dentry *dentry)
-{
- return;
-}
-
-static inline int ima_inode_setxattr(struct dentry *dentry,
- const char *xattr_name,
- const void *xattr_value,
- size_t xattr_value_len)
-{
- return 0;
-}
-
-static inline int ima_inode_set_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *acl_name,
- struct posix_acl *kacl)
-{
-
- return 0;
-}
-
-static inline int ima_inode_removexattr(struct dentry *dentry,
- const char *xattr_name)
-{
- return 0;
-}
-
-static inline int ima_inode_remove_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
- const char *acl_name)
-{
- return 0;
-}
#endif /* CONFIG_IMA_APPRAISE */
#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index c1c76a70a6ce..35227d47cfc9 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -2,7 +2,7 @@
#ifndef _LINUX_INDIRECT_CALL_WRAPPER_H
#define _LINUX_INDIRECT_CALL_WRAPPER_H
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
/*
* INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin
@@ -11,7 +11,7 @@
* @__VA_ARGS__: arguments for @f
*
* Avoid retpoline overhead for known builtin, checking @f vs each of them and
- * eventually invoking directly the builtin function. The functions are check
+ * eventually invoking directly the builtin function. The functions are checked
* in the given order. Fallback to the indirect call.
*/
#define INDIRECT_CALL_1(f, f1, ...) \
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 84abb30a3fbb..a9033696b0aa 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -8,6 +8,7 @@
struct inet_hashinfo;
struct inet_diag_handler {
+ struct module *owner;
void (*dump)(struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ddb27fc0ee8c..cb5280e6cc21 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -53,13 +53,15 @@ struct in_device {
};
#define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1])
+#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr))
#define IPV4_DEVCONF_ALL(net, attr) \
IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr)
+#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr))
-static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+static inline int ipv4_devconf_get(const struct in_device *in_dev, int index)
{
index--;
- return in_dev->cnf.data[index];
+ return READ_ONCE(in_dev->cnf.data[index]);
}
static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
@@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
{
index--;
set_bit(index, in_dev->cnf.state);
- in_dev->cnf.data[index] = val;
+ WRITE_ONCE(in_dev->cnf.data[index], val);
}
static inline void ipv4_devconf_setall(struct in_device *in_dev)
@@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val))
#define IN_DEV_ANDCONF(in_dev, attr) \
- (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
+ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_NET_ORCONF(in_dev, net, attr) \
- (IPV4_DEVCONF_ALL(net, attr) || \
+ (IPV4_DEVCONF_ALL_RO(net, attr) || \
IN_DEV_CONF_GET((in_dev), attr))
#define IN_DEV_ORCONF(in_dev, attr) \
IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr)
#define IN_DEV_MAXCONF(in_dev, attr) \
- (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
+ (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \
IN_DEV_CONF_GET((in_dev), attr)))
#define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING)
diff --git a/include/linux/init.h b/include/linux/init.h
index 266c3e1640d4..58cef4c2e59a 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -89,9 +89,6 @@
__latent_entropy
#define __meminitdata __section(".meminit.data")
#define __meminitconst __section(".meminit.rodata")
-#define __memexit __section(".memexit.text") __exitused __cold notrace
-#define __memexitdata __section(".memexit.data")
-#define __memexitconst __section(".memexit.rodata")
/* For assembly routines */
#define __HEAD .section ".head.text","ax"
@@ -171,17 +168,20 @@ extern initcall_entry_t __initcall_end[];
extern struct file_system_type rootfs_fs_type;
-#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
extern bool rodata_enabled;
-#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void);
-#endif
extern void (*late_time_init)(void);
extern bool initcall_debug;
+#ifdef MODULE
+extern struct module __this_module;
+#define THIS_MODULE (&__this_module)
+#else
+#define THIS_MODULE ((struct module *)0)
+#endif
+
#endif
#ifndef MODULE
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 40fc5813cf93..bccb3f1f6262 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -37,13 +37,6 @@ extern struct cred init_cred;
#define INIT_TASK_COMM "swapper"
-/* Attach to the init_task data structure for proper alignment */
-#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
-#define __init_task_data __section(".data..init_task")
-#else
-#define __init_task_data /**/
-#endif
-
/* Attach to the thread_info data structure for proper alignment */
#define __init_thread_info __section(".data..init_thread_info")
diff --git a/include/linux/input.h b/include/linux/input.h
index c22ac465254b..89a0be6ee0e2 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -275,7 +275,8 @@ struct input_handle;
* it may not sleep
* @events: event sequence handler. This method is being called by
* input core with interrupts disabled and dev->event_lock
- * spinlock held and so it may not sleep
+ * spinlock held and so it may not sleep. The method must return
+ * number of events passed to it.
* @filter: similar to @event; separates normal event handlers from
* "filters".
* @match: called after comparing device's id with handler's id_table
@@ -312,8 +313,8 @@ struct input_handler {
void *private;
void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
- void (*events)(struct input_handle *handle,
- const struct input_value *vals, unsigned int count);
+ unsigned int (*events)(struct input_handle *handle,
+ struct input_value *vals, unsigned int count);
bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
bool (*match)(struct input_handler *handler, struct input_dev *dev);
int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id);
diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h
index cda1f706eaeb..aa0b3ffea935 100644
--- a/include/linux/instruction_pointer.h
+++ b/include/linux/instruction_pointer.h
@@ -2,7 +2,12 @@
#ifndef _LINUX_INSTRUCTION_POINTER_H
#define _LINUX_INSTRUCTION_POINTER_H
+#include <asm/linkage.h>
+
#define _RET_IP_ (unsigned long)__builtin_return_address(0)
+
+#ifndef _THIS_IP_
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
#endif /* _LINUX_INSTRUCTION_POINTER_H */
diff --git a/include/linux/int_log.h b/include/linux/int_log.h
new file mode 100644
index 000000000000..0a6f58c38b61
--- /dev/null
+++ b/include/linux/int_log.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Provides fixed-point logarithm operations.
+ *
+ * Copyright (C) 2006 Christoph Pfister (christophpfister@gmail.com)
+ */
+
+#ifndef __LINUX_INT_LOG_H
+#define __LINUX_INT_LOG_H
+
+#include <linux/types.h>
+
+/**
+ * intlog2 - computes log2 of a value; the result is shifted left by 24 bits
+ *
+ * @value: The value (must be != 0)
+ *
+ * to use rational values you can use the following method:
+ *
+ * intlog2(value) = intlog2(value * 2^x) - x * 2^24
+ *
+ * Some usecase examples:
+ *
+ * intlog2(8) will give 3 << 24 = 3 * 2^24
+ *
+ * intlog2(9) will give 3 << 24 + ... = 3.16... * 2^24
+ *
+ * intlog2(1.5) = intlog2(3) - 2^24 = 0.584... * 2^24
+ *
+ *
+ * return: log2(value) * 2^24
+ */
+extern unsigned int intlog2(u32 value);
+
+/**
+ * intlog10 - computes log10 of a value; the result is shifted left by 24 bits
+ *
+ * @value: The value (must be != 0)
+ *
+ * to use rational values you can use the following method:
+ *
+ * intlog10(value) = intlog10(value * 10^x) - x * 2^24
+ *
+ * An usecase example:
+ *
+ * intlog10(1000) will give 3 << 24 = 3 * 2^24
+ *
+ * due to the implementation intlog10(1000) might be not exactly 3 * 2^24
+ *
+ * look at intlog2 for similar examples
+ *
+ * return: log10(value) * 2^24
+ */
+extern unsigned int intlog10(u32 value);
+
+#endif
diff --git a/include/linux/integrity.h b/include/linux/integrity.h
index 2ea0f2f65ab6..459b79683783 100644
--- a/include/linux/integrity.h
+++ b/include/linux/integrity.h
@@ -19,40 +19,13 @@ enum integrity_status {
INTEGRITY_UNKNOWN,
};
-/* List of EVM protected security xattrs */
#ifdef CONFIG_INTEGRITY
-extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode);
-extern void integrity_inode_free(struct inode *inode);
extern void __init integrity_load_keys(void);
#else
-static inline struct integrity_iint_cache *
- integrity_inode_get(struct inode *inode)
-{
- return NULL;
-}
-
-static inline void integrity_inode_free(struct inode *inode)
-{
- return;
-}
-
static inline void integrity_load_keys(void)
{
}
#endif /* CONFIG_INTEGRITY */
-#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
-
-extern int integrity_kernel_module_request(char *kmod_name);
-
-#else
-
-static inline int integrity_kernel_module_request(char *kmod_name)
-{
- return 0;
-}
-
-#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */
-
#endif /* _LINUX_INTEGRITY_H */
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index f45f13304add..771622650247 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -94,6 +94,9 @@ int ishtp_cl_link(struct ishtp_cl *cl);
void ishtp_cl_unlink(struct ishtp_cl *cl);
int ishtp_cl_disconnect(struct ishtp_cl *cl);
int ishtp_cl_connect(struct ishtp_cl *cl);
+int ishtp_cl_establish_connection(struct ishtp_cl *cl, const guid_t *uuid,
+ int tx_size, int rx_size, bool reset);
+void ishtp_cl_destroy_connection(struct ishtp_cl *cl, bool reset);
int ishtp_cl_send(struct ishtp_cl *cl, uint8_t *buf, size_t length);
int ishtp_cl_flush_queues(struct ishtp_cl *cl);
int ishtp_cl_io_rb_recycle(struct ishtp_cl_rb *rb);
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 33f21bd85dbf..f3196f82fd8a 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -178,6 +178,12 @@ struct rapl_package {
struct rapl_if_priv *priv;
};
+struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu);
+struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu);
+void rapl_remove_package_cpuslocked(struct rapl_package *rp);
+
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);
diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
index f422612c28d6..8ff8eabb4a98 100644
--- a/include/linux/intel_tcc.h
+++ b/include/linux/intel_tcc.h
@@ -13,6 +13,6 @@
int intel_tcc_get_tjmax(int cpu);
int intel_tcc_get_offset(int cpu);
int intel_tcc_set_offset(int cpu, int offset);
-int intel_tcc_get_temp(int cpu, bool pkg);
+int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
#endif /* __INTEL_TCC_H__ */
diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h
index f505788c05da..a3529b962be6 100644
--- a/include/linux/intel_tpmi.h
+++ b/include/linux/intel_tpmi.h
@@ -6,6 +6,25 @@
#ifndef _INTEL_TPMI_H_
#define _INTEL_TPMI_H_
+#include <linux/bitfield.h>
+
+#define TPMI_VERSION_INVALID 0xff
+#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val)
+#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val)
+
+/*
+ * List of supported TMPI IDs.
+ * Some TMPI IDs are not used by Linux, so the numbers are not consecutive.
+ */
+enum intel_tpmi_id {
+ TPMI_ID_RAPL = 0, /* Running Average Power Limit */
+ TPMI_ID_PEM = 1, /* Power and Perf excursion Monitor */
+ TPMI_ID_UNCORE = 2, /* Uncore Frequency Scaling */
+ TPMI_ID_SST = 5, /* Speed Select Technology */
+ TPMI_CONTROL_ID = 0x80, /* Special ID for getting feature status */
+ TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */
+};
+
/**
* struct intel_tpmi_plat_info - Platform information for a TPMI device instance
* @package_id: CPU Package id
@@ -26,5 +45,6 @@ struct intel_tpmi_plat_info {
struct intel_tpmi_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev);
struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index);
int tpmi_get_resource_count(struct auxiliary_device *auxdev);
-
+int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked,
+ bool *write_blocked);
#endif
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index e6d8aca6886d..f5aef8784692 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -33,10 +33,10 @@ struct icc_node_data {
*/
struct icc_onecell_data {
unsigned int num_nodes;
- struct icc_node *nodes[];
+ struct icc_node *nodes[] __counted_by(num_nodes);
};
-struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec,
+struct icc_node *of_icc_xlate_onecell(const struct of_phandle_args *spec,
void *data);
/**
@@ -65,8 +65,9 @@ struct icc_provider {
u32 peak_bw, u32 *agg_avg, u32 *agg_peak);
void (*pre_aggregate)(struct icc_node *node);
int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak);
- struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data);
- struct icc_node_data* (*xlate_extended)(struct of_phandle_args *spec, void *data);
+ struct icc_node* (*xlate)(const struct of_phandle_args *spec, void *data);
+ struct icc_node_data* (*xlate_extended)(const struct of_phandle_args *spec,
+ void *data);
struct device *dev;
int users;
bool inter_set;
@@ -124,7 +125,7 @@ int icc_nodes_remove(struct icc_provider *provider);
void icc_provider_init(struct icc_provider *provider);
int icc_provider_register(struct icc_provider *provider);
void icc_provider_deregister(struct icc_provider *provider);
-struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec);
+struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec);
void icc_sync_state(struct device *dev);
#else
@@ -171,7 +172,7 @@ static inline int icc_provider_register(struct icc_provider *provider)
static inline void icc_provider_deregister(struct icc_provider *provider) { }
-static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec)
+static inline struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec)
{
return ERR_PTR(-ENOTSUPP);
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a92bce40b04b..3a36e64119c8 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -5,6 +5,7 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
+#include <linux/cleanup.h>
#include <linux/cpumask.h>
#include <linux/irqreturn.h>
#include <linux/irqnr.h>
@@ -67,6 +68,8 @@
* later.
* IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers,
* depends on IRQF_PERCPU.
+ * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared
+ * interrupt.
*/
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
@@ -82,6 +85,7 @@
#define IRQF_COND_SUSPEND 0x00040000
#define IRQF_NO_AUTOEN 0x00080000
#define IRQF_NO_DEBUG 0x00100000
+#define IRQF_COND_ONESHOT 0x00200000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
@@ -232,6 +236,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type);
extern bool irq_percpu_is_enabled(unsigned int irq);
extern void irq_wake_thread(unsigned int irq, void *dev_id);
+DEFINE_LOCK_GUARD_1(disable_irq, int,
+ disable_irq(*_T->lock), enable_irq(*_T->lock))
+
extern void disable_nmi_nosync(unsigned int irq);
extern void disable_percpu_nmi(unsigned int irq);
extern void enable_nmi(unsigned int irq);
@@ -566,11 +573,15 @@ enum
*
* _ RCU:
* 1) rcutree_migrate_callbacks() migrates the queue.
- * 2) rcu_report_dead() reports the final quiescent states.
+ * 2) rcutree_report_cpu_dead() reports the final quiescent states.
*
* _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
+ *
+ * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue
*/
-#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
+#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\
+ BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ))
+
/* map softirq index to softirq name. update 'softirq_to_name' in
* kernel/softirq.c when adding a new softirq.
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 1b7a44b35616..86cf1f7ae389 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -100,6 +100,30 @@ struct io_pgtable_cfg {
const struct iommu_flush_ops *tlb;
struct device *iommu_dev;
+ /**
+ * @alloc: Custom page allocator.
+ *
+ * Optional hook used to allocate page tables. If this function is NULL,
+ * @free must be NULL too.
+ *
+ * Memory returned should be zeroed and suitable for dma_map_single() and
+ * virt_to_phys().
+ *
+ * Not all formats support custom page allocators. Before considering
+ * passing a non-NULL value, make sure the chosen page format supports
+ * this feature.
+ */
+ void *(*alloc)(void *cookie, size_t size, gfp_t gfp);
+
+ /**
+ * @free: Custom page de-allocator.
+ *
+ * Optional hook used to free page tables allocated with the @alloc
+ * hook. Must be non-NULL if @alloc is not NULL, must be NULL
+ * otherwise.
+ */
+ void (*free)(void *cookie, void *pages, size_t size);
+
/* Low-level data specific to the table format */
union {
struct {
@@ -166,6 +190,10 @@ struct io_pgtable_ops {
struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
+ int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty);
};
/**
@@ -238,15 +266,25 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop,
}
/**
+ * enum io_pgtable_caps - IO page table backend capabilities.
+ */
+enum io_pgtable_caps {
+ /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */
+ IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0),
+};
+
+/**
* struct io_pgtable_init_fns - Alloc/free a set of page tables for a
* particular format.
*
* @alloc: Allocate a set of page tables described by cfg.
* @free: Free the page tables associated with iop.
+ * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities.
*/
struct io_pgtable_init_fns {
struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
void (*free)(struct io_pgtable *iop);
+ u32 caps;
};
extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
diff --git a/include/linux/io.h b/include/linux/io.h
index 7304f2a69960..235ba7d80a8f 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -23,12 +23,19 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
#ifdef CONFIG_MMU
int ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot);
+int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot);
#else
static inline int ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot)
{
return 0;
}
+static inline int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
+{
+ return 0;
+}
#endif
/*
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index bb9c666bd584..68ed6697fece 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -6,63 +6,13 @@
#include <linux/xarray.h>
#include <uapi/linux/io_uring.h>
-enum io_uring_cmd_flags {
- IO_URING_F_COMPLETE_DEFER = 1,
- IO_URING_F_UNLOCKED = 2,
- /* the request is executed from poll, it should not be freed */
- IO_URING_F_MULTISHOT = 4,
- /* executed by io-wq */
- IO_URING_F_IOWQ = 8,
- /* int's last bit, sign checks are usually faster than a bit test */
- IO_URING_F_NONBLOCK = INT_MIN,
-
- /* ctx state flags, for URING_CMD */
- IO_URING_F_SQE128 = (1 << 8),
- IO_URING_F_CQE32 = (1 << 9),
- IO_URING_F_IOPOLL = (1 << 10),
-};
-
-struct io_uring_cmd {
- struct file *file;
- const struct io_uring_sqe *sqe;
- union {
- /* callback to defer completions to task context */
- void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
- /* used for polled completion */
- void *cookie;
- };
- u32 cmd_op;
- u32 flags;
- u8 pdu[32]; /* available inline for free use */
-};
-
-static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
-{
- return sqe->cmd;
-}
-
#if defined(CONFIG_IO_URING)
-int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
- struct iov_iter *iter, void *ioucmd);
-void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
- unsigned issue_flags);
-struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
-void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned),
- unsigned flags);
-/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
-void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned));
-
-static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
-{
- __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
-}
+int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
+bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
{
@@ -82,27 +32,6 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
-static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
- struct iov_iter *iter, void *ioucmd)
-{
- return -EOPNOTSUPP;
-}
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
- ssize_t ret2, unsigned issue_flags)
-{
-}
-static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
-{
-}
-static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
-{
-}
-static inline struct sock *io_uring_get_socket(struct file *file)
-{
- return NULL;
-}
static inline void io_uring_task_cancel(void)
{
}
@@ -116,6 +45,15 @@ static inline const char *io_uring_get_opcode(u8 opcode)
{
return "";
}
+static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ return -EOPNOTSUPP;
+}
+static inline bool io_is_uring_fops(struct file *file)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
new file mode 100644
index 000000000000..e453a997c060
--- /dev/null
+++ b/include/linux/io_uring/cmd.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IO_URING_CMD_H
+#define _LINUX_IO_URING_CMD_H
+
+#include <uapi/linux/io_uring.h>
+#include <linux/io_uring_types.h>
+
+/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
+#define IORING_URING_CMD_CANCELABLE (1U << 30)
+
+struct io_uring_cmd {
+ struct file *file;
+ const struct io_uring_sqe *sqe;
+ /* callback to defer completions to task context */
+ void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
+ u32 cmd_op;
+ u32 flags;
+ u8 pdu[32]; /* available inline for free use */
+};
+
+static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
+{
+ return sqe->cmd;
+}
+
+#if defined(CONFIG_IO_URING)
+int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ struct iov_iter *iter, void *ioucmd);
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
+ unsigned issue_flags);
+void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ unsigned flags);
+
+void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
+ unsigned int issue_flags);
+
+#else
+static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ struct iov_iter *iter, void *ioucmd)
+{
+ return -EOPNOTSUPP;
+}
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+ ssize_t ret2, unsigned issue_flags)
+{
+}
+static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ unsigned flags)
+{
+}
+static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+}
+#endif
+
+/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
+static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+{
+ __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
+}
+
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+{
+ __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
+}
+
+static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
+{
+ return cmd_to_io_kiocb(cmd)->task;
+}
+
+#endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f04ce513fadb..ac333ea81d31 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -2,11 +2,43 @@
#define IO_URING_TYPES_H
#include <linux/blkdev.h>
+#include <linux/hashtable.h>
#include <linux/task_work.h>
#include <linux/bitmap.h>
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
+enum {
+ /*
+ * A hint to not wake right away but delay until there are enough of
+ * tw's queued to match the number of CQEs the task is waiting for.
+ *
+ * Must not be used with requests generating more than one CQE.
+ * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
+ */
+ IOU_F_TWQ_LAZY_WAKE = 1,
+};
+
+enum io_uring_cmd_flags {
+ IO_URING_F_COMPLETE_DEFER = 1,
+ IO_URING_F_UNLOCKED = 2,
+ /* the request is executed from poll, it should not be freed */
+ IO_URING_F_MULTISHOT = 4,
+ /* executed by io-wq */
+ IO_URING_F_IOWQ = 8,
+ /* int's last bit, sign checks are usually faster than a bit test */
+ IO_URING_F_NONBLOCK = INT_MIN,
+
+ /* ctx state flags, for URING_CMD */
+ IO_URING_F_SQE128 = (1 << 8),
+ IO_URING_F_CQE32 = (1 << 9),
+ IO_URING_F_IOPOLL = (1 << 10),
+
+ /* set when uring wants to cancel a previously issued command */
+ IO_URING_F_CANCEL = (1 << 11),
+ IO_URING_F_COMPAT = (1 << 12),
+};
+
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -69,8 +101,8 @@ struct io_uring_task {
};
struct io_uring {
- u32 head ____cacheline_aligned_in_smp;
- u32 tail ____cacheline_aligned_in_smp;
+ u32 head;
+ u32 tail;
};
/*
@@ -176,7 +208,6 @@ struct io_submit_state {
unsigned short submit_nr;
unsigned int cqes_count;
struct blk_plug plug;
- struct io_uring_cqe cqes[16];
};
struct io_ev_fd {
@@ -205,25 +236,19 @@ struct io_ring_ctx {
unsigned int has_evfd: 1;
/* all CQEs should be posted only by the submitter task */
unsigned int task_complete: 1;
+ unsigned int lockless_cq: 1;
unsigned int syscall_iopoll: 1;
unsigned int poll_activated: 1;
unsigned int drain_disabled: 1;
unsigned int compat: 1;
+ unsigned int iowq_limits_set : 1;
- enum task_work_notify_mode notify_method;
+ struct task_struct *submitter_task;
+ struct io_rings *rings;
+ struct percpu_ref refs;
- /*
- * If IORING_SETUP_NO_MMAP is used, then the below holds
- * the gup'ed pages for the two rings, and the sqes.
- */
- unsigned short n_ring_pages;
- unsigned short n_sqe_pages;
- struct page **ring_pages;
- struct page **sqe_pages;
-
- struct io_rings *rings;
- struct task_struct *submitter_task;
- struct percpu_ref refs;
+ enum task_work_notify_mode notify_method;
+ unsigned sq_thread_idle;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -252,40 +277,35 @@ struct io_ring_ctx {
*/
struct io_rsrc_node *rsrc_node;
atomic_t cancel_seq;
+
+ /*
+ * ->iopoll_list is protected by the ctx->uring_lock for
+ * io_uring instances that don't use IORING_SETUP_SQPOLL.
+ * For SQPOLL, only the single threaded io_sq_thread() will
+ * manipulate the list, hence no extra locking is needed there.
+ */
+ bool poll_multi_queue;
+ struct io_wq_work_list iopoll_list;
+
struct io_file_table file_table;
+ struct io_mapped_ubuf **user_bufs;
unsigned nr_user_files;
unsigned nr_user_bufs;
- struct io_mapped_ubuf **user_bufs;
struct io_submit_state submit_state;
- struct io_buffer_list *io_bl;
struct xarray io_bl_xa;
- struct list_head io_buffers_cache;
struct io_hash_table cancel_table_locked;
- struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache;
- } ____cacheline_aligned_in_smp;
- /* IRQ completion list, under ->completion_lock */
- struct io_wq_work_list locked_free_list;
- unsigned int locked_free_nr;
-
- const struct cred *sq_creds; /* cred used for __io_sq_thread() */
- struct io_sq_data *sq_data; /* if using sq thread polling */
-
- struct wait_queue_head sqo_sq_wait;
- struct list_head sqd_list;
-
- unsigned long check_cq;
-
- unsigned int file_alloc_start;
- unsigned int file_alloc_end;
-
- struct xarray personalities;
- u32 pers_next;
+ /*
+ * Any cancelable uring_cmd is added to this list in
+ * ->uring_cmd() by io_uring_cmd_insert_cancelable()
+ */
+ struct hlist_head cancelable_uring_cmd;
+ } ____cacheline_aligned_in_smp;
struct {
/*
@@ -298,39 +318,62 @@ struct io_ring_ctx {
unsigned cached_cq_tail;
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
- struct wait_queue_head cq_wait;
unsigned cq_extra;
} ____cacheline_aligned_in_smp;
+ /*
+ * task_work and async notification delivery cacheline. Expected to
+ * regularly bounce b/w CPUs.
+ */
struct {
- spinlock_t completion_lock;
-
- bool poll_multi_queue;
- atomic_t cq_wait_nr;
-
- /*
- * ->iopoll_list is protected by the ctx->uring_lock for
- * io_uring instances that don't use IORING_SETUP_SQPOLL.
- * For SQPOLL, only the single threaded io_sq_thread() will
- * manipulate the list, hence no extra locking is needed there.
- */
- struct io_wq_work_list iopoll_list;
- struct io_hash_table cancel_table;
-
struct llist_head work_llist;
-
- struct list_head io_buffers_comp;
+ unsigned long check_cq;
+ atomic_t cq_wait_nr;
+ atomic_t cq_timeouts;
+ struct wait_queue_head cq_wait;
} ____cacheline_aligned_in_smp;
/* timeouts */
struct {
spinlock_t timeout_lock;
- atomic_t cq_timeouts;
struct list_head timeout_list;
struct list_head ltimeout_list;
unsigned cq_last_tm_flush;
} ____cacheline_aligned_in_smp;
+ struct io_uring_cqe completion_cqes[16];
+
+ spinlock_t completion_lock;
+
+ /* IRQ completion list, under ->completion_lock */
+ unsigned int locked_free_nr;
+ struct io_wq_work_list locked_free_list;
+
+ struct list_head io_buffers_comp;
+ struct list_head cq_overflow_list;
+ struct io_hash_table cancel_table;
+
+ struct hlist_head waitid_list;
+
+#ifdef CONFIG_FUTEX
+ struct hlist_head futex_list;
+ struct io_alloc_cache futex_cache;
+#endif
+
+ const struct cred *sq_creds; /* cred used for __io_sq_thread() */
+ struct io_sq_data *sq_data; /* if using sq thread polling */
+
+ struct wait_queue_head sqo_sq_wait;
+ struct list_head sqd_list;
+
+ unsigned int file_alloc_start;
+ unsigned int file_alloc_end;
+
+ struct list_head io_buffers_cache;
+
+ /* deferred free list, protected by ->uring_lock */
+ struct hlist_head io_buf_list;
+
/* Keep this last, we don't need it for the fast path */
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
@@ -346,11 +389,9 @@ struct io_ring_ctx {
struct wait_queue_head rsrc_quiesce_wq;
unsigned rsrc_quiesce;
- struct list_head io_buffers_pages;
+ u32 pers_next;
+ struct xarray personalities;
- #if defined(CONFIG_UNIX)
- struct socket *ring_sock;
- #endif
/* hashed buffered write serialization */
struct io_wq_hash *hash_map;
@@ -367,13 +408,33 @@ struct io_ring_ctx {
/* io-wq management, e.g. thread count */
u32 iowq_limits[2];
- bool iowq_limits_set;
struct callback_head poll_wq_task_work;
struct list_head defer_list;
- unsigned sq_thread_idle;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ struct list_head napi_list; /* track busy poll napi_id */
+ spinlock_t napi_lock; /* napi_list lock */
+
+ /* napi busy poll default timeout */
+ unsigned int napi_busy_poll_to;
+ bool napi_prefer_busy_poll;
+ bool napi_enabled;
+
+ DECLARE_HASHTABLE(napi_ht, 4);
+#endif
+
/* protected by ->completion_lock */
unsigned evfd_last_cq_tail;
+
+ /*
+ * If IORING_SETUP_NO_MMAP is used, then the below holds
+ * the gup'ed pages for the two rings, and the sqes.
+ */
+ unsigned short n_ring_pages;
+ unsigned short n_sqe_pages;
+ struct page **ring_pages;
+ struct page **sqe_pages;
};
struct io_tw_state {
@@ -408,83 +469,95 @@ enum {
REQ_F_SKIP_LINK_CQES_BIT,
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
- REQ_F_PARTIAL_IO_BIT,
- REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT,
REQ_F_HASH_LOCKED_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
+ REQ_F_POLL_NO_LAZY_BIT,
+ REQ_F_CANCEL_SEQ_BIT,
+ REQ_F_CAN_POLL_BIT,
+ REQ_F_BL_EMPTY_BIT,
+ REQ_F_BL_NO_RECYCLE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
};
+typedef u64 __bitwise io_req_flags_t;
+#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno)))
+
enum {
/* ctx owns file */
- REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT),
+ REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
/* drain existing IO first */
- REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT),
+ REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
/* linked sqes */
- REQ_F_LINK = BIT(REQ_F_LINK_BIT),
+ REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT),
/* doesn't sever on completion < 0 */
- REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT),
+ REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
/* IOSQE_ASYNC */
- REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
+ REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
/* IOSQE_BUFFER_SELECT */
- REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
+ REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
/* IOSQE_CQE_SKIP_SUCCESS */
- REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT),
+ REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
/* fail rest of links */
- REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
+ REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT),
/* on inflight list, should be cancelled and waited on exit reliably */
- REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
+ REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
/* read/write uses file position */
- REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
+ REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
/* must not punt to workers */
- REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
+ REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
/* has or had linked timeout */
- REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
+ REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
/* needs cleanup */
- REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
+ REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */
- REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
+ REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
/* buffer already selected */
- REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+ REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
/* buffer selected from ring, needs commit */
- REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT),
+ REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
/* caller should reissue async */
- REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
+ REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT),
/* supports async reads/writes */
- REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT),
+ REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
/* regular file */
- REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
+ REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT),
/* has creds assigned */
- REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
+ REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT),
/* skip refcounting if not set */
- REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
+ REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
/* there is a linked timeout that has to be armed */
- REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
+ REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
/* ->async_data allocated */
- REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT),
+ REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
/* don't post CQEs while failing linked requests */
- REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT),
+ REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
/* single poll may be active */
- REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT),
+ REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
/* double poll may active */
- REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
- /* request has already done partial IO */
- REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
+ REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
/* fast poll multishot mode */
- REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
- /* ->extra1 and ->extra2 are initialised */
- REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
+ REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
/* recvmsg special flag, clear EPOLLIN */
- REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
+ REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
- REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
+ REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
+ /* don't use lazy poll wake for this request */
+ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
+ /* cancel sequence is set and valid */
+ REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
+ /* file is pollable */
+ REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
+ /* buffer list was empty after selection of buffer */
+ REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
+ /* don't recycle provided buffers for this request */
+ REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -545,15 +618,17 @@ struct io_kiocb {
* and after selection it points to the buffer ID itself.
*/
u16 buf_index;
- unsigned int flags;
+
+ unsigned nr_tw;
+
+ /* REQ_F_* flags */
+ io_req_flags_t flags;
struct io_cqe cqe;
struct io_ring_ctx *ctx;
struct task_struct *task;
- struct io_rsrc_node *rsrc_node;
-
union {
/* store used ubuf, so we can prevent reloading */
struct io_mapped_ubuf *imu;
@@ -574,18 +649,14 @@ struct io_kiocb {
/* cache ->apoll->events */
__poll_t apoll_events;
};
+
+ struct io_rsrc_node *rsrc_node;
+
atomic_t refs;
atomic_t poll_refs;
struct io_task_work io_task_work;
- unsigned nr_tw;
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
- union {
- struct hlist_node hash_node;
- struct {
- u64 extra1;
- u64 extra2;
- };
- };
+ struct hlist_node hash_node;
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
@@ -595,6 +666,11 @@ struct io_kiocb {
/* custom credentials, valid IFF REQ_F_CREDS is set */
const struct cred *creds;
struct io_wq_work work;
+
+ struct {
+ u64 extra1;
+ u64 extra2;
+ } big_cqe;
};
struct io_overflow_cqe {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e2b836c2e119..6fc1c858013d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -58,7 +58,11 @@ struct vm_fault;
#define IOMAP_F_DIRTY (1U << 1)
#define IOMAP_F_SHARED (1U << 2)
#define IOMAP_F_MERGED (1U << 3)
+#ifdef CONFIG_BUFFER_HEAD
#define IOMAP_F_BUFFER_HEAD (1U << 4)
+#else
+#define IOMAP_F_BUFFER_HEAD 0
+#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR (1U << 5)
/*
@@ -261,9 +265,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
@@ -288,22 +293,32 @@ struct iomap_ioend {
struct list_head io_list; /* next ioend in chain */
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
- u32 io_folios; /* folios added to ioend */
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
- struct bio *io_bio; /* bio being built */
- struct bio io_inline_bio; /* MUST BE LAST! */
+ struct bio io_bio; /* MUST BE LAST! */
};
+static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
+{
+ return container_of(bio, struct iomap_ioend, io_bio);
+}
+
struct iomap_writeback_ops {
/*
* Required, maps the blocks so that writeback can be performed on
* the range starting at offset.
+ *
+ * Can return arbitrarily large regions, but we need to call into it at
+ * least once per folio to allow the file systems to synchronize with
+ * the write path that could be invalidating mappings.
+ *
+ * An existing mapping from a previous call to this method can be reused
+ * by the file system if it is still valid.
*/
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
- loff_t offset);
+ loff_t offset, unsigned len);
/*
* Optional, allows the file systems to perform actions just before
@@ -324,6 +339,7 @@ struct iomap_writepage_ctx {
struct iomap iomap;
struct iomap_ioend *ioend;
const struct iomap_writeback_ops *ops;
+ u32 nr_folios; /* folios added to the ioend */
};
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d31642596675..2e925b5eba53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <uapi/linux/iommu.h>
+#include <linux/iova_bitmap.h>
#define IOMMU_READ (1 << 0)
#define IOMMU_WRITE (1 << 1)
@@ -37,10 +37,113 @@ struct bus_type;
struct device;
struct iommu_domain;
struct iommu_domain_ops;
+struct iommu_dirty_ops;
struct notifier_block;
struct iommu_sva;
-struct iommu_fault_event;
struct iommu_dma_cookie;
+struct iommu_fault_param;
+
+#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
+#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
+#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */
+#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */
+
+/* Generic fault types, can be expanded IRQ remapping fault */
+enum iommu_fault_type {
+ IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */
+};
+
+/**
+ * struct iommu_fault_page_request - Page Request data
+ * @flags: encodes whether the corresponding fields are valid and whether this
+ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values).
+ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response
+ * must have the same PASID value as the page request. When it is clear,
+ * the page response should not have a PASID.
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_fault_page_request {
+#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0)
+#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1)
+#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2)
+#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3)
+ u32 flags;
+ u32 pasid;
+ u32 grpid;
+ u32 perm;
+ u64 addr;
+ u64 private_data[2];
+};
+
+/**
+ * struct iommu_fault - Generic fault data
+ * @type: fault type from &enum iommu_fault_type
+ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ
+ */
+struct iommu_fault {
+ u32 type;
+ struct iommu_fault_page_request prm;
+};
+
+/**
+ * enum iommu_page_response_code - Return status of fault handlers
+ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ * populated, retry the access. This is "Success" in PCI PRI.
+ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ * this device if possible. This is "Response Failure" in PCI PRI.
+ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ * access. This is "Invalid Request" in PCI PRI.
+ */
+enum iommu_page_response_code {
+ IOMMU_PAGE_RESP_SUCCESS = 0,
+ IOMMU_PAGE_RESP_INVALID,
+ IOMMU_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_page_response - Generic page response information
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code from &enum iommu_page_response_code
+ */
+struct iommu_page_response {
+ u32 pasid;
+ u32 grpid;
+ u32 code;
+};
+
+struct iopf_fault {
+ struct iommu_fault fault;
+ /* node for pending lists */
+ struct list_head list;
+};
+
+struct iopf_group {
+ struct iopf_fault last_fault;
+ struct list_head faults;
+ /* list node for iommu_fault_param::faults */
+ struct list_head pending_node;
+ struct work_struct work;
+ struct iommu_domain *domain;
+ /* The device's fault data parameter. */
+ struct iommu_fault_param *fault_param;
+};
+
+/**
+ * struct iopf_queue - IO Page Fault queue
+ * @wq: the fault workqueue
+ * @devices: devices attached to this queue
+ * @lock: protects the device list
+ */
+struct iopf_queue {
+ struct workqueue_struct *wq;
+ struct list_head devices;
+ struct mutex lock;
+};
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
@@ -48,7 +151,6 @@ struct iommu_dma_cookie;
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
-typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -64,6 +166,10 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */
#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
+#define __IOMMU_DOMAIN_PLATFORM (1U << 5)
+
+#define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested
+ on a stage-2 translation */
#define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ
/*
@@ -81,6 +187,8 @@ struct iommu_domain_geometry {
* invalidation.
* IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses
* represented by mm_struct's.
+ * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own
+ * dma_api stuff. Do not use in new drivers.
*/
#define IOMMU_DOMAIN_BLOCKED (0U)
#define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT)
@@ -91,15 +199,18 @@ struct iommu_domain_geometry {
__IOMMU_DOMAIN_DMA_API | \
__IOMMU_DOMAIN_DMA_FQ)
#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA)
+#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM)
+#define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED)
struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
+ const struct iommu_dirty_ops *dirty_ops;
+ const struct iommu_ops *owner; /* Whose domain_alloc we came from */
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
- enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
- void *data);
+ int (*iopf_handler)(struct iopf_group *group);
void *fault_data;
union {
struct {
@@ -109,6 +220,11 @@ struct iommu_domain {
struct { /* IOMMU_DOMAIN_SVA */
struct mm_struct *mm;
int users;
+ /*
+ * Next iommu_domain in mm->iommu_mm->sva-domains list
+ * protected by iommu_sva_lock.
+ */
+ struct list_head next;
};
};
};
@@ -133,6 +249,7 @@ enum iommu_cap {
* usefully support the non-strict DMA flush queue.
*/
IOMMU_CAP_DEFERRED_FLUSH,
+ IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */
};
/* These are the possible reserved region types */
@@ -196,6 +313,8 @@ enum iommu_dev_features {
IOMMU_DEV_FEAT_IOPF,
};
+#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */
+#define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */
#define IOMMU_PASID_INVALID (-1U)
typedef unsigned int ioasid_t;
@@ -226,16 +345,183 @@ struct iommu_iotlb_gather {
};
/**
+ * struct iommu_dirty_bitmap - Dirty IOVA bitmap state
+ * @bitmap: IOVA bitmap
+ * @gather: Range information for a pending IOTLB flush
+ */
+struct iommu_dirty_bitmap {
+ struct iova_bitmap *bitmap;
+ struct iommu_iotlb_gather *gather;
+};
+
+/* Read but do not clear any dirty bits */
+#define IOMMU_DIRTY_NO_CLEAR (1 << 0)
+
+/**
+ * struct iommu_dirty_ops - domain specific dirty tracking operations
+ * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain
+ * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled
+ * into a bitmap, with a bit represented as a page.
+ * Reads the dirty PTE bits and clears it from IO
+ * pagetables.
+ */
+struct iommu_dirty_ops {
+ int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled);
+ int (*read_and_clear_dirty)(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty);
+};
+
+/**
+ * struct iommu_user_data - iommu driver specific user space data info
+ * @type: The data type of the user buffer
+ * @uptr: Pointer to the user buffer for copy_from_user()
+ * @len: The length of the user buffer in bytes
+ *
+ * A user space data is an uAPI that is defined in include/uapi/linux/iommufd.h
+ * @type, @uptr and @len should be just copied from an iommufd core uAPI struct.
+ */
+struct iommu_user_data {
+ unsigned int type;
+ void __user *uptr;
+ size_t len;
+};
+
+/**
+ * struct iommu_user_data_array - iommu driver specific user space data array
+ * @type: The data type of all the entries in the user buffer array
+ * @uptr: Pointer to the user buffer array
+ * @entry_len: The fixed-width length of an entry in the array, in bytes
+ * @entry_num: The number of total entries in the array
+ *
+ * The user buffer includes an array of requests with format defined in
+ * include/uapi/linux/iommufd.h
+ */
+struct iommu_user_data_array {
+ unsigned int type;
+ void __user *uptr;
+ size_t entry_len;
+ u32 entry_num;
+};
+
+/**
+ * __iommu_copy_struct_from_user - Copy iommu driver specific user space data
+ * @dst_data: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @src_data: Pointer to a struct iommu_user_data for user space data info
+ * @data_type: The data type of the @dst_data. Must match with @src_data.type
+ * @data_len: Length of current user data structure, i.e. sizeof(struct _dst)
+ * @min_len: Initial length of user data structure for backward compatibility.
+ * This should be offsetofend using the last member in the user data
+ * struct that was initially added to include/uapi/linux/iommufd.h
+ */
+static inline int __iommu_copy_struct_from_user(
+ void *dst_data, const struct iommu_user_data *src_data,
+ unsigned int data_type, size_t data_len, size_t min_len)
+{
+ if (src_data->type != data_type)
+ return -EINVAL;
+ if (WARN_ON(!dst_data || !src_data))
+ return -EINVAL;
+ if (src_data->len < min_len || data_len < src_data->len)
+ return -EINVAL;
+ return copy_struct_from_user(dst_data, data_len, src_data->uptr,
+ src_data->len);
+}
+
+/**
+ * iommu_copy_struct_from_user - Copy iommu driver specific user space data
+ * @kdst: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @user_data: Pointer to a struct iommu_user_data for user space data info
+ * @data_type: The data type of the @kdst. Must match with @user_data->type
+ * @min_last: The last memember of the data structure @kdst points in the
+ * initial version.
+ * Return 0 for success, otherwise -error.
+ */
+#define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \
+ __iommu_copy_struct_from_user(kdst, user_data, data_type, \
+ sizeof(*kdst), \
+ offsetofend(typeof(*kdst), min_last))
+
+/**
+ * __iommu_copy_struct_from_user_array - Copy iommu driver specific user space
+ * data from an iommu_user_data_array
+ * @dst_data: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @src_array: Pointer to a struct iommu_user_data_array for a user space array
+ * @data_type: The data type of the @dst_data. Must match with @src_array.type
+ * @index: Index to the location in the array to copy user data from
+ * @data_len: Length of current user data structure, i.e. sizeof(struct _dst)
+ * @min_len: Initial length of user data structure for backward compatibility.
+ * This should be offsetofend using the last member in the user data
+ * struct that was initially added to include/uapi/linux/iommufd.h
+ */
+static inline int __iommu_copy_struct_from_user_array(
+ void *dst_data, const struct iommu_user_data_array *src_array,
+ unsigned int data_type, unsigned int index, size_t data_len,
+ size_t min_len)
+{
+ struct iommu_user_data src_data;
+
+ if (WARN_ON(!src_array || index >= src_array->entry_num))
+ return -EINVAL;
+ if (!src_array->entry_num)
+ return -EINVAL;
+ src_data.uptr = src_array->uptr + src_array->entry_len * index;
+ src_data.len = src_array->entry_len;
+ src_data.type = src_array->type;
+
+ return __iommu_copy_struct_from_user(dst_data, &src_data, data_type,
+ data_len, min_len);
+}
+
+/**
+ * iommu_copy_struct_from_user_array - Copy iommu driver specific user space
+ * data from an iommu_user_data_array
+ * @kdst: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @user_array: Pointer to a struct iommu_user_data_array for a user space
+ * array
+ * @data_type: The data type of the @kdst. Must match with @user_array->type
+ * @index: Index to the location in the array to copy user data from
+ * @min_last: The last member of the data structure @kdst points in the
+ * initial version.
+ * Return 0 for success, otherwise -error.
+ */
+#define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \
+ min_last) \
+ __iommu_copy_struct_from_user_array( \
+ kdst, user_array, data_type, index, sizeof(*(kdst)), \
+ offsetofend(typeof(*(kdst)), min_last))
+
+/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
- * @domain_alloc: allocate iommu domain
+ * @hw_info: report iommu hardware information. The data buffer returned by this
+ * op is allocated in the iommu driver and freed by the caller after
+ * use. The information type is one of enum iommu_hw_info_type defined
+ * in include/uapi/linux/iommufd.h.
+ * @domain_alloc: allocate and return an iommu domain if success. Otherwise
+ * NULL is returned. The domain is not fully initialized until
+ * the caller iommu_domain_alloc() returns.
+ * @domain_alloc_user: Allocate an iommu domain corresponding to the input
+ * parameters as defined in include/uapi/linux/iommufd.h.
+ * Unlike @domain_alloc, it is called only by IOMMUFD and
+ * must fully initialize the new domain before return.
+ * Upon success, if the @user_data is valid and the @parent
+ * points to a kernel-managed domain, the new domain must be
+ * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be
+ * NULL while the @user_data can be optionally provided, the
+ * new domain must support __IOMMU_DOMAIN_PAGING.
+ * Upon failure, ERR_PTR must be returned.
+ * @domain_alloc_paging: Allocate an iommu_domain that can be used for
+ * UNMANAGED, DMA, and DMA_FQ domain types.
* @probe_device: Add device to iommu driver handling
* @release_device: Remove device from iommu driver handling
* @probe_finalize: Do final setup work after the device is added to an IOMMU
* group and attached to the groups domain
- * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op
- * is to support old IOMMU drivers, new drivers should use
- * default domains, and the common IOMMU DMA ops.
* @device_group: find iommu group for a particular device
* @get_resv_regions: Request list of reserved regions for a device
* @of_xlate: add OF master IDs to iommu grouping
@@ -254,32 +540,42 @@ struct iommu_iotlb_gather {
* will be blocked by the hardware.
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @owner: Driver module providing these ops
+ * @identity_domain: An always available, always attachable identity
+ * translation.
+ * @blocked_domain: An always available, always attachable blocking
+ * translation.
+ * @default_domain: If not NULL this will always be set as the default domain.
+ * This should be an IDENTITY/BLOCKED/PLATFORM domain.
+ * Do not use in new drivers.
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
+ void *(*hw_info)(struct device *dev, u32 *length, u32 *type);
/* Domain allocation and freeing by the iommu driver */
struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type);
+ struct iommu_domain *(*domain_alloc_user)(
+ struct device *dev, u32 flags, struct iommu_domain *parent,
+ const struct iommu_user_data *user_data);
+ struct iommu_domain *(*domain_alloc_paging)(struct device *dev);
struct iommu_device *(*probe_device)(struct device *dev);
void (*release_device)(struct device *dev);
void (*probe_finalize)(struct device *dev);
- void (*set_platform_dma_ops)(struct device *dev);
struct iommu_group *(*device_group)(struct device *dev);
/* Request/Free a list of reserved regions for a device */
void (*get_resv_regions)(struct device *dev, struct list_head *list);
- int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+ int (*of_xlate)(struct device *dev, const struct of_phandle_args *args);
bool (*is_attach_deferred)(struct device *dev);
/* Per device IOMMU features */
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
- int (*page_response)(struct device *dev,
- struct iommu_fault_event *evt,
- struct iommu_page_response *msg);
+ void (*page_response)(struct device *dev, struct iopf_fault *evt,
+ struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
@@ -287,6 +583,10 @@ struct iommu_ops {
const struct iommu_domain_ops *default_domain_ops;
unsigned long pgsize_bitmap;
struct module *owner;
+ struct iommu_domain *identity_domain;
+ struct iommu_domain *blocked_domain;
+ struct iommu_domain *release_domain;
+ struct iommu_domain *default_domain;
};
/**
@@ -305,15 +605,20 @@ struct iommu_ops {
* * ENODEV - device specific errors, not able to be attached
* * <others> - treated as ENODEV by the caller. Use is discouraged
* @set_dev_pasid: set an iommu domain to a pasid of device
- * @map: map a physically contiguous memory region to an iommu domain
* @map_pages: map a physically contiguous set of pages of the same size to
* an iommu domain.
- * @unmap: unmap a physically contiguous memory region from an iommu domain
* @unmap_pages: unmap a number of pages of the same size from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
* @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
* queue
+ * @cache_invalidate_user: Flush hardware cache for user space IO page table.
+ * The @domain must be IOMMU_DOMAIN_NESTED. The @array
+ * passes in the cache invalidation requests, in form
+ * of a driver data structure. The driver must update
+ * array->entry_num to report the number of handled
+ * invalidation requests. The driver data structure
+ * must be defined in include/uapi/linux/iommufd.h
* @iova_to_phys: translate iova to physical address
* @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE,
* including no-snoop TLPs on PCIe or other platform
@@ -327,22 +632,20 @@ struct iommu_domain_ops {
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid);
- int (*map)(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped);
- size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount,
struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
- void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
- size_t size);
+ int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
+ int (*cache_invalidate_user)(struct iommu_domain *domain,
+ struct iommu_user_data_array *array);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
dma_addr_t iova);
@@ -361,6 +664,7 @@ struct iommu_domain_ops {
* @list: Used by the iommu-core to keep a list of registered iommus
* @ops: iommu-ops for talking to this iommu
* @dev: struct device for sysfs handling
+ * @singleton_group: Used internally for drivers that have only one group
* @max_pasids: number of supported PASIDs
*/
struct iommu_device {
@@ -368,60 +672,62 @@ struct iommu_device {
const struct iommu_ops *ops;
struct fwnode_handle *fwnode;
struct device *dev;
+ struct iommu_group *singleton_group;
u32 max_pasids;
};
/**
- * struct iommu_fault_event - Generic fault event
- *
- * Can represent recoverable faults such as a page requests or
- * unrecoverable faults such as DMA or IRQ remapping faults.
- *
- * @fault: fault descriptor
- * @list: pending fault event list, used for tracking responses
- */
-struct iommu_fault_event {
- struct iommu_fault fault;
- struct list_head list;
-};
-
-/**
* struct iommu_fault_param - per-device IOMMU fault data
- * @handler: Callback function to handle IOMMU faults at device level
- * @data: handler private data
- * @faults: holds the pending faults which needs response
* @lock: protect pending faults list
+ * @users: user counter to manage the lifetime of the data
+ * @rcu: rcu head for kfree_rcu()
+ * @dev: the device that owns this param
+ * @queue: IOPF queue
+ * @queue_list: index into queue->devices
+ * @partial: faults that are part of a Page Request Group for which the last
+ * request hasn't been submitted yet.
+ * @faults: holds the pending faults which need response
*/
struct iommu_fault_param {
- iommu_dev_fault_handler_t handler;
- void *data;
- struct list_head faults;
struct mutex lock;
+ refcount_t users;
+ struct rcu_head rcu;
+
+ struct device *dev;
+ struct iopf_queue *queue;
+ struct list_head queue_list;
+
+ struct list_head partial;
+ struct list_head faults;
};
/**
* struct dev_iommu - Collection of per-device IOMMU data
*
* @fault_param: IOMMU detected device fault reporting data
- * @iopf_param: I/O Page Fault queue and data
* @fwspec: IOMMU fwspec data
* @iommu_dev: IOMMU device this device is linked to
* @priv: IOMMU Driver private data
* @max_pasids: number of PASIDs this device can consume
* @attach_deferred: the dma domain attachment is deferred
+ * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
+ * @require_direct: device requires IOMMU_RESV_DIRECT regions
+ * @shadow_on_flush: IOTLB flushes are used to sync shadow tables
*
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
* struct iommu_group *iommu_group;
*/
struct dev_iommu {
struct mutex lock;
- struct iommu_fault_param *fault_param;
- struct iopf_device_param *iopf_param;
+ struct iommu_fault_param __rcu *fault_param;
struct iommu_fwspec *fwspec;
struct iommu_device *iommu_dev;
void *priv;
u32 max_pasids;
u32 attach_deferred:1;
+ u32 pci_32bit_workaround:1;
+ u32 require_direct:1;
+ u32 shadow_on_flush:1;
};
int iommu_device_register(struct iommu_device *iommu,
@@ -442,6 +748,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
return (struct iommu_device *)dev_get_drvdata(dev);
}
+/**
+ * iommu_get_iommu_dev - Get iommu_device for a device
+ * @dev: an end-point device
+ *
+ * Note that this function must be called from the iommu_ops
+ * to retrieve the iommu_device for a device, which the core code
+ * guarentees it will not invoke the op without an attached iommu.
+ */
+static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev)
+{
+ return dev->iommu->iommu_dev;
+}
+
+#define iommu_get_iommu_dev(dev, type, member) \
+ container_of(__iommu_get_iommu_dev(dev), type, member)
+
static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
{
*gather = (struct iommu_iotlb_gather) {
@@ -450,17 +772,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
};
}
-static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
-{
- /*
- * Assume that valid ops must be installed if iommu_probe_device()
- * has succeeded. The device ops are essentially for internal use
- * within the IOMMU subsystem itself, so we should be able to trust
- * ourselves not to misuse the helper.
- */
- return dev->iommu->iommu_dev->ops;
-}
-
extern int bus_iommu_probe(const struct bus_type *bus);
extern bool iommu_present(const struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
@@ -518,16 +829,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data,
extern struct iommu_group *iommu_group_get(struct device *dev);
extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data);
-
-extern int iommu_unregister_device_fault_handler(struct device *dev);
-
-extern int iommu_report_device_fault(struct device *dev,
- struct iommu_fault_event *evt);
-extern int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg);
extern int iommu_group_id(struct iommu_group *group);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
@@ -632,12 +933,35 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
return gather && gather->queued;
}
+static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty,
+ struct iova_bitmap *bitmap,
+ struct iommu_iotlb_gather *gather)
+{
+ if (gather)
+ iommu_iotlb_gather_init(gather);
+
+ dirty->bitmap = bitmap;
+ dirty->gather = gather;
+}
+
+static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty,
+ unsigned long iova,
+ unsigned long length)
+{
+ if (dirty->bitmap)
+ iova_bitmap_set(dirty->bitmap, iova, length);
+
+ if (dirty->gather)
+ iommu_iotlb_gather_add_range(dirty->gather, iova, length);
+}
+
/* PCI device grouping function */
extern struct iommu_group *pci_device_group(struct device *dev);
/* Generic device grouping function */
extern struct iommu_group *generic_device_group(struct device *dev);
/* FSL-MC device grouping function */
struct iommu_group *fsl_mc_device_group(struct device *dev);
+extern struct iommu_group *generic_single_device_group(struct device *dev);
/**
* struct iommu_fwspec - per-device IOMMU instance data
@@ -668,13 +992,21 @@ struct iommu_fwspec {
struct iommu_sva {
struct device *dev;
struct iommu_domain *domain;
+ struct list_head handle_item;
+ refcount_t users;
+};
+
+struct iommu_mm_data {
+ u32 pasid;
+ struct list_head sva_domains;
+ struct list_head sva_handles;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
const struct iommu_ops *ops);
void iommu_fwspec_free(struct device *dev);
-int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
-const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
+int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids);
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
{
@@ -698,11 +1030,9 @@ static inline void *dev_iommu_priv_get(struct device *dev)
return NULL;
}
-static inline void dev_iommu_priv_set(struct device *dev, void *priv)
-{
- dev->iommu->priv = priv;
-}
+void dev_iommu_priv_set(struct device *dev, void *priv);
+extern struct mutex iommu_probe_device_lock;
int iommu_probe_device(struct device *dev);
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
@@ -718,8 +1048,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group);
int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
- struct mm_struct *mm);
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain,
@@ -727,6 +1055,8 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
unsigned int type);
+ioasid_t iommu_alloc_global_pasid(struct device *dev);
+void iommu_free_global_pasid(ioasid_t pasid);
#else /* CONFIG_IOMMU_API */
struct iommu_ops {};
@@ -735,6 +1065,8 @@ struct iommu_fwspec {};
struct iommu_device {};
struct iommu_fault_param {};
struct iommu_iotlb_gather {};
+struct iommu_dirty_bitmap {};
+struct iommu_dirty_ops {};
static inline bool iommu_present(const struct bus_type *bus)
{
@@ -904,31 +1236,6 @@ static inline void iommu_group_put(struct iommu_group *group)
{
}
-static inline
-int iommu_register_device_fault_handler(struct device *dev,
- iommu_dev_fault_handler_t handler,
- void *data)
-{
- return -ENODEV;
-}
-
-static inline int iommu_unregister_device_fault_handler(struct device *dev)
-{
- return 0;
-}
-
-static inline
-int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
-{
- return -ENODEV;
-}
-
-static inline int iommu_page_response(struct device *dev,
- struct iommu_page_response *msg)
-{
- return -ENODEV;
-}
-
static inline int iommu_group_id(struct iommu_group *group)
{
return -ENODEV;
@@ -967,6 +1274,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
return false;
}
+static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty,
+ struct iova_bitmap *bitmap,
+ struct iommu_iotlb_gather *gather)
+{
+}
+
+static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty,
+ unsigned long iova,
+ unsigned long length)
+{
+}
+
static inline void iommu_device_unregister(struct iommu_device *iommu)
{
}
@@ -1010,7 +1329,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
}
static inline
-const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
{
return NULL;
}
@@ -1065,12 +1384,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
return -ENODEV;
}
-static inline struct iommu_domain *
-iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
-{
- return NULL;
-}
-
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
@@ -1088,8 +1401,23 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
{
return NULL;
}
+
+static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
+{
+ return IOMMU_PASID_INVALID;
+}
+
+static inline void iommu_free_global_pasid(ioasid_t pasid) {}
#endif /* CONFIG_IOMMU_API */
+#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
+void iommu_group_mutex_assert(struct device *dev);
+#else
+static inline void iommu_group_mutex_assert(struct device *dev)
+{
+}
+#endif
+
/**
* iommu_map_sgtable - Map the given buffer to the IOMMU domain
* @domain: The IOMMU domain to perform the mapping
@@ -1100,7 +1428,7 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
* Creates a mapping at @iova for the buffer described by a scatterlist
* stored in the given sg_table object in the provided IOMMU domain.
*/
-static inline size_t iommu_map_sgtable(struct iommu_domain *domain,
+static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain,
unsigned long iova, struct sg_table *sgt, int prot)
{
return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot,
@@ -1171,20 +1499,40 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream
return false;
}
-#ifdef CONFIG_IOMMU_SVA
+#ifdef CONFIG_IOMMU_MM_DATA
static inline void mm_pasid_init(struct mm_struct *mm)
{
- mm->pasid = IOMMU_PASID_INVALID;
+ /*
+ * During dup_mm(), a new mm will be memcpy'd from an old one and that makes
+ * the new mm and the old one point to a same iommu_mm instance. When either
+ * one of the two mms gets released, the iommu_mm instance is freed, leaving
+ * the other mm running into a use-after-free/double-free problem. To avoid
+ * the problem, zeroing the iommu_mm pointer of a new mm is needed here.
+ */
+ mm->iommu_mm = NULL;
}
+
static inline bool mm_valid_pasid(struct mm_struct *mm)
{
- return mm->pasid != IOMMU_PASID_INVALID;
+ return READ_ONCE(mm->iommu_mm);
}
+
+static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
+{
+ struct iommu_mm_data *iommu_mm = READ_ONCE(mm->iommu_mm);
+
+ if (!iommu_mm)
+ return IOMMU_PASID_INVALID;
+ return iommu_mm->pasid;
+}
+
void mm_pasid_drop(struct mm_struct *mm);
struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+ struct mm_struct *mm);
#else
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1202,7 +1550,75 @@ static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
}
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
+
+static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
+{
+ return IOMMU_PASID_INVALID;
+}
+
static inline void mm_pasid_drop(struct mm_struct *mm) {}
+
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_SVA */
+#ifdef CONFIG_IOMMU_IOPF
+int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
+void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev);
+int iopf_queue_flush_dev(struct device *dev);
+struct iopf_queue *iopf_queue_alloc(const char *name);
+void iopf_queue_free(struct iopf_queue *queue);
+int iopf_queue_discard_partial(struct iopf_queue *queue);
+void iopf_free_group(struct iopf_group *group);
+void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
+void iopf_group_response(struct iopf_group *group,
+ enum iommu_page_response_code status);
+#else
+static inline int
+iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline void
+iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
+{
+}
+
+static inline int iopf_queue_flush_dev(struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline struct iopf_queue *iopf_queue_alloc(const char *name)
+{
+ return NULL;
+}
+
+static inline void iopf_queue_free(struct iopf_queue *queue)
+{
+}
+
+static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
+{
+ return -ENODEV;
+}
+
+static inline void iopf_free_group(struct iopf_group *group)
+{
+}
+
+static inline void
+iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
+{
+}
+
+static inline void iopf_group_response(struct iopf_group *group,
+ enum iommu_page_response_code status)
+{
+}
+#endif /* CONFIG_IOMMU_IOPF */
#endif /* __LINUX_IOMMU_H */
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 1129a36a74c4..ffc3a949f837 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -16,14 +16,19 @@ struct page;
struct iommufd_ctx;
struct iommufd_access;
struct file;
+struct iommu_group;
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
struct device *dev, u32 *id);
void iommufd_device_unbind(struct iommufd_device *idev);
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
+int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id);
void iommufd_device_detach(struct iommufd_device *idev);
+struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev);
+u32 iommufd_device_to_id(struct iommufd_device *idev);
+
struct iommufd_access_ops {
u8 needs_pin_pages : 1;
void (*unmap)(void *data, unsigned long iova, unsigned long length);
@@ -44,12 +49,16 @@ iommufd_access_create(struct iommufd_ctx *ictx,
const struct iommufd_access_ops *ops, void *data, u32 *id);
void iommufd_access_destroy(struct iommufd_access *access);
int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id);
+int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id);
+void iommufd_access_detach(struct iommufd_access *access);
void iommufd_ctx_get(struct iommufd_ctx *ictx);
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
+struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
void iommufd_ctx_put(struct iommufd_ctx *ictx);
+bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group);
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
unsigned long length, struct page **out_pages,
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 25d768d48970..db7fe25f3370 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res)
return res->flags & IORESOURCE_EXT_TYPE_BITS;
}
/* True iff r1 completely contains r2 */
-static inline bool resource_contains(struct resource *r1, struct resource *r2)
+static inline bool resource_contains(const struct resource *r1, const struct resource *r2)
{
if (resource_type(r1) != resource_type(r2))
return false;
@@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2)
}
/* True if any part of r1 overlaps r2 */
-static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
+static inline bool resource_overlaps(const struct resource *r1, const struct resource *r2)
{
return r1->start <= r2->end && r1->end >= r2->start;
}
-static inline bool
-resource_intersection(struct resource *r1, struct resource *r2, struct resource *r)
+static inline bool resource_intersection(const struct resource *r1, const struct resource *r2,
+ struct resource *r)
{
if (!resource_overlaps(r1, r2))
return false;
@@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource
return true;
}
-static inline bool
-resource_union(struct resource *r1, struct resource *r2, struct resource *r)
+static inline bool resource_union(const struct resource *r1, const struct resource *r2,
+ struct resource *r)
{
if (!resource_overlaps(r1, r2))
return false;
@@ -331,6 +331,9 @@ extern int
walk_system_ram_res(u64 start, u64 end, void *arg,
int (*func)(struct resource *, void *));
extern int
+walk_system_ram_res_rev(u64 start, u64 end, void *arg,
+ int (*func)(struct resource *, void *));
+extern int
walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
void *arg, int (*func)(struct resource *, void *));
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 7578d4f6a969..db1249cd9692 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task)
}
#ifdef CONFIG_BLOCK
-int __get_task_ioprio(struct task_struct *p);
+/*
+ * If the task has set an I/O priority, use that. Otherwise, return
+ * the default I/O priority.
+ *
+ * Expected to be called for current task or with task_lock() held to keep
+ * io_context stable.
+ */
+static inline int __get_task_ioprio(struct task_struct *p)
+{
+ struct io_context *ioc = p->io_context;
+ int prio;
+
+ if (!ioc)
+ return IOPRIO_DEFAULT;
+
+ if (p != current)
+ lockdep_assert_held(&p->alloc_lock);
+
+ prio = ioc->ioprio;
+ if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
+ prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
+ task_nice_ioprio(p));
+ return prio;
+}
#else
static inline int __get_task_ioprio(struct task_struct *p)
{
diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h
new file mode 100644
index 000000000000..f0e99fc7dd8b
--- /dev/null
+++ b/include/linux/ioremap.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IOREMAP_H
+#define _LINUX_IOREMAP_H
+
+#include <linux/kasan.h>
+#include <asm/pgtable.h>
+
+#if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP)
+/*
+ * Ioremap often, but not always uses the generic vmalloc area. E.g on
+ * Power ARCH, it could have different ioremap space.
+ */
+#ifndef IOREMAP_START
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+#endif
+static inline bool is_ioremap_addr(const void *x)
+{
+ unsigned long addr = (unsigned long)kasan_reset_tag(x);
+
+ return addr >= IOREMAP_START && addr < IOREMAP_END;
+}
+#else
+static inline bool is_ioremap_addr(const void *x)
+{
+ return false;
+}
+#endif
+
+#endif /* _LINUX_IOREMAP_H */
diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
index cb71aa616bd3..4696abfd311c 100644
--- a/include/linux/iosys-map.h
+++ b/include/linux/iosys-map.h
@@ -34,7 +34,7 @@
* the same driver for allocation, read and write operations.
*
* Open-coding access to :c:type:`struct iosys_map <iosys_map>` is considered
- * bad style. Rather then accessing its fields directly, use one of the provided
+ * bad style. Rather than accessing its fields directly, use one of the provided
* helper functions, or implement your own. For example, instances of
* :c:type:`struct iosys_map <iosys_map>` can be initialized statically with
* IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These
@@ -168,9 +168,9 @@ struct iosys_map {
* about the use of uninitialized variable.
*/
#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({ \
- struct iosys_map copy = *map_; \
- iosys_map_incr(&copy, offset_); \
- copy; \
+ struct iosys_map copy_ = *map_; \
+ iosys_map_incr(&copy_, offset_); \
+ copy_; \
})
/**
@@ -391,14 +391,14 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
* Returns:
* The value read from the mapping.
*/
-#define iosys_map_rd(map__, offset__, type__) ({ \
- type__ val; \
- if ((map__)->is_iomem) { \
- __iosys_map_rd_io(val, (map__)->vaddr_iomem + (offset__), type__);\
- } else { \
- __iosys_map_rd_sys(val, (map__)->vaddr + (offset__), type__); \
- } \
- val; \
+#define iosys_map_rd(map__, offset__, type__) ({ \
+ type__ val_; \
+ if ((map__)->is_iomem) { \
+ __iosys_map_rd_io(val_, (map__)->vaddr_iomem + (offset__), type__); \
+ } else { \
+ __iosys_map_rd_sys(val_, (map__)->vaddr + (offset__), type__); \
+ } \
+ val_; \
})
/**
@@ -413,20 +413,20 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
* or if pointer may be unaligned (and problematic for the architecture
* supported), use iosys_map_memcpy_to()
*/
-#define iosys_map_wr(map__, offset__, type__, val__) ({ \
- type__ val = (val__); \
- if ((map__)->is_iomem) { \
- __iosys_map_wr_io(val, (map__)->vaddr_iomem + (offset__), type__);\
- } else { \
- __iosys_map_wr_sys(val, (map__)->vaddr + (offset__), type__); \
- } \
+#define iosys_map_wr(map__, offset__, type__, val__) ({ \
+ type__ val_ = (val__); \
+ if ((map__)->is_iomem) { \
+ __iosys_map_wr_io(val_, (map__)->vaddr_iomem + (offset__), type__); \
+ } else { \
+ __iosys_map_wr_sys(val_, (map__)->vaddr + (offset__), type__); \
+ } \
})
/**
* iosys_map_rd_field - Read a member from a struct in the iosys_map
*
* @map__: The iosys_map structure
- * @struct_offset__: Offset from the beggining of the map, where the struct
+ * @struct_offset__: Offset from the beginning of the map, where the struct
* is located
* @struct_type__: The struct describing the layout of the mapping
* @field__: Member of the struct to read
@@ -485,16 +485,16 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
* The value read from the mapping.
*/
#define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({ \
- struct_type__ *s; \
+ struct_type__ *s_; \
iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__), \
- typeof(s->field__)); \
+ typeof(s_->field__)); \
})
/**
* iosys_map_wr_field - Write to a member of a struct in the iosys_map
*
* @map__: The iosys_map structure
- * @struct_offset__: Offset from the beggining of the map, where the struct
+ * @struct_offset__: Offset from the beginning of the map, where the struct
* is located
* @struct_type__: The struct describing the layout of the mapping
* @field__: Member of the struct to read
@@ -508,9 +508,9 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
* usage and memory layout.
*/
#define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({ \
- struct_type__ *s; \
+ struct_type__ *s_; \
iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__), \
- typeof(s->field__), val__); \
+ typeof(s_->field__), val__); \
})
#endif /* __IOSYS_MAP_H__ */
diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h
new file mode 100644
index 000000000000..270454a6703d
--- /dev/null
+++ b/include/linux/iov_iter.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* I/O iterator iteration building functions.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_IOV_ITER_H
+#define _LINUX_IOV_ITER_H
+
+#include <linux/uio.h>
+#include <linux/bvec.h>
+
+typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2);
+typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2);
+
+/*
+ * Handle ITER_UBUF.
+ */
+static __always_inline
+size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_ustep_f step)
+{
+ void __user *base = iter->ubuf;
+ size_t progress = 0, remain;
+
+ remain = step(base + iter->iov_offset, 0, len, priv, priv2);
+ progress = len - remain;
+ iter->iov_offset += progress;
+ iter->count -= progress;
+ return progress;
+}
+
+/*
+ * Handle ITER_IOVEC.
+ */
+static __always_inline
+size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_ustep_f step)
+{
+ const struct iovec *p = iter->__iov;
+ size_t progress = 0, skip = iter->iov_offset;
+
+ do {
+ size_t remain, consumed;
+ size_t part = min(len, p->iov_len - skip);
+
+ if (likely(part)) {
+ remain = step(p->iov_base + skip, progress, part, priv, priv2);
+ consumed = part - remain;
+ progress += consumed;
+ skip += consumed;
+ len -= consumed;
+ if (skip < p->iov_len)
+ break;
+ }
+ p++;
+ skip = 0;
+ } while (len);
+
+ iter->nr_segs -= p - iter->__iov;
+ iter->__iov = p;
+ iter->iov_offset = skip;
+ iter->count -= progress;
+ return progress;
+}
+
+/*
+ * Handle ITER_KVEC.
+ */
+static __always_inline
+size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_step_f step)
+{
+ const struct kvec *p = iter->kvec;
+ size_t progress = 0, skip = iter->iov_offset;
+
+ do {
+ size_t remain, consumed;
+ size_t part = min(len, p->iov_len - skip);
+
+ if (likely(part)) {
+ remain = step(p->iov_base + skip, progress, part, priv, priv2);
+ consumed = part - remain;
+ progress += consumed;
+ skip += consumed;
+ len -= consumed;
+ if (skip < p->iov_len)
+ break;
+ }
+ p++;
+ skip = 0;
+ } while (len);
+
+ iter->nr_segs -= p - iter->kvec;
+ iter->kvec = p;
+ iter->iov_offset = skip;
+ iter->count -= progress;
+ return progress;
+}
+
+/*
+ * Handle ITER_BVEC.
+ */
+static __always_inline
+size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_step_f step)
+{
+ const struct bio_vec *p = iter->bvec;
+ size_t progress = 0, skip = iter->iov_offset;
+
+ do {
+ size_t remain, consumed;
+ size_t offset = p->bv_offset + skip, part;
+ void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);
+
+ part = min3(len,
+ (size_t)(p->bv_len - skip),
+ (size_t)(PAGE_SIZE - offset % PAGE_SIZE));
+ remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
+ kunmap_local(kaddr);
+ consumed = part - remain;
+ len -= consumed;
+ progress += consumed;
+ skip += consumed;
+ if (skip >= p->bv_len) {
+ skip = 0;
+ p++;
+ }
+ if (remain)
+ break;
+ } while (len);
+
+ iter->nr_segs -= p - iter->bvec;
+ iter->bvec = p;
+ iter->iov_offset = skip;
+ iter->count -= progress;
+ return progress;
+}
+
+/*
+ * Handle ITER_XARRAY.
+ */
+static __always_inline
+size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_step_f step)
+{
+ struct folio *folio;
+ size_t progress = 0;
+ loff_t start = iter->xarray_start + iter->iov_offset;
+ pgoff_t index = start / PAGE_SIZE;
+ XA_STATE(xas, iter->xarray, index);
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ size_t remain, consumed, offset, part, flen;
+
+ if (xas_retry(&xas, folio))
+ continue;
+ if (WARN_ON(xa_is_value(folio)))
+ break;
+ if (WARN_ON(folio_test_hugetlb(folio)))
+ break;
+
+ offset = offset_in_folio(folio, start + progress);
+ flen = min(folio_size(folio) - offset, len);
+
+ while (flen) {
+ void *base = kmap_local_folio(folio, offset);
+
+ part = min_t(size_t, flen,
+ PAGE_SIZE - offset_in_page(offset));
+ remain = step(base, progress, part, priv, priv2);
+ kunmap_local(base);
+
+ consumed = part - remain;
+ progress += consumed;
+ len -= consumed;
+
+ if (remain || len == 0)
+ goto out;
+ flen -= consumed;
+ offset += consumed;
+ }
+ }
+
+out:
+ rcu_read_unlock();
+ iter->iov_offset += progress;
+ iter->count -= progress;
+ return progress;
+}
+
+/*
+ * Handle ITER_DISCARD.
+ */
+static __always_inline
+size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
+ iov_step_f step)
+{
+ size_t progress = len;
+
+ iter->count -= progress;
+ return progress;
+}
+
+/**
+ * iterate_and_advance2 - Iterate over an iterator
+ * @iter: The iterator to iterate over.
+ * @len: The amount to iterate over.
+ * @priv: Data for the step functions.
+ * @priv2: More data for the step functions.
+ * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
+ * @step: Function for other iterators; given kernel addresses.
+ *
+ * Iterate over the next part of an iterator, up to the specified length. The
+ * buffer is presented in segments, which for kernel iteration are broken up by
+ * physical pages and mapped, with the mapped address being presented.
+ *
+ * Two step functions, @step and @ustep, must be provided, one for handling
+ * mapped kernel addresses and the other is given user addresses which have the
+ * potential to fault since no pinning is performed.
+ *
+ * The step functions are passed the address and length of the segment, @priv,
+ * @priv2 and the amount of data so far iterated over (which can, for example,
+ * be added to @priv to point to the right part of a second buffer). The step
+ * functions should return the amount of the segment they didn't process (ie. 0
+ * indicates complete processsing).
+ *
+ * This function returns the amount of data processed (ie. 0 means nothing was
+ * processed and the value of @len means processes to completion).
+ */
+static __always_inline
+size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
+ void *priv2, iov_ustep_f ustep, iov_step_f step)
+{
+ if (unlikely(iter->count < len))
+ len = iter->count;
+ if (unlikely(!len))
+ return 0;
+
+ if (likely(iter_is_ubuf(iter)))
+ return iterate_ubuf(iter, len, priv, priv2, ustep);
+ if (likely(iter_is_iovec(iter)))
+ return iterate_iovec(iter, len, priv, priv2, ustep);
+ if (iov_iter_is_bvec(iter))
+ return iterate_bvec(iter, len, priv, priv2, step);
+ if (iov_iter_is_kvec(iter))
+ return iterate_kvec(iter, len, priv, priv2, step);
+ if (iov_iter_is_xarray(iter))
+ return iterate_xarray(iter, len, priv, priv2, step);
+ return iterate_discard(iter, len, priv, priv2, step);
+}
+
+/**
+ * iterate_and_advance - Iterate over an iterator
+ * @iter: The iterator to iterate over.
+ * @len: The amount to iterate over.
+ * @priv: Data for the step functions.
+ * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
+ * @step: Function for other iterators; given kernel addresses.
+ *
+ * As iterate_and_advance2(), but priv2 is always NULL.
+ */
+static __always_inline
+size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
+ iov_ustep_f ustep, iov_step_f step)
+{
+ return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
+}
+
+#endif /* _LINUX_IOV_ITER_H */
diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h
index c006cf0a25f3..1c338f5e5b7a 100644
--- a/include/linux/iova_bitmap.h
+++ b/include/linux/iova_bitmap.h
@@ -7,6 +7,7 @@
#define _IOVA_BITMAP_H_
#include <linux/types.h>
+#include <linux/errno.h>
struct iova_bitmap;
@@ -14,6 +15,7 @@ typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap,
unsigned long iova, size_t length,
void *opaque);
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER)
struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
unsigned long page_size,
u64 __user *data);
@@ -22,5 +24,29 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
iova_bitmap_fn_t fn);
void iova_bitmap_set(struct iova_bitmap *bitmap,
unsigned long iova, size_t length);
+#else
+static inline struct iova_bitmap *iova_bitmap_alloc(unsigned long iova,
+ size_t length,
+ unsigned long page_size,
+ u64 __user *data)
+{
+ return NULL;
+}
+
+static inline void iova_bitmap_free(struct iova_bitmap *bitmap)
+{
+}
+
+static inline int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
+ iova_bitmap_fn_t fn)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void iova_bitmap_set(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length)
+{
+}
+#endif
#endif
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index e1c9eea6015b..9b1434247aab 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -2,7 +2,7 @@
#ifndef _LINUX_IPC_H
#define _LINUX_IPC_H
-#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
#include <linux/uidgid.h>
#include <linux/rhashtable-types.h>
#include <uapi/linux/ipc.h>
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 839247a4f48e..383a0ea2ab91 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -3,6 +3,7 @@
#define _IPV6_H
#include <uapi/linux/ipv6.h>
+#include <linux/cache.h>
#define ipv6_optlen(p) (((p)->hdrlen+1) << 3)
#define ipv6_authlen(p) (((p)->hdrlen+2) << 2)
@@ -10,9 +11,16 @@
* This structure contains configuration options per IPv6 link.
*/
struct ipv6_devconf {
- __s32 forwarding;
+ /* RX & TX fastpath fields. */
+ __cacheline_group_begin(ipv6_devconf_read_txrx);
+ __s32 disable_ipv6;
__s32 hop_limit;
__s32 mtu6;
+ __s32 forwarding;
+ __s32 disable_policy;
+ __s32 proxy_ndp;
+ __cacheline_group_end(ipv6_devconf_read_txrx);
+
__s32 accept_ra;
__s32 accept_redirects;
__s32 autoconf;
@@ -27,12 +35,14 @@ struct ipv6_devconf {
__s32 use_tempaddr;
__s32 temp_valid_lft;
__s32 temp_prefered_lft;
+ __s32 regen_min_advance;
__s32 regen_max_retry;
__s32 max_desync_factor;
__s32 max_addresses;
__s32 accept_ra_defrtr;
__u32 ra_defrtr_metric;
__s32 accept_ra_min_hop_limit;
+ __s32 accept_ra_min_lft;
__s32 accept_ra_pinfo;
__s32 ignore_routes_with_linkdown;
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -43,7 +53,6 @@ struct ipv6_devconf {
__s32 accept_ra_rt_info_max_plen;
#endif
#endif
- __s32 proxy_ndp;
__s32 accept_source_route;
__s32 accept_ra_from_local;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -53,7 +62,6 @@ struct ipv6_devconf {
#ifdef CONFIG_IPV6_MROUTE
atomic_t mc_forwarding;
#endif
- __s32 disable_ipv6;
__s32 drop_unicast_in_l2_multicast;
__s32 accept_dad;
__s32 force_tllao;
@@ -74,13 +82,13 @@ struct ipv6_devconf {
#endif
__u32 enhanced_dad;
__u32 addr_gen_mode;
- __s32 disable_policy;
__s32 ndisc_tclass;
__s32 rpl_seg_enabled;
__u32 ioam6_id;
__u32 ioam6_id_wide;
__u8 ioam6_enabled;
__u8 ndisc_evict_nocarrier;
+ __u8 ra_honor_pio_life;
struct ctl_table_header *sysctl_header;
};
@@ -146,6 +154,7 @@ struct inet6_skb_parm {
#define IP6SKB_JUMBOGRAM 128
#define IP6SKB_SEG6 256
#define IP6SKB_FAKEJUMBO 512
+#define IP6SKB_MULTIPATH 1024
};
#if defined(CONFIG_NET_L3_MASTER_DEV)
@@ -199,14 +208,7 @@ struct inet6_cork {
u8 tclass;
};
-/**
- * struct ipv6_pinfo - ipv6 private area
- *
- * In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
- * this _must_ be the last member, so that inet6_sk_generic
- * is able to calculate its offset from the base struct sock
- * by using the struct proto->slab_obj_size member. -acme
- */
+/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_pktinfo sticky_pktinfo;
@@ -218,28 +220,9 @@ struct ipv6_pinfo {
__be32 flow_label;
__u32 frag_size;
- /*
- * Packed in 16bits.
- * Omit one shift by putting the signed field at MSB.
- */
-#if defined(__BIG_ENDIAN_BITFIELD)
- __s16 hop_limit:9;
- __u16 __unused_1:7;
-#else
- __u16 __unused_1:7;
- __s16 hop_limit:9;
-#endif
+ s16 hop_limit;
+ u8 mcast_hops;
-#if defined(__BIG_ENDIAN_BITFIELD)
- /* Packed in 16bits. */
- __s16 mcast_hops:9;
- __u16 __unused_2:6,
- mc_loop:1;
-#else
- __u16 mc_loop:1,
- __unused_2:6;
- __s16 mcast_hops:9;
-#endif
int ucast_oif;
int mcast_oif;
@@ -267,21 +250,11 @@ struct ipv6_pinfo {
} rxopt;
/* sockopt flags */
- __u16 recverr:1,
- sndflow:1,
- repflow:1,
- pmtudisc:3,
- padding:1, /* 1 bit hole */
- srcprefs:3, /* 001: prefer temporary address
+ __u8 srcprefs; /* 001: prefer temporary address
* 010: prefer public address
* 100: prefer care-of address
*/
- dontfrag:1,
- autoflowlabel:1,
- autoflowlabel_set:1,
- mc_all:1,
- recverr_rfc4884:1,
- rtalert_isolate:1;
+ __u8 pmtudisc;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
@@ -298,6 +271,18 @@ struct ipv6_pinfo {
struct inet6_cork cork;
};
+/* We currently use available bits from inet_sk(sk)->inet_flags,
+ * this could change in the future.
+ */
+#define inet6_test_bit(nr, sk) \
+ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_set_bit(nr, sk) \
+ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_clear_bit(nr, sk) \
+ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_assign_bit(nr, sk, val) \
+ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
+
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
struct raw6_sock {
/* inet_sock has to be the first member of raw6_sock */
@@ -306,19 +291,19 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
- /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
- /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
- /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
+
struct ipv6_pinfo inet6;
};
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d8a6fdce9373..97baa937ab5b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -179,7 +179,7 @@ struct irq_common_data {
struct irq_data {
u32 mask;
unsigned int irq;
- unsigned long hwirq;
+ irq_hw_number_t hwirq;
struct irq_common_data *common;
struct irq_chip *chip;
struct irq_domain *domain;
@@ -215,8 +215,6 @@ struct irq_data {
* IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
* IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
* IRQD_CAN_RESERVE - Can use reservation mode
- * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
- * required
* IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked
* from actual interrupt context.
* IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call
@@ -247,11 +245,10 @@ enum {
IRQD_SINGLE_TARGET = BIT(24),
IRQD_DEFAULT_TRIGGER_SET = BIT(25),
IRQD_CAN_RESERVE = BIT(26),
- IRQD_MSI_NOMASK_QUIRK = BIT(27),
- IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28),
- IRQD_AFFINITY_ON_ACTIVATE = BIT(29),
- IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30),
- IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31),
+ IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27),
+ IRQD_AFFINITY_ON_ACTIVATE = BIT(28),
+ IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29),
+ IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30),
};
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(struct irq_data *d)
return __irqd_to_state(d) & IRQD_CAN_RESERVE;
}
-static inline void irqd_set_msi_nomask_quirk(struct irq_data *d)
-{
- __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK;
-}
-
-static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d)
-{
- __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK;
-}
-
-static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
-{
- return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
-}
-
static inline void irqd_set_affinity_on_activate(struct irq_data *d)
{
__irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 8cd11a223260..136f2980cba3 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work);
void irq_work_run(void);
bool irq_work_needs_cpu(void);
void irq_work_single(void *arg);
+
+void arch_irq_work_raise(void);
+
#else
static inline bool irq_work_needs_cpu(void) { return false; }
static inline void irq_work_run(void) { }
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 51c254b7fec2..21ecf582a0fe 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -174,7 +174,7 @@ struct irq_domain {
irq_hw_number_t hwirq_max;
unsigned int revmap_size;
struct radix_tree_root revmap_tree;
- struct irq_data __rcu *revmap[];
+ struct irq_data __rcu *revmap[] __counted_by(revmap_size);
};
/* Irq domain flags */
@@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+#ifdef CONFIG_GENERIC_MSI_IRQ
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type);
+void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq);
+#else
+static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
+{
+ WARN_ON_ONCE(1);
+}
+#endif
+
#else /* CONFIG_IRQ_DOMAIN */
static inline void irq_dispose_mapping(unsigned int virq) { }
static inline struct irq_domain *irq_find_matching_fwnode(
diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h
index c29921fd8cd1..5c1fe6f1fcde 100644
--- a/include/linux/irqdomain_defs.h
+++ b/include/linux/irqdomain_defs.h
@@ -26,6 +26,8 @@ enum irq_domain_bus_token {
DOMAIN_BUS_DMAR,
DOMAIN_BUS_AMDVI,
DOMAIN_BUS_PCI_DEVICE_IMS,
+ DOMAIN_BUS_DEVICE_MSI,
+ DOMAIN_BUS_WIRED_TO_MSI,
};
#endif /* _LINUX_IRQDOMAIN_DEFS_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b665c32f5fe..3f003d5fde53 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,6 +12,7 @@
#ifndef _LINUX_TRACE_IRQFLAGS_H
#define _LINUX_TRACE_IRQFLAGS_H
+#include <linux/irqflags_types.h>
#include <linux/typecheck.h>
#include <linux/cleanup.h>
#include <asm/irqflags.h>
@@ -34,19 +35,6 @@
#ifdef CONFIG_TRACE_IRQFLAGS
-/* Per-task IRQ trace events information. */
-struct irqtrace_events {
- unsigned int irq_events;
- unsigned long hardirq_enable_ip;
- unsigned long hardirq_disable_ip;
- unsigned int hardirq_enable_event;
- unsigned int hardirq_disable_event;
- unsigned long softirq_disable_ip;
- unsigned long softirq_enable_ip;
- unsigned int softirq_disable_event;
- unsigned int softirq_enable_event;
-};
-
DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);
@@ -126,7 +114,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
diff --git a/include/linux/irqflags_types.h b/include/linux/irqflags_types.h
new file mode 100644
index 000000000000..c13f0d915097
--- /dev/null
+++ b/include/linux/irqflags_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IRQFLAGS_TYPES_H
+#define _LINUX_IRQFLAGS_TYPES_H
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/* Per-task IRQ trace events information. */
+struct irqtrace_events {
+ unsigned int irq_events;
+ unsigned long hardirq_enable_ip;
+ unsigned long hardirq_disable_ip;
+ unsigned int hardirq_enable_event;
+ unsigned int hardirq_disable_event;
+ unsigned long softirq_disable_ip;
+ unsigned long softirq_enable_ip;
+ unsigned int softirq_disable_event;
+ unsigned int softirq_enable_event;
+};
+
+#endif
+
+#endif /* _LINUX_IRQFLAGS_TYPES_H */
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index c30f454a9518..72dd1eb3a0e7 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -8,7 +8,7 @@
*/
struct irq_desc;
-struct irq_data;
+
typedef void (*irq_flow_handler_t)(struct irq_desc *desc);
#endif
diff --git a/include/linux/ism.h b/include/linux/ism.h
index 9a4c204df3da..5428edd90982 100644
--- a/include/linux/ism.h
+++ b/include/linux/ism.h
@@ -86,7 +86,6 @@ int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb,
int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb);
int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf,
unsigned int offset, void *data, unsigned int size);
-u8 *ism_get_seid(void);
const struct smcd_ops *ism_get_smcd_ops(void);
diff --git a/include/linux/iversion.h b/include/linux/iversion.h
index f174ff1b59ee..8f972eaca2ed 100644
--- a/include/linux/iversion.h
+++ b/include/linux/iversion.h
@@ -256,7 +256,7 @@ inode_peek_iversion(const struct inode *inode)
* For filesystems without any sort of change attribute, the best we can
* do is fake one up from the ctime:
*/
-static inline u64 time_to_chattr(struct timespec64 *t)
+static inline u64 time_to_chattr(const struct timespec64 *t)
{
u64 chattr = t->tv_sec;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 44c298aa58d4..971f3e826e15 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -631,11 +631,6 @@ struct transaction_s
struct list_head t_inode_list;
/*
- * Protects info related to handles
- */
- spinlock_t t_handle_lock;
-
- /*
* Longest time some handle had to wait for running transaction
*/
unsigned long t_max_wait;
@@ -761,11 +756,6 @@ struct journal_s
unsigned long j_flags;
/**
- * @j_atomic_flags: Atomic journaling state flags.
- */
- unsigned long j_atomic_flags;
-
- /**
* @j_errno:
*
* Is there an outstanding uncleared error on the journal (from a prior
@@ -891,7 +881,7 @@ struct journal_s
* Journal head shrinker, reclaim buffer's journal head which
* has been written back.
*/
- struct shrinker j_shrinker;
+ struct shrinker *j_shrinker;
/**
* @j_checkpoint_jh_count:
@@ -1004,6 +994,13 @@ struct journal_s
struct block_device *j_fs_dev;
/**
+ * @j_fs_dev_wb_err:
+ *
+ * Records the errseq of the client fs's backing block device.
+ */
+ errseq_t j_fs_dev_wb_err;
+
+ /**
* @j_total_len: Total maximum capacity of the journal region on disk.
*/
unsigned int j_total_len;
@@ -1379,6 +1376,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2)
JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
+/* Journal high priority write IO operation flags */
+#define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE)
+
/*
* Journal flag definitions
*/
@@ -1402,12 +1402,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
JBD2_JOURNAL_FLUSH_ZEROOUT)
/*
- * Journal atomic flag definitions
- */
-#define JBD2_CHECKPOINT_IO_ERROR 0x001 /* Detect io error while writing
- * buffer back to disk */
-
-/*
* Function declarations for the journaling transaction and buffer
* management
*/
@@ -1700,6 +1694,25 @@ static inline void jbd2_journal_abort_handle(handle_t *handle)
handle->h_aborted = 1;
}
+static inline void jbd2_init_fs_dev_write_error(journal_t *journal)
+{
+ struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping;
+
+ /*
+ * Save the original wb_err value of client fs's bdev mapping which
+ * could be used to detect the client fs's metadata async write error.
+ */
+ errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err);
+}
+
+static inline int jbd2_check_fs_dev_write_error(journal_t *journal)
+{
+ struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping;
+
+ return errseq_check(&mapping->wb_err,
+ READ_ONCE(journal->j_fs_dev_wb_err));
+}
+
#endif /* __KERNEL__ */
/* Comparison functions for transaction IDs: perform comparisons using
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 5e13f801c902..d9f1435a5a13 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -72,9 +72,15 @@ extern int register_refined_jiffies(long clock_tick_rate);
#endif
/*
- * The 64-bit value is not atomic - you MUST NOT read it
+ * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it
* without sampling the sequence number in jiffies_lock.
* get_jiffies_64() will do this for you as appropriate.
+ *
+ * jiffies and jiffies_64 are at the same address for little-endian systems
+ * and for 64-bit big-endian systems.
+ * On 32-bit big-endian systems, jiffies is the lower 32 bits of jiffies_64
+ * (i.e., at address @jiffies_64 + 4).
+ * See arch/ARCH/kernel/vmlinux.lds.S
*/
extern u64 __cacheline_aligned_in_smp jiffies_64;
extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
@@ -82,46 +88,94 @@ extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffi
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void);
#else
+/**
+ * get_jiffies_64 - read the 64-bit non-atomic jiffies_64 value
+ *
+ * When BITS_PER_LONG < 64, this uses sequence number sampling using
+ * jiffies_lock to protect the 64-bit read.
+ *
+ * Return: current 64-bit jiffies value
+ */
static inline u64 get_jiffies_64(void)
{
return (u64)jiffies;
}
#endif
-/*
- * These inlines deal with timer wrapping correctly. You are
- * strongly encouraged to use them
- * 1. Because people otherwise forget
- * 2. Because if the timer wrap changes in future you won't have to
- * alter your driver code.
+/**
+ * DOC: General information about time_* inlines
+ *
+ * These inlines deal with timer wrapping correctly. You are strongly encouraged
+ * to use them:
*
- * time_after(a,b) returns true if the time a is after time b.
+ * #. Because people otherwise forget
+ * #. Because if the timer wrap changes in future you won't have to alter your
+ * driver code.
+ */
+
+/**
+ * time_after - returns true if the time a is after time b.
+ * @a: first comparable as unsigned long
+ * @b: second comparable as unsigned long
*
* Do this with "<0" and ">=0" to only test the sign of the result. A
* good compiler would generate better code (and a really good compiler
* wouldn't care). Gcc is currently neither.
+ *
+ * Return: %true is time a is after time b, otherwise %false.
*/
#define time_after(a,b) \
(typecheck(unsigned long, a) && \
typecheck(unsigned long, b) && \
((long)((b) - (a)) < 0))
+/**
+ * time_before - returns true if the time a is before time b.
+ * @a: first comparable as unsigned long
+ * @b: second comparable as unsigned long
+ *
+ * Return: %true is time a is before time b, otherwise %false.
+ */
#define time_before(a,b) time_after(b,a)
+/**
+ * time_after_eq - returns true if the time a is after or the same as time b.
+ * @a: first comparable as unsigned long
+ * @b: second comparable as unsigned long
+ *
+ * Return: %true is time a is after or the same as time b, otherwise %false.
+ */
#define time_after_eq(a,b) \
(typecheck(unsigned long, a) && \
typecheck(unsigned long, b) && \
((long)((a) - (b)) >= 0))
+/**
+ * time_before_eq - returns true if the time a is before or the same as time b.
+ * @a: first comparable as unsigned long
+ * @b: second comparable as unsigned long
+ *
+ * Return: %true is time a is before or the same as time b, otherwise %false.
+ */
#define time_before_eq(a,b) time_after_eq(b,a)
-/*
- * Calculate whether a is in the range of [b, c].
+/**
+ * time_in_range - Calculate whether a is in the range of [b, c].
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c], otherwise %false.
*/
#define time_in_range(a,b,c) \
(time_after_eq(a,b) && \
time_before_eq(a,c))
-/*
- * Calculate whether a is in the range of [b, c).
+/**
+ * time_in_range_open - Calculate whether a is in the range of [b, c).
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c), otherwise %false.
*/
#define time_in_range_open(a,b,c) \
(time_after_eq(a,b) && \
@@ -129,45 +183,138 @@ static inline u64 get_jiffies_64(void)
/* Same as above, but does so with platform independent 64bit types.
* These must be used when utilizing jiffies_64 (i.e. return value of
- * get_jiffies_64() */
+ * get_jiffies_64()). */
+
+/**
+ * time_after64 - returns true if the time a is after time b.
+ * @a: first comparable as __u64
+ * @b: second comparable as __u64
+ *
+ * This must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64()).
+ *
+ * Return: %true is time a is after time b, otherwise %false.
+ */
#define time_after64(a,b) \
(typecheck(__u64, a) && \
typecheck(__u64, b) && \
((__s64)((b) - (a)) < 0))
+/**
+ * time_before64 - returns true if the time a is before time b.
+ * @a: first comparable as __u64
+ * @b: second comparable as __u64
+ *
+ * This must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64()).
+ *
+ * Return: %true is time a is before time b, otherwise %false.
+ */
#define time_before64(a,b) time_after64(b,a)
+/**
+ * time_after_eq64 - returns true if the time a is after or the same as time b.
+ * @a: first comparable as __u64
+ * @b: second comparable as __u64
+ *
+ * This must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64()).
+ *
+ * Return: %true is time a is after or the same as time b, otherwise %false.
+ */
#define time_after_eq64(a,b) \
(typecheck(__u64, a) && \
typecheck(__u64, b) && \
((__s64)((a) - (b)) >= 0))
+/**
+ * time_before_eq64 - returns true if the time a is before or the same as time b.
+ * @a: first comparable as __u64
+ * @b: second comparable as __u64
+ *
+ * This must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64()).
+ *
+ * Return: %true is time a is before or the same as time b, otherwise %false.
+ */
#define time_before_eq64(a,b) time_after_eq64(b,a)
+/**
+ * time_in_range64 - Calculate whether a is in the range of [b, c].
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c], otherwise %false.
+ */
#define time_in_range64(a, b, c) \
(time_after_eq64(a, b) && \
time_before_eq64(a, c))
/*
- * These four macros compare jiffies and 'a' for convenience.
+ * These eight macros compare jiffies[_64] and 'a' for convenience.
*/
-/* time_is_before_jiffies(a) return true if a is before jiffies */
+/**
+ * time_is_before_jiffies - return true if a is before jiffies
+ * @a: time (unsigned long) to compare to jiffies
+ *
+ * Return: %true is time a is before jiffies, otherwise %false.
+ */
#define time_is_before_jiffies(a) time_after(jiffies, a)
+/**
+ * time_is_before_jiffies64 - return true if a is before jiffies_64
+ * @a: time (__u64) to compare to jiffies_64
+ *
+ * Return: %true is time a is before jiffies_64, otherwise %false.
+ */
#define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a)
-/* time_is_after_jiffies(a) return true if a is after jiffies */
+/**
+ * time_is_after_jiffies - return true if a is after jiffies
+ * @a: time (unsigned long) to compare to jiffies
+ *
+ * Return: %true is time a is after jiffies, otherwise %false.
+ */
#define time_is_after_jiffies(a) time_before(jiffies, a)
+/**
+ * time_is_after_jiffies64 - return true if a is after jiffies_64
+ * @a: time (__u64) to compare to jiffies_64
+ *
+ * Return: %true is time a is after jiffies_64, otherwise %false.
+ */
#define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a)
-/* time_is_before_eq_jiffies(a) return true if a is before or equal to jiffies*/
+/**
+ * time_is_before_eq_jiffies - return true if a is before or equal to jiffies
+ * @a: time (unsigned long) to compare to jiffies
+ *
+ * Return: %true is time a is before or the same as jiffies, otherwise %false.
+ */
#define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a)
+/**
+ * time_is_before_eq_jiffies64 - return true if a is before or equal to jiffies_64
+ * @a: time (__u64) to compare to jiffies_64
+ *
+ * Return: %true is time a is before or the same jiffies_64, otherwise %false.
+ */
#define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a)
-/* time_is_after_eq_jiffies(a) return true if a is after or equal to jiffies*/
+/**
+ * time_is_after_eq_jiffies - return true if a is after or equal to jiffies
+ * @a: time (unsigned long) to compare to jiffies
+ *
+ * Return: %true is time a is after or the same as jiffies, otherwise %false.
+ */
#define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a)
+/**
+ * time_is_after_eq_jiffies64 - return true if a is after or equal to jiffies_64
+ * @a: time (__u64) to compare to jiffies_64
+ *
+ * Return: %true is time a is after or the same as jiffies_64, otherwise %false.
+ */
#define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a)
/*
- * Have the 32 bit jiffies value wrap 5 minutes after boot
+ * Have the 32-bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
@@ -278,7 +425,7 @@ extern unsigned long preset_lpj;
#if BITS_PER_LONG < 64
# define MAX_SEC_IN_JIFFIES \
(long)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC) / NSEC_PER_SEC)
-#else /* take care of overflow on 64 bits machines */
+#else /* take care of overflow on 64-bit machines */
# define MAX_SEC_IN_JIFFIES \
(SH_DIV((MAX_JIFFY_OFFSET >> SEC_JIFFIE_SC) * TICK_NSEC, NSEC_PER_SEC, 1) - 1)
@@ -290,6 +437,12 @@ extern unsigned long preset_lpj;
extern unsigned int jiffies_to_msecs(const unsigned long j);
extern unsigned int jiffies_to_usecs(const unsigned long j);
+/**
+ * jiffies_to_nsecs - Convert jiffies to nanoseconds
+ * @j: jiffies value
+ *
+ * Return: nanoseconds value
+ */
static inline u64 jiffies_to_nsecs(const unsigned long j)
{
return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
@@ -353,12 +506,14 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
*
* msecs_to_jiffies() checks for the passed in value being a constant
* via __builtin_constant_p() allowing gcc to eliminate most of the
- * code, __msecs_to_jiffies() is called if the value passed does not
+ * code. __msecs_to_jiffies() is called if the value passed does not
* allow constant folding and the actual conversion must be done at
* runtime.
- * the HZ range specific helpers _msecs_to_jiffies() are called both
+ * The HZ range specific helpers _msecs_to_jiffies() are called both
* directly here and from __msecs_to_jiffies() in the case where
* constant folding is not possible.
+ *
+ * Return: jiffies value
*/
static __always_inline unsigned long msecs_to_jiffies(const unsigned int m)
{
@@ -400,12 +555,14 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u)
*
* usecs_to_jiffies() checks for the passed in value being a constant
* via __builtin_constant_p() allowing gcc to eliminate most of the
- * code, __usecs_to_jiffies() is called if the value passed does not
+ * code. __usecs_to_jiffies() is called if the value passed does not
* allow constant folding and the actual conversion must be done at
* runtime.
- * the HZ range specific helpers _usecs_to_jiffies() are called both
+ * The HZ range specific helpers _usecs_to_jiffies() are called both
* directly here and from __msecs_to_jiffies() in the case where
* constant folding is not possible.
+ *
+ * Return: jiffies value
*/
static __always_inline unsigned long usecs_to_jiffies(const unsigned int u)
{
@@ -422,6 +579,7 @@ extern unsigned long timespec64_to_jiffies(const struct timespec64 *value);
extern void jiffies_to_timespec64(const unsigned long jiffies,
struct timespec64 *value);
extern clock_t jiffies_to_clock_t(unsigned long x);
+
static inline clock_t jiffies_delta_to_clock_t(long delta)
{
return jiffies_to_clock_t(max(0L, delta));
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 819b6bc8ac08..70d6a8f6e25d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -4,6 +4,7 @@
#include <linux/bug.h>
#include <linux/kasan-enabled.h>
+#include <linux/kasan-tags.h>
#include <linux/kernel.h>
#include <linux/static_key.h>
#include <linux/types.h>
@@ -54,11 +55,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
int kasan_populate_early_shadow(const void *shadow_start,
const void *shadow_end);
+#ifndef kasan_mem_to_shadow
static inline void *kasan_mem_to_shadow(const void *addr)
{
return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ KASAN_SHADOW_OFFSET;
}
+#endif
int kasan_add_zero_shadow(void *start, unsigned long size);
void kasan_remove_zero_shadow(void *start, unsigned long size);
@@ -127,20 +130,39 @@ static __always_inline void kasan_poison_slab(struct slab *slab)
__kasan_poison_slab(slab);
}
-void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
-static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache,
+void __kasan_unpoison_new_object(struct kmem_cache *cache, void *object);
+/**
+ * kasan_unpoison_new_object - Temporarily unpoison a new slab object.
+ * @cache: Cache the object belong to.
+ * @object: Pointer to the object.
+ *
+ * This function is intended for the slab allocator's internal use. It
+ * temporarily unpoisons an object from a newly allocated slab without doing
+ * anything else. The object must later be repoisoned by
+ * kasan_poison_new_object().
+ */
+static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache,
void *object)
{
if (kasan_enabled())
- __kasan_unpoison_object_data(cache, object);
+ __kasan_unpoison_new_object(cache, object);
}
-void __kasan_poison_object_data(struct kmem_cache *cache, void *object);
-static __always_inline void kasan_poison_object_data(struct kmem_cache *cache,
+void __kasan_poison_new_object(struct kmem_cache *cache, void *object);
+/**
+ * kasan_unpoison_new_object - Repoison a new slab object.
+ * @cache: Cache the object belong to.
+ * @object: Pointer to the object.
+ *
+ * This function is intended for the slab allocator's internal use. It
+ * repoisons an object that was previously unpoisoned by
+ * kasan_unpoison_new_object() without doing anything else.
+ */
+static __always_inline void kasan_poison_new_object(struct kmem_cache *cache,
void *object)
{
if (kasan_enabled())
- __kasan_poison_object_data(cache, object);
+ __kasan_poison_new_object(cache, object);
}
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
@@ -170,13 +192,6 @@ static __always_inline void kasan_kfree_large(void *ptr)
__kasan_kfree_large(ptr, _RET_IP_);
}
-void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
-static __always_inline void kasan_slab_free_mempool(void *ptr)
-{
- if (kasan_enabled())
- __kasan_slab_free_mempool(ptr, _RET_IP_);
-}
-
void * __must_check __kasan_slab_alloc(struct kmem_cache *s,
void *object, gfp_t flags, bool init);
static __always_inline void * __must_check kasan_slab_alloc(
@@ -217,6 +232,113 @@ static __always_inline void * __must_check kasan_krealloc(const void *object,
return (void *)object;
}
+bool __kasan_mempool_poison_pages(struct page *page, unsigned int order,
+ unsigned long ip);
+/**
+ * kasan_mempool_poison_pages - Check and poison a mempool page allocation.
+ * @page: Pointer to the page allocation.
+ * @order: Order of the allocation.
+ *
+ * This function is intended for kernel subsystems that cache page allocations
+ * to reuse them instead of freeing them back to page_alloc (e.g. mempool).
+ *
+ * This function is similar to kasan_mempool_poison_object() but operates on
+ * page allocations.
+ *
+ * Before the poisoned allocation can be reused, it must be unpoisoned via
+ * kasan_mempool_unpoison_pages().
+ *
+ * Return: true if the allocation can be safely reused; false otherwise.
+ */
+static __always_inline bool kasan_mempool_poison_pages(struct page *page,
+ unsigned int order)
+{
+ if (kasan_enabled())
+ return __kasan_mempool_poison_pages(page, order, _RET_IP_);
+ return true;
+}
+
+void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order,
+ unsigned long ip);
+/**
+ * kasan_mempool_unpoison_pages - Unpoison a mempool page allocation.
+ * @page: Pointer to the page allocation.
+ * @order: Order of the allocation.
+ *
+ * This function is intended for kernel subsystems that cache page allocations
+ * to reuse them instead of freeing them back to page_alloc (e.g. mempool).
+ *
+ * This function unpoisons a page allocation that was previously poisoned by
+ * kasan_mempool_poison_pages() without zeroing the allocation's memory. For
+ * the tag-based modes, this function assigns a new tag to the allocation.
+ */
+static __always_inline void kasan_mempool_unpoison_pages(struct page *page,
+ unsigned int order)
+{
+ if (kasan_enabled())
+ __kasan_mempool_unpoison_pages(page, order, _RET_IP_);
+}
+
+bool __kasan_mempool_poison_object(void *ptr, unsigned long ip);
+/**
+ * kasan_mempool_poison_object - Check and poison a mempool slab allocation.
+ * @ptr: Pointer to the slab allocation.
+ *
+ * This function is intended for kernel subsystems that cache slab allocations
+ * to reuse them instead of freeing them back to the slab allocator (e.g.
+ * mempool).
+ *
+ * This function poisons a slab allocation and saves a free stack trace for it
+ * without initializing the allocation's memory and without putting it into the
+ * quarantine (for the Generic mode).
+ *
+ * This function also performs checks to detect double-free and invalid-free
+ * bugs and reports them. The caller can use the return value of this function
+ * to find out if the allocation is buggy.
+ *
+ * Before the poisoned allocation can be reused, it must be unpoisoned via
+ * kasan_mempool_unpoison_object().
+ *
+ * This function operates on all slab allocations including large kmalloc
+ * allocations (the ones returned by kmalloc_large() or by kmalloc() with the
+ * size > KMALLOC_MAX_SIZE).
+ *
+ * Return: true if the allocation can be safely reused; false otherwise.
+ */
+static __always_inline bool kasan_mempool_poison_object(void *ptr)
+{
+ if (kasan_enabled())
+ return __kasan_mempool_poison_object(ptr, _RET_IP_);
+ return true;
+}
+
+void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip);
+/**
+ * kasan_mempool_unpoison_object - Unpoison a mempool slab allocation.
+ * @ptr: Pointer to the slab allocation.
+ * @size: Size to be unpoisoned.
+ *
+ * This function is intended for kernel subsystems that cache slab allocations
+ * to reuse them instead of freeing them back to the slab allocator (e.g.
+ * mempool).
+ *
+ * This function unpoisons a slab allocation that was previously poisoned via
+ * kasan_mempool_poison_object() and saves an alloc stack trace for it without
+ * initializing the allocation's memory. For the tag-based modes, this function
+ * does not assign a new tag to the allocation and instead restores the
+ * original tags based on the pointer value.
+ *
+ * This function operates on all slab allocations including large kmalloc
+ * allocations (the ones returned by kmalloc_large() or by kmalloc() with the
+ * size > KMALLOC_MAX_SIZE).
+ */
+static __always_inline void kasan_mempool_unpoison_object(void *ptr,
+ size_t size)
+{
+ if (kasan_enabled())
+ __kasan_mempool_unpoison_object(ptr, size, _RET_IP_);
+}
+
/*
* Unlike kasan_check_read/write(), kasan_check_byte() is performed even for
* the hardware tag-based mode that doesn't rely on compiler instrumentation.
@@ -240,9 +362,9 @@ static inline bool kasan_unpoison_pages(struct page *page, unsigned int order,
return false;
}
static inline void kasan_poison_slab(struct slab *slab) {}
-static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
+static inline void kasan_unpoison_new_object(struct kmem_cache *cache,
void *object) {}
-static inline void kasan_poison_object_data(struct kmem_cache *cache,
+static inline void kasan_poison_new_object(struct kmem_cache *cache,
void *object) {}
static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
const void *object)
@@ -254,7 +376,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init
return false;
}
static inline void kasan_kfree_large(void *ptr) {}
-static inline void kasan_slab_free_mempool(void *ptr) {}
static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags, bool init)
{
@@ -274,6 +395,17 @@ static inline void *kasan_krealloc(const void *object, size_t new_size,
{
return (void *)object;
}
+static inline bool kasan_mempool_poison_pages(struct page *page, unsigned int order)
+{
+ return true;
+}
+static inline void kasan_mempool_unpoison_pages(struct page *page, unsigned int order) {}
+static inline bool kasan_mempool_poison_object(void *ptr)
+{
+ return true;
+}
+static inline void kasan_mempool_unpoison_object(void *ptr, size_t size) {}
+
static inline bool kasan_check_byte(const void *address)
{
return true;
@@ -283,8 +415,10 @@ static inline bool kasan_check_byte(const void *address)
#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
void kasan_unpoison_task_stack(struct task_struct *task);
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
#else
static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
#endif
#ifdef CONFIG_KASAN_GENERIC
@@ -295,7 +429,6 @@ struct kasan_cache {
};
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
-slab_flags_t kasan_never_merge(void);
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
slab_flags_t *flags);
@@ -312,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache,
{
return 0;
}
-/* And thus nothing prevents cache merging. */
-static inline slab_flags_t kasan_never_merge(void)
-{
- return 0;
-}
/* And no cache-related metadata initialization is required. */
static inline void kasan_cache_create(struct kmem_cache *cache,
unsigned int *size,
@@ -464,10 +592,10 @@ static inline void kasan_free_module_shadow(const struct vm_struct *vm) {}
#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
-#ifdef CONFIG_KASAN_INLINE
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
void kasan_non_canonical_hook(unsigned long addr);
-#else /* CONFIG_KASAN_INLINE */
+#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
static inline void kasan_non_canonical_hook(unsigned long addr) { }
-#endif /* CONFIG_KASAN_INLINE */
+#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
#endif /* LINUX_KASAN_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0d91e0af0125..be2e8c0a187e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -13,6 +13,7 @@
#include <linux/stdarg.h>
#include <linux/align.h>
+#include <linux/array_size.h>
#include <linux/limits.h>
#include <linux/linkage.h>
#include <linux/stddef.h>
@@ -29,32 +30,21 @@
#include <linux/panic.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
+#include <linux/sprintf.h>
#include <linux/static_call_types.h>
#include <linux/instruction_pointer.h>
+#include <linux/wordpart.h>
+
#include <asm/byteorder.h>
#include <uapi/linux/kernel.h>
#define STACK_MAGIC 0xdeadbeef
-/**
- * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
- * @x: value to repeat
- *
- * NOTE: @x is not checked for > 0xff; larger values produce odd results.
- */
-#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
-
/* generic data direction definitions */
#define READ 0
#define WRITE 1
-/**
- * ARRAY_SIZE - get the number of elements in array @arr
- * @arr: array to be sized
- */
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
-
#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
#define u64_to_user_ptr(x) ( \
@@ -64,34 +54,6 @@
} \
)
-/**
- * upper_32_bits - return bits 32-63 of a number
- * @n: the number we're accessing
- *
- * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
- * the "right shift count >= width of type" warning when that quantity is
- * 32-bits.
- */
-#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
-
-/**
- * lower_32_bits - return bits 0-31 of a number
- * @n: the number we're accessing
- */
-#define lower_32_bits(n) ((u32)((n) & 0xffffffff))
-
-/**
- * upper_16_bits - return bits 16-31 of a number
- * @n: the number we're accessing
- */
-#define upper_16_bits(n) ((u16)((n) >> 16))
-
-/**
- * lower_16_bits - return bits 0-15 of a number
- * @n: the number we're accessing
- */
-#define lower_16_bits(n) ((u16)((n) & 0xffff))
-
struct completion;
struct user;
@@ -203,41 +165,6 @@ static inline void might_fault(void) { }
void do_exit(long error_code) __noreturn;
-extern int num_to_str(char *buf, int size,
- unsigned long long num, unsigned int width);
-
-/* lib/printf utilities */
-
-extern __printf(2, 3) int sprintf(char *buf, const char * fmt, ...);
-extern __printf(2, 0) int vsprintf(char *buf, const char *, va_list);
-extern __printf(3, 4)
-int snprintf(char *buf, size_t size, const char *fmt, ...);
-extern __printf(3, 0)
-int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-extern __printf(3, 4)
-int scnprintf(char *buf, size_t size, const char *fmt, ...);
-extern __printf(3, 0)
-int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
-extern __printf(2, 3) __malloc
-char *kasprintf(gfp_t gfp, const char *fmt, ...);
-extern __printf(2, 0) __malloc
-char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
-extern __printf(2, 0)
-const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
-
-extern __scanf(2, 3)
-int sscanf(const char *, const char *, ...);
-extern __scanf(2, 0)
-int vsscanf(const char *, const char *, va_list);
-
-extern int no_hash_pointers_enable(char *str);
-
-extern int get_option(char **str, int *pint);
-extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(const char *ptr, char **retptr);
-extern bool parse_option_str(const char *str, const char *option);
-extern char *next_arg(char *args, char **param, char **val);
-
extern int core_kernel_text(unsigned long addr);
extern int __kernel_text_address(unsigned long addr);
extern int kernel_text_address(unsigned long addr);
@@ -288,6 +215,7 @@ enum ftrace_dump_mode {
DUMP_NONE,
DUMP_ALL,
DUMP_ORIG,
+ DUMP_PARAM,
};
#ifdef CONFIG_TRACING
@@ -457,13 +385,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#endif /* CONFIG_TRACING */
-/* This counts to 12. Any more, it will return 13th argument. */
-#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
-#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-
-#define __CONCAT(a, b) a ## b
-#define CONCATENATE(a, b) __CONCAT(a, b)
-
/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 73f5c120def8..87c79d076d6d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -206,23 +206,25 @@ struct kernfs_node {
const void *ns; /* namespace tag */
unsigned int hash; /* ns + name hash */
+ unsigned short flags;
+ umode_t mode;
+
union {
struct kernfs_elem_dir dir;
struct kernfs_elem_symlink symlink;
struct kernfs_elem_attr attr;
};
- void *priv;
-
/*
* 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit,
* the low 32bits are ino and upper generation.
*/
u64 id;
- unsigned short flags;
- umode_t mode;
+ void *priv;
struct kernfs_iattrs *iattr;
+
+ struct rcu_head rcu;
};
/*
@@ -316,6 +318,7 @@ struct kernfs_ops {
struct poll_table_struct *pt);
int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
+ loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence);
};
/*
@@ -550,6 +553,10 @@ static inline int kernfs_setattr(struct kernfs_node *kn,
const struct iattr *iattr)
{ return -ENOSYS; }
+static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of,
+ struct poll_table_struct *pt)
+{ return -ENOSYS; }
+
static inline void kernfs_notify(struct kernfs_node *kn) { }
static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 22b5cd24f581..060835bb82d5 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -15,17 +15,14 @@
#if !defined(__ASSEMBLY__)
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
+#include <linux/crash_reserve.h>
#include <asm/io.h>
#include <linux/range.h>
#include <uapi/linux/kexec.h>
#include <linux/verification.h>
-/* Location of a reserved region to hold the crash kernel.
- */
-extern struct resource crashk_res;
-extern struct resource crashk_low_res;
extern note_buf_t __percpu *crash_notes;
#ifdef CONFIG_KEXEC_CORE
@@ -33,7 +30,9 @@ extern note_buf_t __percpu *crash_notes;
#include <linux/compat.h>
#include <linux/ioport.h>
#include <linux/module.h>
+#include <linux/highmem.h>
#include <asm/kexec.h>
+#include <linux/crash_core.h>
/* Verify architecture specific macros are defined */
@@ -230,21 +229,6 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
}
#endif
-/* Alignment required for elf header segment */
-#define ELF_CORE_HEADER_ALIGN 4096
-
-struct crash_mem {
- unsigned int max_nr_ranges;
- unsigned int nr_ranges;
- struct range ranges[];
-};
-
-extern int crash_exclude_mem_range(struct crash_mem *mem,
- unsigned long long mstart,
- unsigned long long mend);
-extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
- void **addr, unsigned long *sz);
-
#ifndef arch_kexec_apply_relocations_add
/*
* arch_kexec_apply_relocations_add - apply relocations of type RELA
@@ -334,6 +318,10 @@ struct kimage {
unsigned int preserve_context : 1;
/* If set, we are using file mode kexec syscall */
unsigned int file_mode:1;
+#ifdef CONFIG_CRASH_HOTPLUG
+ /* If set, allow changes to elfcorehdr of kexec_load'd image */
+ unsigned int update_elfcorehdr:1;
+#endif
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
@@ -360,6 +348,12 @@ struct kimage {
struct purgatory_info purgatory_info;
#endif
+#ifdef CONFIG_CRASH_HOTPLUG
+ int hp_action;
+ int elfcorehdr_index;
+ bool elfcorehdr_updated;
+#endif
+
#ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
@@ -386,13 +380,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image,
static inline int machine_kexec_post_load(struct kimage *image) { return 0; }
#endif
-extern void __crash_kexec(struct pt_regs *);
-extern void crash_kexec(struct pt_regs *);
-int kexec_should_crash(struct task_struct *);
-int kexec_crash_loaded(void);
-void crash_save_cpu(struct pt_regs *regs, int cpu);
-extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
-
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
@@ -404,36 +391,18 @@ bool kexec_load_permitted(int kexec_image_type);
/* List of defined/legal kexec flags */
#ifndef CONFIG_KEXEC_JUMP
-#define KEXEC_FLAGS KEXEC_ON_CRASH
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR)
#else
-#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR)
#endif
/* List of defined/legal kexec file flags */
#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
- KEXEC_FILE_NO_INITRAMFS)
+ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG)
/* flag to track if kexec reboot is in progress */
extern bool kexec_in_progress;
-int crash_shrink_memory(unsigned long new_size);
-ssize_t crash_get_memory_size(void);
-
-#ifndef arch_kexec_protect_crashkres
-/*
- * Protection mechanism for crashkernel reserved memory after
- * the kdump kernel is loaded.
- *
- * Provide an empty default implementation here -- architecture
- * code may override this
- */
-static inline void arch_kexec_protect_crashkres(void) { }
-#endif
-
-#ifndef arch_kexec_unprotect_crashkres
-static inline void arch_kexec_unprotect_crashkres(void) { }
-#endif
-
#ifndef page_to_boot_pfn
static inline unsigned long page_to_boot_pfn(struct page *page)
{
@@ -490,6 +459,13 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
#endif
+extern bool kexec_file_dbg_print;
+
+#define kexec_dprintk(fmt, ...) \
+ printk("%s" fmt, \
+ kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
+ ##__VA_ARGS__)
+
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 7d985a1dfe4a..5caf3ce82373 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -73,6 +73,7 @@ struct key_type {
unsigned int flags;
#define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */
+#define KEY_TYPE_INSTANT_REAP 0x00000002 /* Keys of this type don't have a delay after expiring */
/* vet a description */
int (*vet_description)(const char *description);
diff --git a/include/linux/key.h b/include/linux/key.h
index 938d7ecfb495..943a432da3ae 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -515,6 +515,7 @@ extern void key_init(void);
#define key_init() do { } while(0)
#define key_free_user_ns(ns) do { } while(0)
#define key_remove_domain(d) do { } while(0)
+#define key_lookup(k) NULL
#endif /* CONFIG_KEYS */
#endif /* __KERNEL__ */
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 726857a4b680..88100cc9caba 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -59,15 +59,16 @@ static __always_inline bool is_kfence_address(const void *addr)
}
/**
- * kfence_alloc_pool() - allocate the KFENCE pool via memblock
+ * kfence_alloc_pool_and_metadata() - allocate the KFENCE pool and KFENCE
+ * metadata via memblock
*/
-void __init kfence_alloc_pool(void);
+void __init kfence_alloc_pool_and_metadata(void);
/**
* kfence_init() - perform KFENCE initialization at boot time
*
- * Requires that kfence_alloc_pool() was called before. This sets up the
- * allocation gate timer, and requires that workqueues are available.
+ * Requires that kfence_alloc_pool_and_metadata() was called before. This sets
+ * up the allocation gate timer, and requires that workqueues are available.
*/
void __init kfence_init(void);
@@ -222,8 +223,10 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
#else /* CONFIG_KFENCE */
+#define kfence_sample_interval (0)
+
static inline bool is_kfence_address(const void *addr) { return false; }
-static inline void kfence_alloc_pool(void) { }
+static inline void kfence_alloc_pool_and_metadata(void) { }
static inline void kfence_init(void) { }
static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h
index 8bfa6c98176d..929287981afe 100644
--- a/include/linux/kmsan_types.h
+++ b/include/linux/kmsan_types.h
@@ -9,6 +9,8 @@
#ifndef _LINUX_KMSAN_TYPES_H
#define _LINUX_KMSAN_TYPES_H
+#include <linux/types.h>
+
/* These constants are defined in the MSan LLVM instrumentation pass. */
#define KMSAN_RETVAL_SIZE 800
#define KMSAN_PARAM_SIZE 800
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index c392c811d9ad..c8219505a79f 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -38,7 +38,7 @@ extern char uevent_helper[];
#endif
/* counter to tag the uevent, read only except for the kobject core */
-extern u64 uevent_seqnum;
+extern atomic64_t uevent_seqnum;
/*
* The actions here must match the index to the string array
@@ -69,14 +69,16 @@ struct kobject {
const struct kobj_type *ktype;
struct kernfs_node *sd; /* sysfs directory entry */
struct kref kref;
-#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
- struct delayed_work release;
-#endif
+
unsigned int state_initialized:1;
unsigned int state_in_sysfs:1;
unsigned int state_add_uevent_sent:1;
unsigned int state_remove_uevent_sent:1;
unsigned int uevent_suppress:1;
+
+#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
+ struct delayed_work release;
+#endif
};
__printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 85a64cb95d75..0ff44d6633e3 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -26,8 +26,7 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
-#include <linux/refcount.h>
-#include <linux/freelist.h>
+#include <linux/objpool.h>
#include <linux/rethook.h>
#include <asm/kprobes.h>
@@ -140,8 +139,8 @@ static inline bool kprobe_ftrace(struct kprobe *p)
*
*/
struct kretprobe_holder {
- struct kretprobe *rp;
- refcount_t ref;
+ struct kretprobe __rcu *rp;
+ struct objpool_head pool;
};
struct kretprobe {
@@ -154,7 +153,6 @@ struct kretprobe {
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
struct rethook *rh;
#else
- struct freelist_head freelist;
struct kretprobe_holder *rph;
#endif
};
@@ -165,10 +163,7 @@ struct kretprobe_instance {
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
struct rethook_node node;
#else
- union {
- struct freelist_node freelist;
- struct rcu_head rcu;
- };
+ struct rcu_head rcu;
struct llist_node llist;
struct kretprobe_holder *rph;
kprobe_opcode_t *ret_addr;
@@ -202,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p);
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
{
- RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
- "Kretprobe is accessed from instance under preemptive context");
-
- return (struct kretprobe *)READ_ONCE(ri->node.rethook->data);
+ /* rethook::data is non-changed field, so that you can access it freely. */
+ return (struct kretprobe *)ri->node.rethook->data;
}
static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
{
@@ -250,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
{
- RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
- "Kretprobe is accessed from instance under preemptive context");
-
- return READ_ONCE(ri->rph->rp);
+ return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held());
}
static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
@@ -450,6 +440,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
char *type, char *sym);
+
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
#else /* !CONFIG_KPROBES: */
static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 899a314bc487..401348e9f92b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -26,6 +26,22 @@ int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
+/*
+ * To identify zeropages that were mapped by KSM, we reuse the dirty bit
+ * in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when
+ * deduplicating memory.
+ */
+#define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte))
+
+extern unsigned long ksm_zero_pages;
+
+static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
+{
+ if (is_ksm_zero_pte(pte)) {
+ ksm_zero_pages--;
+ mm->ksm_zero_pages--;
+ }
+}
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
@@ -60,8 +76,8 @@ static inline void ksm_exit(struct mm_struct *mm)
* We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
* but what if the vma was unmerged while the page was swapped out?
*/
-struct page *ksm_might_need_to_copy(struct page *page,
- struct vm_area_struct *vma, unsigned long address);
+struct folio *ksm_might_need_to_copy(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr);
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
@@ -95,6 +111,10 @@ static inline void ksm_exit(struct mm_struct *mm)
{
}
+static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
+{
+}
+
#ifdef CONFIG_MEMORY_FAILURE
static inline void collect_procs_ksm(struct page *page,
struct list_head *to_kill, int force_early)
@@ -109,10 +129,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0;
}
-static inline struct page *ksm_might_need_to_copy(struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+static inline struct folio *ksm_might_need_to_copy(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr)
{
- return page;
+ return folio;
}
static inline void rmap_walk_ksm(struct folio *folio,
diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h
index 529974e22ea7..7fcf29a4e0de 100644
--- a/include/linux/kstrtox.h
+++ b/include/linux/kstrtox.h
@@ -147,9 +147,4 @@ extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int);
-static inline int strtobool(const char *s, bool *res)
-{
- return kstrtobool(s, res);
-}
-
#endif /* _LINUX_KSTRTOX_H */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index f1f95a71a4bc..b11f53c1ba2e 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -86,9 +86,9 @@ void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
+int kthread_stop_put(struct task_struct *k);
bool kthread_should_stop(void);
bool kthread_should_park(void);
-bool __kthread_should_park(struct task_struct *k);
bool kthread_should_stop_or_park(void);
bool kthread_freezable_should_stop(bool *was_frozen);
void *kthread_func(struct task_struct *k);
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 73f20deb497d..3a4e723eae0f 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -21,12 +21,10 @@
#ifndef _LINUX_KTIME_H
#define _LINUX_KTIME_H
-#include <linux/time.h>
-#include <linux/jiffies.h>
#include <asm/bug.h>
-
-/* Nanosecond scalar representation for kernel time values */
-typedef s64 ktime_t;
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/types.h>
/**
* ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d3ac7720da9..48f31dcd318a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -80,8 +80,8 @@
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS 2
-#ifndef KVM_ADDRESS_SPACE_NUM
-#define KVM_ADDRESS_SPACE_NUM 1
+#ifndef KVM_MAX_NR_ADDRESS_SPACES
+#define KVM_MAX_NR_ADDRESS_SPACES 1
#endif
/*
@@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr)
#endif
+static inline bool kvm_is_error_gpa(gpa_t gpa)
+{
+ return gpa == INVALID_GPA;
+}
+
#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
static inline bool is_error_page(struct page *page)
@@ -190,8 +195,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
- unsigned long *vcpu_bitmap);
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -240,7 +243,6 @@ struct kvm_async_pf {
struct list_head link;
struct list_head queue;
struct kvm_vcpu *vcpu;
- struct mm_struct *mm;
gpa_t cr2_or_gpa;
unsigned long addr;
struct kvm_arch_async_pf arch;
@@ -255,12 +257,17 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
-#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+ pte_t pte;
+ unsigned long attributes;
+};
+
struct kvm_gfn_range {
struct kvm_memory_slot *slot;
gfn_t start;
gfn_t end;
- pte_t pte;
+ union kvm_mmu_notifier_arg arg;
bool may_block;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -586,8 +593,20 @@ struct kvm_memory_slot {
u32 flags;
short id;
u16 as_id;
+
+#ifdef CONFIG_KVM_PRIVATE_MEM
+ struct {
+ struct file __rcu *file;
+ pgoff_t pgoff;
+ } gmem;
+#endif
};
+static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
+{
+ return slot && (slot->flags & KVM_MEM_GUEST_MEMFD);
+}
+
static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
@@ -662,7 +681,7 @@ struct kvm_irq_routing_table {
* Array indexed by gsi. Each entry contains list of irq chips
* the gsi is connected to.
*/
- struct hlist_head map[];
+ struct hlist_head map[] __counted_by(nr_rt_entries);
};
#endif
@@ -675,13 +694,29 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm);
#define KVM_MEM_SLOTS_NUM SHRT_MAX
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
-#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#if KVM_MAX_NR_ADDRESS_SPACES == 1
+static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm)
+{
+ return KVM_MAX_NR_ADDRESS_SPACES;
+}
+
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
{
return 0;
}
#endif
+/*
+ * Arch code must define kvm_arch_has_private_mem if support for private memory
+ * is enabled.
+ */
+#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
+{
+ return false;
+}
+#endif
+
struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -719,9 +754,9 @@ struct kvm {
struct mm_struct *mm; /* userspace tied to this vm */
unsigned long nr_memslot_pages;
/* The two memslot sets - active and inactive (per address space) */
- struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
+ struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2];
/* The current active memslot set for each address space */
- struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
+ struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES];
struct xarray vcpu_array;
/*
* Protected by slots_lock, but can be read outside if an
@@ -751,7 +786,7 @@ struct kvm {
struct list_head vm_list;
struct mutex lock;
struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
-#ifdef CONFIG_HAVE_KVM_EVENTFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct {
spinlock_t lock;
struct list_head items;
@@ -759,8 +794,8 @@ struct kvm {
struct list_head resampler_list;
struct mutex resampler_lock;
} irqfds;
- struct list_head ioeventfds;
#endif
+ struct list_head ioeventfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
refcount_t users_count;
@@ -776,17 +811,16 @@ struct kvm {
* Update side is protected by irq_lock.
*/
struct kvm_irq_routing_table __rcu *irq_routing;
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+
struct hlist_head irq_ack_notifier_list;
#endif
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
unsigned long mmu_invalidate_seq;
long mmu_invalidate_in_progress;
- unsigned long mmu_invalidate_range_start;
- unsigned long mmu_invalidate_range_end;
+ gfn_t mmu_invalidate_range_start;
+ gfn_t mmu_invalidate_range_end;
#endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -805,6 +839,10 @@ struct kvm {
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
#endif
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+ /* Protected by slots_locks (for writes) and RCU (for reads) */
+ struct xarray mem_attr_array;
+#endif
char stats_id[KVM_STATS_NAME_SIZE];
};
@@ -865,6 +903,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
unlikely(__ret); \
})
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data. Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the hosts and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \
+({ \
+ bool __ret = !!(cond); \
+ \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \
+ BUG_ON(__ret); \
+ else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PROVE_RCU
@@ -944,7 +1001,7 @@ static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
}
#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
@@ -968,7 +1025,7 @@ void kvm_put_kvm_no_destroy(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
- as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
+ as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
@@ -1125,9 +1182,9 @@ enum kvm_mr_change {
};
int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region2 *mem);
int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+ const struct kvm_userspace_memory_region2 *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1266,21 +1323,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
*
* @gpc: struct gfn_to_pfn_cache object.
* @kvm: pointer to kvm instance.
- * @vcpu: vCPU to be used for marking pages dirty and to be woken on
- * invalidation.
- * @usage: indicates if the resulting host physical PFN is used while
- * the @vcpu is IN_GUEST_MODE (in which case invalidation of
- * the cache from MMU notifiers---but not for KVM memslot
- * changes!---will also force @vcpu to exit the guest and
- * refresh the cache); and/or if the PFN used directly
- * by KVM (and thus needs a kernel virtual mapping).
*
* This sets up a gfn_to_pfn_cache by initializing locks and assigning the
* immutable attributes. Note, the cache must be zero-allocated (or zeroed by
* the caller before init).
*/
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm);
/**
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
@@ -1301,6 +1349,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
/**
+ * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA.
+ *
+ * @gpc: struct gfn_to_pfn_cache object.
+ * @hva: userspace virtual address to map.
+ * @len: sanity check; the range being access must fit a single page.
+ *
+ * @return: 0 for success.
+ * -EINVAL for a mapping which would cross a page boundary.
+ * -EFAULT for an untranslatable guest physical address.
+ *
+ * The semantics of this function are the same as those of kvm_gpc_activate(). It
+ * merely bypasses a layer of address translation.
+ */
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len);
+
+/**
* kvm_gpc_check - check validity of a gfn_to_pfn_cache.
*
* @gpc: struct gfn_to_pfn_cache object.
@@ -1346,6 +1410,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
*/
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
+static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc)
+{
+ return gpc->active && !kvm_is_error_gpa(gpc->gpa);
+}
+
+static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc)
+{
+ return gpc->active && kvm_is_error_gpa(gpc->gpa);
+}
+
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
@@ -1359,6 +1433,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1368,10 +1445,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
- unsigned long end);
-void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
- unsigned long end);
+void kvm_mmu_invalidate_begin(struct kvm *kvm);
+void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
+void kvm_mmu_invalidate_end(struct kvm *kvm);
+bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -1387,10 +1464,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1452,9 +1526,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
-int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1479,11 +1554,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
return -ENOTSUPP;
}
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+ gfn_t gfn, u64 nr_pages)
+{
+ return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
#endif
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -1723,11 +1810,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
-static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
{
unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
- return kvm_is_error_hva(hva);
+ return !kvm_is_error_hva(hva);
+}
+
+static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc)
+{
+ lockdep_assert_held(&gpc->lock);
+
+ if (!gpc->memslot)
+ return;
+
+ mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa));
}
enum kvm_stat_kind {
@@ -1914,7 +2011,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
if (unlikely(kvm->mmu_invalidate_in_progress))
@@ -1937,9 +2034,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
return 0;
}
-static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
+static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
unsigned long mmu_seq,
- unsigned long hva)
+ gfn_t gfn)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
@@ -1948,14 +2045,50 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
* that might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
- if (unlikely(kvm->mmu_invalidate_in_progress) &&
- hva >= kvm->mmu_invalidate_range_start &&
- hva < kvm->mmu_invalidate_range_end)
- return 1;
+ if (unlikely(kvm->mmu_invalidate_in_progress)) {
+ /*
+ * Dropping mmu_lock after bumping mmu_invalidate_in_progress
+ * but before updating the range is a KVM bug.
+ */
+ if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA ||
+ kvm->mmu_invalidate_range_end == INVALID_GPA))
+ return 1;
+
+ if (gfn >= kvm->mmu_invalidate_range_start &&
+ gfn < kvm->mmu_invalidate_range_end)
+ return 1;
+ }
+
if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
+
+/*
+ * This lockless version of the range-based retry check *must* be paired with a
+ * call to the locked version after acquiring mmu_lock, i.e. this is safe to
+ * use only as a pre-check to avoid contending mmu_lock. This version *will*
+ * get false negatives and false positives.
+ */
+static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm,
+ unsigned long mmu_seq,
+ gfn_t gfn)
+{
+ /*
+ * Use READ_ONCE() to ensure the in-progress flag and sequence counter
+ * are always read from memory, e.g. so that checking for retry in a
+ * loop won't result in an infinite retry loop. Don't force loads for
+ * start+end, as the key to avoiding infinite retry loops is observing
+ * the 1=>0 transition of in-progress, i.e. getting false negatives
+ * due to stale start+end values is acceptable.
+ */
+ if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) &&
+ gfn >= kvm->mmu_invalidate_range_start &&
+ gfn < kvm->mmu_invalidate_range_end)
+ return true;
+
+ return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq;
+}
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -1980,12 +2113,10 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-#ifdef CONFIG_HAVE_KVM_EVENTFD
-
void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
bool kvm_notify_irqfd_resampler(struct kvm *kvm,
@@ -2006,31 +2137,7 @@ static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm,
{
return false;
}
-#endif
-
-#else
-
-static inline void kvm_eventfd_init(struct kvm *kvm) {}
-
-static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
-{
- return -EINVAL;
-}
-
-static inline void kvm_irqfd_release(struct kvm *kvm) {}
-
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm)
-{
-}
-#endif
-
-static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
-{
- return -ENOSYS;
-}
-
-#endif /* CONFIG_HAVE_KVM_EVENTFD */
+#endif /* CONFIG_HAVE_KVM_IRQCHIP */
void kvm_arch_irq_routing_update(struct kvm *kvm);
@@ -2148,8 +2255,6 @@ struct kvm_device_ops {
int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
};
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
@@ -2287,4 +2392,57 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
/* Max number of entries allowed for each kvm dirty ring */
#define KVM_DIRTY_RING_MAX_ENTRIES 65536
+static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ gpa_t gpa, gpa_t size,
+ bool is_write, bool is_exec,
+ bool is_private)
+{
+ vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
+ vcpu->run->memory_fault.gpa = gpa;
+ vcpu->run->memory_fault.size = size;
+
+ /* RWX flags are not (yet) defined or communicated to userspace. */
+ vcpu->run->memory_fault.flags = 0;
+ if (is_private)
+ vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
+}
+
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
+{
+ return xa_to_value(xa_load(&kvm->mem_attr_array, gfn));
+}
+
+bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
+ unsigned long attrs);
+bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range);
+bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range);
+
+static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
+{
+ return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
+ kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
+}
+#else
+static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
+
+#ifdef CONFIG_KVM_PRIVATE_MEM
+int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+#else
+static inline int kvm_gmem_get_pfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ kvm_pfn_t *pfn, int *max_order)
+{
+ KVM_BUG_ON(1, kvm);
+ return -EIO;
+}
+#endif /* CONFIG_KVM_PRIVATE_MEM */
+
#endif
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 6f4737d5046a..d93f6522b2c3 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -6,6 +6,7 @@
struct kvm;
struct kvm_async_pf;
struct kvm_device_ops;
+struct kvm_gfn_range;
struct kvm_interrupt;
struct kvm_irq_routing_table;
struct kvm_memory_slot;
@@ -48,12 +49,6 @@ typedef u64 hfn_t;
typedef hfn_t kvm_pfn_t;
-enum pfn_cache_usage {
- KVM_GUEST_USES_PFN = BIT(0),
- KVM_HOST_USES_PFN = BIT(1),
- KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
-};
-
struct gfn_to_hva_cache {
u64 generation;
gpa_t gpa;
@@ -68,13 +63,11 @@ struct gfn_to_pfn_cache {
unsigned long uhva;
struct kvm_memory_slot *memslot;
struct kvm *kvm;
- struct kvm_vcpu *vcpu;
struct list_head list;
rwlock_t lock;
struct mutex refresh_lock;
void *khva;
kvm_pfn_t pfn;
- enum pfn_cache_usage usage;
bool active;
bool valid;
};
diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h
index 612b4cab3819..36df927ec4b7 100644
--- a/include/linux/led-class-flash.h
+++ b/include/linux/led-class-flash.h
@@ -85,7 +85,6 @@ static inline struct led_classdev_flash *lcdev_to_flcdev(
return container_of(lcdev, struct led_classdev_flash, led_cdev);
}
-#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH)
/**
* led_classdev_flash_register_ext - register a new object of LED class with
* init data and with support for flash LEDs
@@ -116,29 +115,6 @@ int devm_led_classdev_flash_register_ext(struct device *parent,
void devm_led_classdev_flash_unregister(struct device *parent,
struct led_classdev_flash *fled_cdev);
-#else
-
-static inline int led_classdev_flash_register_ext(struct device *parent,
- struct led_classdev_flash *fled_cdev,
- struct led_init_data *init_data)
-{
- return 0;
-}
-
-static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {};
-static inline int devm_led_classdev_flash_register_ext(struct device *parent,
- struct led_classdev_flash *fled_cdev,
- struct led_init_data *init_data)
-{
- return 0;
-}
-
-static inline void devm_led_classdev_flash_unregister(struct device *parent,
- struct led_classdev_flash *fled_cdev)
-{};
-
-#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */
-
static inline int led_classdev_flash_register(struct device *parent,
struct led_classdev_flash *fled_cdev)
{
diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h
index 210d57bcd767..db9f34c6736e 100644
--- a/include/linux/led-class-multicolor.h
+++ b/include/linux/led-class-multicolor.h
@@ -30,7 +30,6 @@ static inline struct led_classdev_mc *lcdev_to_mccdev(
return container_of(led_cdev, struct led_classdev_mc, led_cdev);
}
-#if IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR)
/**
* led_classdev_multicolor_register_ext - register a new object of led_classdev
* class with support for multicolor LEDs
@@ -64,34 +63,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent,
void devm_led_classdev_multicolor_unregister(struct device *parent,
struct led_classdev_mc *mcled_cdev);
-#else
-
-static inline int led_classdev_multicolor_register_ext(struct device *parent,
- struct led_classdev_mc *mcled_cdev,
- struct led_init_data *init_data)
-{
- return 0;
-}
-
-static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {};
-static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
- enum led_brightness brightness)
-{
- return 0;
-}
-
-static inline int devm_led_classdev_multicolor_register_ext(struct device *parent,
- struct led_classdev_mc *mcled_cdev,
- struct led_init_data *init_data)
-{
- return 0;
-}
-
-static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
- struct led_classdev_mc *mcled_cdev)
-{};
-
-#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
static inline int led_classdev_multicolor_register(struct device *parent,
struct led_classdev_mc *mcled_cdev)
diff --git a/include/linux/leds-expresswire.h b/include/linux/leds-expresswire.h
new file mode 100644
index 000000000000..a422921f4159
--- /dev/null
+++ b/include/linux/leds-expresswire.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Shared library for Kinetic's ExpressWire protocol.
+ * This protocol works by pulsing the ExpressWire IC's control GPIO.
+ * ktd2692 and ktd2801 are known to use this protocol.
+ */
+
+#ifndef _LEDS_EXPRESSWIRE_H
+#define _LEDS_EXPRESSWIRE_H
+
+#include <linux/types.h>
+
+struct gpio_desc;
+
+struct expresswire_timing {
+ unsigned long poweroff_us;
+ unsigned long detect_delay_us;
+ unsigned long detect_us;
+ unsigned long data_start_us;
+ unsigned long end_of_data_low_us;
+ unsigned long end_of_data_high_us;
+ unsigned long short_bitset_us;
+ unsigned long long_bitset_us;
+};
+
+struct expresswire_common_props {
+ struct gpio_desc *ctrl_gpio;
+ struct expresswire_timing timing;
+};
+
+void expresswire_power_off(struct expresswire_common_props *props);
+void expresswire_enable(struct expresswire_common_props *props);
+void expresswire_start(struct expresswire_common_props *props);
+void expresswire_end(struct expresswire_common_props *props);
+void expresswire_set_bit(struct expresswire_common_props *props, bool bit);
+void expresswire_write_u8(struct expresswire_common_props *props, u8 val);
+
+#endif /* _LEDS_EXPRESSWIRE_H */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 7d428100b42b..db6b114bb3d9 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -82,15 +82,7 @@ struct led_init_data {
bool devname_mandatory;
};
-#if IS_ENABLED(CONFIG_NEW_LEDS)
enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode);
-#else
-static inline enum led_default_state
-led_init_default_state_get(struct fwnode_handle *fwnode)
-{
- return LEDS_DEFSTATE_OFF;
-}
-#endif
struct led_hw_trigger_type {
int dummy;
@@ -100,6 +92,7 @@ struct led_classdev {
const char *name;
unsigned int brightness;
unsigned int max_brightness;
+ unsigned int color;
int flags;
/* Lower 16 bits reflect status */
@@ -278,20 +271,9 @@ static inline int led_classdev_register(struct device *parent,
return led_classdev_register_ext(parent, led_cdev, NULL);
}
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
int devm_led_classdev_register_ext(struct device *parent,
struct led_classdev *led_cdev,
struct led_init_data *init_data);
-#else
-static inline int
-devm_led_classdev_register_ext(struct device *parent,
- struct led_classdev *led_cdev,
- struct led_init_data *init_data)
-{
- return 0;
-}
-#endif
-
static inline int devm_led_classdev_register(struct device *parent,
struct led_classdev *led_cdev)
{
@@ -313,6 +295,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index);
extern void led_put(struct led_classdev *led_cdev);
struct led_classdev *__must_check devm_of_led_get(struct device *dev,
int index);
+struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev,
+ int index);
/**
* led_blink_set - set blinking with software fallback
@@ -524,23 +508,6 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
return led_cdev->trigger_data;
}
-/**
- * led_trigger_rename_static - rename a trigger
- * @name: the new trigger name
- * @trig: the LED trigger to rename
- *
- * Change a LED trigger name by copying the string passed in
- * name into current trigger name, which MUST be large
- * enough for the new string.
- *
- * Note that name must NOT point to the same string used
- * during LED registration, as that could lead to races.
- *
- * This is meant to be used on triggers with statically
- * allocated name.
- */
-void led_trigger_rename_static(const char *name, struct led_trigger *trig);
-
#define module_led_trigger(__led_trigger) \
module_driver(__led_trigger, led_trigger_register, \
led_trigger_unregister)
@@ -585,6 +552,9 @@ enum led_trigger_netdev_modes {
TRIGGER_NETDEV_LINK_10,
TRIGGER_NETDEV_LINK_100,
TRIGGER_NETDEV_LINK_1000,
+ TRIGGER_NETDEV_LINK_2500,
+ TRIGGER_NETDEV_LINK_5000,
+ TRIGGER_NETDEV_LINK_10000,
TRIGGER_NETDEV_HALF_DUPLEX,
TRIGGER_NETDEV_FULL_DUPLEX,
TRIGGER_NETDEV_TX,
@@ -669,7 +639,7 @@ struct gpio_led_platform_data {
gpio_blink_set_t gpio_blink_set;
};
-#ifdef CONFIG_NEW_LEDS
+#ifdef CONFIG_LEDS_GPIO_REGISTER
struct platform_device *gpio_led_register_device(
int id, const struct gpio_led_platform_data *pdata);
#else
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 820f7a3a2749..324d792e7c78 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -107,6 +107,7 @@ enum {
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
@@ -192,6 +193,7 @@ enum {
ATA_PFLAG_UNLOADING = (1 << 9), /* driver is being unloaded */
ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */
+ ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */
ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */
ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */
ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */
@@ -222,6 +224,10 @@ enum {
ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */
ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */
+ ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */
+ ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */
+ ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */
+
/* bits 24:31 of host->flags are reserved for LLD specific flags */
/* various lengths of time */
@@ -255,7 +261,7 @@ enum {
* advised to wait only for the following duration before
* doing SRST.
*/
- ATA_TMOUT_PMP_SRST_WAIT = 5000,
+ ATA_TMOUT_PMP_SRST_WAIT = 10000,
/* When the LPM policy is set to ATA_LPM_MAX_POWER, there might
* be a spurious PHY event, so ignore the first PHY event that
@@ -314,9 +320,10 @@ enum {
ATA_EH_ENABLE_LINK = (1 << 3),
ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */
ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */
+ ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */
ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK |
- ATA_EH_GET_SUCCESS_SENSE,
+ ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE,
ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET |
ATA_EH_ENABLE_LINK,
@@ -344,7 +351,6 @@ enum {
ATA_LINK_RESUME_TRIES = 5,
/* how hard are we gonna try to probe/recover devices */
- ATA_PROBE_MAX_TRIES = 3,
ATA_EH_DEV_TRIES = 3,
ATA_EH_PMP_TRIES = 5,
ATA_EH_PMP_LINK_TRIES = 3,
@@ -354,7 +360,7 @@ enum {
/* This should match the actual table size of
* ata_eh_cmd_timeout_table in libata-eh.c.
*/
- ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7,
+ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8,
/* Horkage types. May be set by libata or controller on drives
(some horkage may be drive/controller pair dependent */
@@ -466,7 +472,7 @@ enum ata_completion_errors {
/*
* Link power management policy: If you alter this, you also need to
- * alter libata-scsi.c (for the ascii descriptions)
+ * alter libata-sata.c (for the ascii descriptions)
*/
enum ata_lpm_policy {
ATA_LPM_UNKNOWN,
@@ -651,7 +657,7 @@ struct ata_cpr {
struct ata_cpr_log {
u8 nr_cpr;
- struct ata_cpr cpr[];
+ struct ata_cpr cpr[] __counted_by(nr_cpr);
};
struct ata_device {
@@ -977,12 +983,6 @@ struct ata_port_operations {
ssize_t size);
/*
- * Obsolete
- */
- void (*phy_reset)(struct ata_port *ap);
- void (*eng_timeout)(struct ata_port *ap);
-
- /*
* ->inherits must be the last field and all the preceding
* fields must be pointers.
*/
@@ -1116,7 +1116,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap)
extern int ata_ratelimit(void);
extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
- u32 val, unsigned long interval, unsigned long timeout);
+ u32 val, unsigned int interval, unsigned int timeout);
extern int atapi_cmd_type(u8 opcode);
extern unsigned int ata_pack_xfermask(unsigned int pio_mask,
unsigned int mwdma_mask,
@@ -1151,6 +1151,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev,
struct block_device *bdev,
sector_t capacity, int geom[]);
extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev);
+extern int ata_scsi_slave_alloc(struct scsi_device *sdev);
extern int ata_scsi_slave_config(struct scsi_device *sdev);
extern void ata_scsi_slave_destroy(struct scsi_device *sdev);
extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
@@ -1166,11 +1167,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *
* SATA specific code - drivers/ata/libata-sata.c
*/
#ifdef CONFIG_SATA_HOST
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
+extern const unsigned int sata_deb_timing_normal[];
+extern const unsigned int sata_deb_timing_hotplug[];
+extern const unsigned int sata_deb_timing_long[];
-static inline const unsigned long *
+static inline const unsigned int *
sata_ehc_deb_timing(struct ata_eh_context *ehc)
{
if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
@@ -1185,14 +1186,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
extern int sata_set_spd(struct ata_link *link);
extern int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing, unsigned long deadline,
+ const unsigned int *timing, unsigned long deadline,
bool *online, int (*check_ready)(struct ata_link *));
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+extern int sata_link_resume(struct ata_link *link, const unsigned int *params,
unsigned long deadline);
extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link);
extern void ata_eh_analyze_ncq_error(struct ata_link *link);
#else
-static inline const unsigned long *
+static inline const unsigned int *
sata_ehc_deb_timing(struct ata_eh_context *ehc)
{
return NULL;
@@ -1212,7 +1213,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
}
static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
static inline int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing,
+ const unsigned int *timing,
unsigned long deadline,
bool *online,
int (*check_ready)(struct ata_link *))
@@ -1222,7 +1223,7 @@ static inline int sata_link_hardreset(struct ata_link *link,
return -EOPNOTSUPP;
}
static inline int sata_link_resume(struct ata_link *link,
- const unsigned long *params,
+ const unsigned int *params,
unsigned long deadline)
{
return -EOPNOTSUPP;
@@ -1234,20 +1235,15 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link)
static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
#endif
extern int sata_link_debounce(struct ata_link *link,
- const unsigned long *params, unsigned long deadline);
+ const unsigned int *params, unsigned long deadline);
extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
bool spm_wakeup);
extern int ata_slave_link_init(struct ata_port *ap);
-extern void ata_sas_port_destroy(struct ata_port *);
extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
+extern void ata_port_probe(struct ata_port *ap);
extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
extern void ata_tf_to_fis(const struct ata_taskfile *tf,
@@ -1404,6 +1400,7 @@ extern const struct attribute_group *ata_common_sdev_groups[];
.this_id = ATA_SHT_THIS_ID, \
.emulated = ATA_SHT_EMULATED, \
.proc_name = drv_name, \
+ .slave_alloc = ata_scsi_slave_alloc, \
.slave_destroy = ata_scsi_slave_destroy, \
.bios_param = ata_std_bios_param, \
.unlock_native_capacity = ata_scsi_unlock_native_capacity,\
@@ -1565,6 +1562,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...);
extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
const char *name);
#endif
+static inline void ata_port_desc_misc(struct ata_port *ap, int irq)
+{
+ ata_port_desc(ap, "irq %d", irq);
+ ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy);
+}
static inline bool ata_tag_internal(unsigned int tag)
{
@@ -1785,7 +1787,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap,
{
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
- if (unlikely(!qc) || !ap->ops->error_handler)
+ if (unlikely(!qc))
return qc;
if ((qc->flags & (ATA_QCFLAG_ACTIVE |
@@ -1876,7 +1878,7 @@ static inline int ata_check_ready(u8 status)
}
static inline unsigned long ata_deadline(unsigned long from_jiffies,
- unsigned long timeout_msecs)
+ unsigned int timeout_msecs)
{
return from_jiffies + msecs_to_jiffies(timeout_msecs);
}
@@ -1885,23 +1887,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies,
change in future hardware and specs, secondly 0xFF means 'no DMA' but is
> UDMA_0. Dyma ddreigiau */
-static inline int ata_using_mwdma(struct ata_device *adev)
+static inline bool ata_using_mwdma(struct ata_device *adev)
{
- if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4)
- return 1;
- return 0;
+ return adev->dma_mode >= XFER_MW_DMA_0 &&
+ adev->dma_mode <= XFER_MW_DMA_4;
}
-static inline int ata_using_udma(struct ata_device *adev)
+static inline bool ata_using_udma(struct ata_device *adev)
{
- if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7)
- return 1;
- return 0;
+ return adev->dma_mode >= XFER_UDMA_0 &&
+ adev->dma_mode <= XFER_UDMA_7;
}
-static inline int ata_dma_enabled(struct ata_device *adev)
+static inline bool ata_dma_enabled(struct ata_device *adev)
{
- return (adev->dma_mode == 0xFF ? 0 : 1);
+ return adev->dma_mode != 0xFF;
}
/**************************************************************************
diff --git a/include/linux/limits.h b/include/linux/limits.h
index f6bcc9369010..38eb7f6f7e88 100644
--- a/include/linux/limits.h
+++ b/include/linux/limits.h
@@ -10,6 +10,8 @@
#define SSIZE_MAX ((ssize_t)(SIZE_MAX >> 1))
#define PHYS_ADDR_MAX (~(phys_addr_t)0)
+#define RESOURCE_SIZE_MAX ((resource_size_t)~0)
+
#define U8_MAX ((u8)~0U)
#define S8_MAX ((s8)(U8_MAX >> 1))
#define S8_MIN ((s8)(-S8_MAX - 1))
diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
index 15e0e0209da4..287f590ed56b 100644
--- a/include/linux/linkmode.h
+++ b/include/linux/linkmode.h
@@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst)
bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
}
+static inline void linkmode_fill(unsigned long *dst)
+{
+ bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
{
bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -43,15 +48,6 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr)
__set_bit(nr, addr);
}
-static inline void linkmode_set_bit_array(const int *array, int array_size,
- unsigned long *addr)
-{
- int i;
-
- for (i = 0; i < array_size; i++)
- linkmode_set_bit(array[i], addr);
-}
-
static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr)
{
__clear_bit(nr, addr);
@@ -71,6 +67,15 @@ static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr)
return test_bit(nr, addr);
}
+static inline void linkmode_set_bit_array(const int *array, int array_size,
+ unsigned long *addr)
+{
+ int i;
+
+ for (i = 0; i < array_size; i++)
+ linkmode_set_bit(array[i], addr);
+}
+
static inline int linkmode_equal(const unsigned long *src1,
const unsigned long *src2)
{
diff --git a/include/linux/list.h b/include/linux/list.h
index f10344dbad4d..5f4b0a39cf46 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -38,11 +38,92 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
WRITE_ONCE(list->prev, list);
}
+#ifdef CONFIG_LIST_HARDENED
+
#ifdef CONFIG_DEBUG_LIST
-extern bool __list_add_valid(struct list_head *new,
- struct list_head *prev,
- struct list_head *next);
-extern bool __list_del_entry_valid(struct list_head *entry);
+# define __list_valid_slowpath
+#else
+# define __list_valid_slowpath __cold __preserve_most
+#endif
+
+/*
+ * Performs the full set of list corruption checks before __list_add().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
+
+/*
+ * Performs list corruption checks before __list_add(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_add_valid_or_report().
+ */
+static __always_inline bool __list_add_valid(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ bool ret = true;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+ /*
+ * With the hardening version, elide checking if next and prev
+ * are NULL, since the immediate dereference of them below would
+ * result in a fault if NULL.
+ *
+ * With the reduced set of checks, we can afford to inline the
+ * checks, which also gives the compiler a chance to elide some
+ * of them completely if they can be proven at compile-time. If
+ * one of the pre-conditions does not hold, the slow-path will
+ * show a report which pre-condition failed.
+ */
+ if (likely(next->prev == prev && prev->next == next && new != prev && new != next))
+ return true;
+ ret = false;
+ }
+
+ ret &= __list_add_valid_or_report(new, prev, next);
+ return ret;
+}
+
+/*
+ * Performs the full set of list corruption checks before __list_del_entry().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
+
+/*
+ * Performs list corruption checks before __list_del_entry(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_del_entry_valid_or_report().
+ */
+static __always_inline bool __list_del_entry_valid(struct list_head *entry)
+{
+ bool ret = true;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+ struct list_head *prev = entry->prev;
+ struct list_head *next = entry->next;
+
+ /*
+ * With the hardening version, elide checking if next and prev
+ * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate
+ * dereference of them below would result in a fault.
+ */
+ if (likely(prev->next == entry && next->prev == entry))
+ return true;
+ ret = false;
+ }
+
+ ret &= __list_del_entry_valid_or_report(entry);
+ return ret;
+}
#else
static inline bool __list_add_valid(struct list_head *new,
struct list_head *prev,
@@ -606,6 +687,14 @@ static inline void list_splice_tail_init(struct list_head *list,
for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
/**
+ * list_for_each_reverse - iterate backwards over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each_reverse(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
* list_for_each_rcu - Iterate over a list in an RCU-safe fashion
* @pos: the &struct list_head to use as a loop cursor.
* @head: the head for your list.
@@ -677,7 +766,7 @@ static inline size_t list_count_nodes(struct list_head *head)
* @member: the name of the list_head within the struct.
*/
#define list_entry_is_head(pos, head, member) \
- (&pos->member == (head))
+ list_is_head(&pos->member, (head))
/**
* list_for_each_entry - iterate over list of given type
@@ -1030,6 +1119,26 @@ static inline void hlist_move_list(struct hlist_head *old,
old->first = NULL;
}
+/**
+ * hlist_splice_init() - move all entries from one list to another
+ * @from: hlist_head from which entries will be moved
+ * @last: last entry on the @from list
+ * @to: hlist_head to which entries will be moved
+ *
+ * @to can be empty, @from must contain at least @last.
+ */
+static inline void hlist_splice_init(struct hlist_head *from,
+ struct hlist_node *last,
+ struct hlist_head *to)
+{
+ if (to->first)
+ to->first->pprev = &last->next;
+ last->next = to->first;
+ to->first = from->first;
+ from->first->pprev = &to->first;
+ from->first = NULL;
+}
+
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
#define hlist_for_each(pos, head) \
@@ -1086,4 +1195,19 @@ static inline void hlist_move_list(struct hlist_head *old,
pos && ({ n = pos->member.next; 1; }); \
pos = hlist_entry_safe(n, typeof(*pos), member))
+/**
+ * hlist_count_nodes - count nodes in the hlist
+ * @head: the head for your hlist.
+ */
+static inline size_t hlist_count_nodes(struct hlist_head *head)
+{
+ struct hlist_node *pos;
+ size_t count = 0;
+
+ hlist_for_each(pos, head)
+ count++;
+
+ return count;
+}
+
#endif
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index b35968ee9fb5..792b67ceb631 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -24,6 +24,8 @@ enum lru_status {
LRU_SKIP, /* item cannot be locked, skip */
LRU_RETRY, /* item not freeable. May drop the lock
internally, but has to return locked. */
+ LRU_STOP, /* stop lru list walking. May drop the lock
+ internally, but has to return locked. */
};
struct list_lru_one {
@@ -62,8 +64,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
#define list_lru_init(lru) \
__list_lru_init((lru), false, NULL, NULL)
-#define list_lru_init_key(lru, key) \
- __list_lru_init((lru), false, (key), NULL)
#define list_lru_init_memcg(lru, shrinker) \
__list_lru_init((lru), true, NULL, shrinker)
@@ -73,8 +73,10 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
/**
* list_lru_add: add an element to the lru list's tail
- * @list_lru: the lru pointer
+ * @lru: the lru pointer
* @item: the item to be added.
+ * @nid: the node id of the sublist to add the item to.
+ * @memcg: the cgroup of the sublist to add the item to.
*
* If the element is already part of a list, this function returns doing
* nothing. Therefore the caller does not need to keep state about whether or
@@ -83,24 +85,54 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
* the caller organize itself in a way that elements can be in more than
* one type of list, it is up to the caller to fully remove the item from
* the previous list (with list_lru_del() for instance) before moving it
- * to @list_lru
+ * to @lru.
+ *
+ * Return: true if the list was updated, false otherwise
+ */
+bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+ struct mem_cgroup *memcg);
+
+/**
+ * list_lru_add_obj: add an element to the lru list's tail
+ * @lru: the lru pointer
+ * @item: the item to be added.
+ *
+ * This function is similar to list_lru_add(), but the NUMA node and the
+ * memcg of the sublist is determined by @item list_head. This assumption is
+ * valid for slab objects LRU such as dentries, inodes, etc.
*
* Return value: true if the list was updated, false otherwise
*/
-bool list_lru_add(struct list_lru *lru, struct list_head *item);
+bool list_lru_add_obj(struct list_lru *lru, struct list_head *item);
/**
- * list_lru_del: delete an element to the lru list
- * @list_lru: the lru pointer
+ * list_lru_del: delete an element from the lru list
+ * @lru: the lru pointer
* @item: the item to be deleted.
+ * @nid: the node id of the sublist to delete the item from.
+ * @memcg: the cgroup of the sublist to delete the item from.
*
- * This function works analogously as list_lru_add in terms of list
+ * This function works analogously as list_lru_add() in terms of list
* manipulation. The comments about an element already pertaining to
- * a list are also valid for list_lru_del.
+ * a list are also valid for list_lru_del().
*
- * Return value: true if the list was updated, false otherwise
+ * Return: true if the list was updated, false otherwise
*/
-bool list_lru_del(struct list_lru *lru, struct list_head *item);
+bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
+ struct mem_cgroup *memcg);
+
+/**
+ * list_lru_del_obj: delete an element from the lru list
+ * @lru: the lru pointer
+ * @item: the item to be deleted.
+ *
+ * This function is similar to list_lru_del(), but the NUMA node and the
+ * memcg of the sublist is determined by @item list_head. This assumption is
+ * valid for slab objects LRU such as dentries, inodes, etc.
+ *
+ * Return value: true if the list was updated, false otherwise.
+ */
+bool list_lru_del_obj(struct list_lru *lru, struct list_head *item);
/**
* list_lru_count_one: return the number of objects currently held by @lru
@@ -108,9 +140,11 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item);
* @nid: the node id to count from.
* @memcg: the cgroup to count from.
*
- * Always return a non-negative number, 0 for empty lists. There is no
- * guarantee that the list is not updated while the count is being computed.
- * Callers that want such a guarantee need to provide an outer lock.
+ * There is no guarantee that the list is not updated while the count is being
+ * computed. Callers that want such a guarantee need to provide an outer lock.
+ *
+ * Return: 0 for empty lists, otherwise the number of objects
+ * currently held by @lru.
*/
unsigned long list_lru_count_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg);
@@ -141,7 +175,7 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
/**
- * list_lru_walk_one: walk a list_lru, isolating and disposing freeable items.
+ * list_lru_walk_one: walk a @lru, isolating and disposing freeable items.
* @lru: the lru pointer.
* @nid: the node id to scan from.
* @memcg: the cgroup to scan from.
@@ -150,24 +184,24 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
* @cb_arg: opaque type that will be passed to @isolate
* @nr_to_walk: how many items to scan.
*
- * This function will scan all elements in a particular list_lru, calling the
+ * This function will scan all elements in a particular @lru, calling the
* @isolate callback for each of those items, along with the current list
* spinlock and a caller-provided opaque. The @isolate callback can choose to
* drop the lock internally, but *must* return with the lock held. The callback
- * will return an enum lru_status telling the list_lru infrastructure what to
+ * will return an enum lru_status telling the @lru infrastructure what to
* do with the object being scanned.
*
- * Please note that nr_to_walk does not mean how many objects will be freed,
+ * Please note that @nr_to_walk does not mean how many objects will be freed,
* just how many objects will be scanned.
*
- * Return value: the number of objects effectively removed from the LRU.
+ * Return: the number of objects effectively removed from the LRU.
*/
unsigned long list_lru_walk_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg,
list_lru_walk_cb isolate, void *cb_arg,
unsigned long *nr_to_walk);
/**
- * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items.
+ * list_lru_walk_one_irq: walk a @lru, isolating and disposing freeable items.
* @lru: the lru pointer.
* @nid: the node id to scan from.
* @memcg: the cgroup to scan from.
@@ -176,7 +210,7 @@ unsigned long list_lru_walk_one(struct list_lru *lru,
* @cb_arg: opaque type that will be passed to @isolate
* @nr_to_walk: how many items to scan.
*
- * Same as @list_lru_walk_one except that the spinlock is acquired with
+ * Same as list_lru_walk_one() except that the spinlock is acquired with
* spin_lock_irq().
*/
unsigned long list_lru_walk_one_irq(struct list_lru *lru,
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 85bda2d02d65..2c982ff7475a 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -74,6 +74,33 @@ static inline void init_llist_head(struct llist_head *list)
}
/**
+ * init_llist_node - initialize lock-less list node
+ * @node: the node to be initialised
+ *
+ * In cases where there is a need to test if a node is on
+ * a list or not, this initialises the node to clearly
+ * not be on any list.
+ */
+static inline void init_llist_node(struct llist_node *node)
+{
+ node->next = node;
+}
+
+/**
+ * llist_on_list - test if a lock-list list node is on a list
+ * @node: the node to test
+ *
+ * When a node is on a list the ->next pointer will be NULL or
+ * some other node. It can never point to itself. We use that
+ * in init_llist_node() to record that a node is not on any list,
+ * and here to test whether it is on any list.
+ */
+static inline bool llist_on_list(const struct llist_node *node)
+{
+ return node->next != node;
+}
+
+/**
* llist_entry - get the struct of this entry
* @ptr: the &struct llist_node pointer.
* @type: the type of the struct this is embedded in.
@@ -249,6 +276,25 @@ static inline struct llist_node *__llist_del_all(struct llist_head *head)
extern struct llist_node *llist_del_first(struct llist_head *head);
+/**
+ * llist_del_first_init - delete first entry from lock-list and mark is as being off-list
+ * @head: the head of lock-less list to delete from.
+ *
+ * This behave the same as llist_del_first() except that llist_init_node() is called
+ * on the returned node so that llist_on_list() will report false for the node.
+ */
+static inline struct llist_node *llist_del_first_init(struct llist_head *head)
+{
+ struct llist_node *n = llist_del_first(head);
+
+ if (n)
+ init_llist_node(n);
+ return n;
+}
+
+extern bool llist_del_first_this(struct llist_head *head,
+ struct llist_node *this);
+
struct llist_node *llist_reverse_order(struct llist_node *head);
#endif /* LLIST_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..1b95fe31051f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout;
extern bool nsm_use_hostnames;
extern u32 nsm_local_state;
+extern struct timer_list nlmsvc_retry;
+
/*
* Lockd client functions
*/
@@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-unsigned long nlmsvc_retry_blocked(void);
+void nlmsvc_retry_blocked(struct svc_rqst *rqstp);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
@@ -373,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
static inline int nlm_compare_locks(const struct file_lock *fl1,
const struct file_lock *fl2)
{
- return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
- && fl1->fl_pid == fl2->fl_pid
- && fl1->fl_owner == fl2->fl_owner
+ return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file)
+ && fl1->c.flc_pid == fl2->c.flc_pid
+ && fl1->c.flc_owner == fl2->c.flc_owner
&& fl1->fl_start == fl2->fl_start
&& fl1->fl_end == fl2->fl_end
- &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK);
+ &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
}
extern const struct lock_manager_operations nlmsvc_lock_operations;
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index b60fbcd8cdfa..80cca9426761 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -52,7 +52,7 @@ struct nlm_lock {
* FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to
* 32 bytes.
*/
-
+
struct nlm_cookie
{
unsigned char data[NLM_MAXCOOKIELEN];
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 310f85903c91..08b0d1d9d78b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -82,63 +82,6 @@ struct lock_chain {
u64 chain_key;
};
-#define MAX_LOCKDEP_KEYS_BITS 13
-#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
-#define INITIAL_CHAIN_KEY -1
-
-struct held_lock {
- /*
- * One-way hash of the dependency chain up to this point. We
- * hash the hashes step by step as the dependency chain grows.
- *
- * We use it for dependency-caching and we skip detection
- * passes and dependency-updates if there is a cache-hit, so
- * it is absolutely critical for 100% coverage of the validator
- * to have a unique key value for every unique dependency path
- * that can occur in the system, to make a unique hash value
- * as likely as possible - hence the 64-bit width.
- *
- * The task struct holds the current hash value (initialized
- * with zero), here we store the previous hash value:
- */
- u64 prev_chain_key;
- unsigned long acquire_ip;
- struct lockdep_map *instance;
- struct lockdep_map *nest_lock;
-#ifdef CONFIG_LOCK_STAT
- u64 waittime_stamp;
- u64 holdtime_stamp;
-#endif
- /*
- * class_idx is zero-indexed; it points to the element in
- * lock_classes this held lock instance belongs to. class_idx is in
- * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
- */
- unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
- /*
- * The lock-stack is unified in that the lock chains of interrupt
- * contexts nest ontop of process context chains, but we 'separate'
- * the hashes by starting with 0 if we cross into an interrupt
- * context, and we also keep do not add cross-context lock
- * dependencies - the lock usage graph walking covers that area
- * anyway, and we'd just unnecessarily increase the number of
- * dependencies otherwise. [Note: hardirq and softirq contexts
- * are separated from each other too.]
- *
- * The following field is used to detect when we cross into an
- * interrupt context:
- */
- unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
- unsigned int trylock:1; /* 16 bits */
-
- unsigned int read:2; /* see lock_acquire() comment */
- unsigned int check:1; /* see lock_acquire() comment */
- unsigned int hardirqs_off:1;
- unsigned int sync:1;
- unsigned int references:11; /* 32 bits */
- unsigned int pin_count;
-};
-
/*
* Initialization, self-test and debugging-output methods:
*/
@@ -625,6 +568,12 @@ do { \
WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \
} while (0)
+#define lockdep_assert_no_hardirq() \
+do { \
+ WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \
+ !this_cpu_read(hardirqs_enabled))); \
+} while (0)
+
#define lockdep_assert_preemption_enabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
@@ -659,6 +608,7 @@ do { \
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
+# define lockdep_assert_no_hardirq() do { } while (0)
# define lockdep_assert_preemption_enabled() do { } while (0)
# define lockdep_assert_preemption_disabled() do { } while (0)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 2ebc323d345a..70d30d40ea4a 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -127,12 +127,12 @@ struct lock_class {
unsigned long usage_mask;
const struct lock_trace *usage_traces[LOCK_TRACE_STATES];
+ const char *name;
/*
* Generation counter, when doing certain classes of graph walking,
* to ensure that we check one node only once:
*/
int name_version;
- const char *name;
u8 wait_type_inner;
u8 wait_type_outer;
@@ -198,6 +198,63 @@ struct lockdep_map {
struct pin_cookie { unsigned int val; };
+#define MAX_LOCKDEP_KEYS_BITS 13
+#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
+#define INITIAL_CHAIN_KEY -1
+
+struct held_lock {
+ /*
+ * One-way hash of the dependency chain up to this point. We
+ * hash the hashes step by step as the dependency chain grows.
+ *
+ * We use it for dependency-caching and we skip detection
+ * passes and dependency-updates if there is a cache-hit, so
+ * it is absolutely critical for 100% coverage of the validator
+ * to have a unique key value for every unique dependency path
+ * that can occur in the system, to make a unique hash value
+ * as likely as possible - hence the 64-bit width.
+ *
+ * The task struct holds the current hash value (initialized
+ * with zero), here we store the previous hash value:
+ */
+ u64 prev_chain_key;
+ unsigned long acquire_ip;
+ struct lockdep_map *instance;
+ struct lockdep_map *nest_lock;
+#ifdef CONFIG_LOCK_STAT
+ u64 waittime_stamp;
+ u64 holdtime_stamp;
+#endif
+ /*
+ * class_idx is zero-indexed; it points to the element in
+ * lock_classes this held lock instance belongs to. class_idx is in
+ * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
+ */
+ unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
+ /*
+ * The lock-stack is unified in that the lock chains of interrupt
+ * contexts nest ontop of process context chains, but we 'separate'
+ * the hashes by starting with 0 if we cross into an interrupt
+ * context, and we also keep do not add cross-context lock
+ * dependencies - the lock usage graph walking covers that area
+ * anyway, and we'd just unnecessarily increase the number of
+ * dependencies otherwise. [Note: hardirq and softirq contexts
+ * are separated from each other too.]
+ *
+ * The following field is used to detect when we cross into an
+ * interrupt context:
+ */
+ unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
+ unsigned int trylock:1; /* 16 bits */
+
+ unsigned int read:2; /* see lock_acquire() comment */
+ unsigned int check:1; /* see lock_acquire() comment */
+ unsigned int hardirqs_off:1;
+ unsigned int sync:1;
+ unsigned int references:11; /* 32 bits */
+ unsigned int pin_count;
+};
+
#else /* !CONFIG_LOCKDEP */
/*
diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h
index 54945aa824b4..babf4e3c28ba 100644
--- a/include/linux/logic_pio.h
+++ b/include/linux/logic_pio.h
@@ -39,9 +39,6 @@ struct logic_pio_host_ops {
#ifdef CONFIG_INDIRECT_PIO
u8 logic_inb(unsigned long addr);
-void logic_outb(u8 value, unsigned long addr);
-void logic_outw(u16 value, unsigned long addr);
-void logic_outl(u32 value, unsigned long addr);
u16 logic_inw(unsigned long addr);
u32 logic_inl(unsigned long addr);
void logic_outb(u8 value, unsigned long addr);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 7308a1a7599b..334e00efbde4 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -32,28 +32,29 @@ LSM_HOOK(int, 0, binder_transaction, const struct cred *from,
LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from,
const struct cred *to)
LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from,
- const struct cred *to, struct file *file)
+ const struct cred *to, const struct file *file)
LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child,
unsigned int mode)
LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent)
-LSM_HOOK(int, 0, capget, struct task_struct *target, kernel_cap_t *effective,
+LSM_HOOK(int, 0, capget, const struct task_struct *target, kernel_cap_t *effective,
kernel_cap_t *inheritable, kernel_cap_t *permitted)
LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old,
const kernel_cap_t *effective, const kernel_cap_t *inheritable,
const kernel_cap_t *permitted)
LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns,
int cap, unsigned int opts)
-LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb)
+LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb)
LSM_HOOK(int, 0, quota_on, struct dentry *dentry)
LSM_HOOK(int, 0, syslog, int type)
LSM_HOOK(int, 0, settime, const struct timespec64 *ts,
const struct timezone *tz)
-LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages)
+LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages)
LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm)
-LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file)
+LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file)
LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
-LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
-LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
+LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm)
+LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm)
+LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference)
LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
struct fs_context *src_sc)
LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
@@ -65,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts)
LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts)
LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts)
LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts)
-LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb)
+LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb)
LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb)
LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry)
LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path,
@@ -93,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry,
LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry)
LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry,
umode_t mode, unsigned int dev)
+LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap,
+ struct dentry *dentry)
LSM_HOOK(int, 0, path_truncate, const struct path *path)
LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry,
const char *old_name)
@@ -111,13 +114,15 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask,
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
-LSM_HOOK(int, 0, inode_init_security, struct inode *inode,
- struct inode *dir, const struct qstr *qstr, const char **name,
- void **value, size_t *len)
+LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
+ struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
+ int *xattr_count)
LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode,
const struct qstr *name, const struct inode *context_inode)
LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry,
umode_t mode)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap,
+ struct inode *inode)
LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry)
LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry)
@@ -134,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry)
LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode,
bool rcu)
LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask)
-LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr)
+LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, int ia_valid)
LSM_HOOK(int, 0, inode_getattr, const struct path *path)
LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name, const void *value,
@@ -145,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name)
LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry)
LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap,
struct dentry *dentry, const char *name)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry,
+ const char *name)
LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name, struct posix_acl *kacl)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry,
+ const char *acl_name, struct posix_acl *kacl)
LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name)
+LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *acl_name)
LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry)
LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap,
struct dentry *dentry)
@@ -167,9 +181,12 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
struct kernfs_node *kn)
LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
LSM_HOOK(int, 0, file_alloc_security, struct file *file)
+LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
unsigned long arg)
+LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
+ unsigned long arg)
LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
@@ -183,6 +200,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk,
struct fown_struct *fown, int sig)
LSM_HOOK(int, 0, file_receive, struct file *file)
LSM_HOOK(int, 0, file_open, struct file *file)
+LSM_HOOK(int, 0, file_post_open, struct file *file, int mask)
LSM_HOOK(int, 0, file_truncate, struct file *file)
LSM_HOOK(int, 0, task_alloc, struct task_struct *task,
unsigned long clone_flags)
@@ -261,6 +279,10 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops,
LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb)
LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry,
struct inode *inode)
+LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr,
+ struct lsm_ctx __user *ctx, u32 *size, u32 flags)
+LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr,
+ struct lsm_ctx *ctx, u32 size, u32 flags)
LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name,
char **value)
LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
@@ -272,7 +294,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen)
LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode)
LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen)
LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen)
-LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx,
+LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx,
u32 *ctxlen)
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
@@ -308,15 +330,15 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
sockptr_t optval, sockptr_t optlen, unsigned int len)
-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
struct sk_buff *skb, u32 *secid)
LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk,
struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, const struct sock *sk, u32 *secid)
LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent)
LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
@@ -383,6 +405,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key)
LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm)
LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer)
+LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
+ struct key *key, const void *payload, size_t payload_len,
+ unsigned long flags, bool create)
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
@@ -397,10 +422,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size)
LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode)
LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog)
-LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map)
-LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map)
-LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux)
-LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
+LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map)
+LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
+LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
+LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
+LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
#endif /* CONFIG_BPF_SYSCALL */
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index ab2b2fafa4a4..a2ade0ffe9e7 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -25,9 +25,11 @@
#ifndef __LINUX_LSM_HOOKS_H
#define __LINUX_LSM_HOOKS_H
+#include <uapi/linux/lsm.h>
#include <linux/security.h>
#include <linux/init.h>
#include <linux/rculist.h>
+#include <linux/xattr.h>
union security_list_options {
#define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__);
@@ -41,6 +43,18 @@ struct security_hook_heads {
#undef LSM_HOOK
} __randomize_layout;
+/**
+ * struct lsm_id - Identify a Linux Security Module.
+ * @lsm: name of the LSM, must be approved by the LSM maintainers
+ * @id: LSM ID number from uapi/linux/lsm.h
+ *
+ * Contains the information that identifies the LSM.
+ */
+struct lsm_id {
+ const char *name;
+ u64 id;
+};
+
/*
* Security module hook list structure.
* For use with generic list macros for common operations.
@@ -49,7 +63,7 @@ struct security_hook_list {
struct hlist_node list;
struct hlist_head *head;
union security_list_options hook;
- const char *lsm;
+ const struct lsm_id *lsmid;
} __randomize_layout;
/*
@@ -63,8 +77,27 @@ struct lsm_blob_sizes {
int lbs_ipc;
int lbs_msg_msg;
int lbs_task;
+ int lbs_xattr_count; /* number of xattr slots in new_xattrs array */
};
+/**
+ * lsm_get_xattr_slot - Return the next available slot and increment the index
+ * @xattrs: array storing LSM-provided xattrs
+ * @xattr_count: number of already stored xattrs (updated)
+ *
+ * Retrieve the first available slot in the @xattrs array to fill with an xattr,
+ * and increment @xattr_count.
+ *
+ * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise.
+ */
+static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs,
+ int *xattr_count)
+{
+ if (unlikely(!xattrs))
+ return NULL;
+ return &xattrs[(*xattr_count)++];
+}
+
/*
* LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void
* LSM hooks (in include/linux/lsm_hook_defs.h).
@@ -84,7 +117,7 @@ extern struct security_hook_heads security_hook_heads;
extern char *lsm_names;
extern void security_add_hooks(struct security_hook_list *hooks, int count,
- const char *lsm);
+ const struct lsm_id *lsmid);
#define LSM_FLAG_LEGACY_MAJOR BIT(0)
#define LSM_FLAG_EXCLUSIVE BIT(1)
diff --git a/include/linux/lwq.h b/include/linux/lwq.h
new file mode 100644
index 000000000000..d081d5cf8e33
--- /dev/null
+++ b/include/linux/lwq.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef LWQ_H
+#define LWQ_H
+/*
+ * Light-weight single-linked queue built from llist
+ *
+ * Entries can be enqueued from any context with no locking.
+ * Entries can be dequeued from process context with integrated locking.
+ *
+ * This is particularly suitable when work items are queued in
+ * BH or IRQ context, and where work items are handled one at a time
+ * by dedicated threads.
+ */
+#include <linux/container_of.h>
+#include <linux/spinlock.h>
+#include <linux/llist.h>
+
+struct lwq_node {
+ struct llist_node node;
+};
+
+struct lwq {
+ spinlock_t lock;
+ struct llist_node *ready; /* entries to be dequeued */
+ struct llist_head new; /* entries being enqueued */
+};
+
+/**
+ * lwq_init - initialise a lwq
+ * @q: the lwq object
+ */
+static inline void lwq_init(struct lwq *q)
+{
+ spin_lock_init(&q->lock);
+ q->ready = NULL;
+ init_llist_head(&q->new);
+}
+
+/**
+ * lwq_empty - test if lwq contains any entry
+ * @q: the lwq object
+ *
+ * This empty test contains an acquire barrier so that if a wakeup
+ * is sent when lwq_dequeue returns true, it is safe to go to sleep after
+ * a test on lwq_empty().
+ */
+static inline bool lwq_empty(struct lwq *q)
+{
+ /* acquire ensures ordering wrt lwq_enqueue() */
+ return smp_load_acquire(&q->ready) == NULL && llist_empty(&q->new);
+}
+
+struct llist_node *__lwq_dequeue(struct lwq *q);
+/**
+ * lwq_dequeue - dequeue first (oldest) entry from lwq
+ * @q: the queue to dequeue from
+ * @type: the type of object to return
+ * @member: them member in returned object which is an lwq_node.
+ *
+ * Remove a single object from the lwq and return it. This will take
+ * a spinlock and so must always be called in the same context, typcially
+ * process contet.
+ */
+#define lwq_dequeue(q, type, member) \
+ ({ struct llist_node *_n = __lwq_dequeue(q); \
+ _n ? container_of(_n, type, member.node) : NULL; })
+
+struct llist_node *lwq_dequeue_all(struct lwq *q);
+
+/**
+ * lwq_for_each_safe - iterate over detached queue allowing deletion
+ * @_n: iterator variable
+ * @_t1: temporary struct llist_node **
+ * @_t2: temporary struct llist_node *
+ * @_l: address of llist_node pointer from lwq_dequeue_all()
+ * @_member: member in _n where lwq_node is found.
+ *
+ * Iterate over members in a dequeued list. If the iterator variable
+ * is set to NULL, the iterator removes that entry from the queue.
+ */
+#define lwq_for_each_safe(_n, _t1, _t2, _l, _member) \
+ for (_t1 = (_l); \
+ *(_t1) ? (_n = container_of(*(_t1), typeof(*(_n)), _member.node),\
+ _t2 = ((*_t1)->next), \
+ true) \
+ : false; \
+ (_n) ? (_t1 = &(_n)->_member.node.next, 0) \
+ : ((*(_t1) = (_t2)), 0))
+
+/**
+ * lwq_enqueue - add a new item to the end of the queue
+ * @n - the lwq_node embedded in the item to be added
+ * @q - the lwq to append to.
+ *
+ * No locking is needed to append to the queue so this can
+ * be called from any context.
+ * Return %true is the list may have previously been empty.
+ */
+static inline bool lwq_enqueue(struct lwq_node *n, struct lwq *q)
+{
+ /* acquire enqures ordering wrt lwq_dequeue */
+ return llist_add(&n->node, &q->new) &&
+ smp_load_acquire(&q->ready) == NULL;
+}
+
+/**
+ * lwq_enqueue_batch - add a list of new items to the end of the queue
+ * @n - the lwq_node embedded in the first item to be added
+ * @q - the lwq to append to.
+ *
+ * No locking is needed to append to the queue so this can
+ * be called from any context.
+ * Return %true is the list may have previously been empty.
+ */
+static inline bool lwq_enqueue_batch(struct llist_node *n, struct lwq *q)
+{
+ struct llist_node *e = n;
+
+ /* acquire enqures ordering wrt lwq_dequeue */
+ return llist_add_batch(llist_reverse_order(n), e, &q->new) &&
+ smp_load_acquire(&q->ready) == NULL;
+}
+#endif /* LWQ_H */
diff --git a/include/linux/maple.h b/include/linux/maple.h
index 9b140272ee16..9aae44efcfd4 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -5,7 +5,6 @@
#include <mach/maple.h>
struct device;
-extern struct bus_type maple_bus_type;
/* Maple Bus command and response codes */
enum maple_code {
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 295548cca8b3..a53ad4dabd7e 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -29,14 +29,12 @@
#define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */
#define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */
-#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */
#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1)
#else
/* 32bit sizes */
#define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */
#define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */
-#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */
#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2)
#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */
@@ -173,6 +171,7 @@ enum maple_type {
#define MT_FLAGS_LOCK_IRQ 0x100
#define MT_FLAGS_LOCK_BH 0x200
#define MT_FLAGS_LOCK_EXTERN 0x300
+#define MT_FLAGS_ALLOC_WRAPPED 0x0800
#define MAPLE_HEIGHT_MAX 31
@@ -184,13 +183,23 @@ enum maple_type {
#ifdef CONFIG_LOCKDEP
typedef struct lockdep_map *lockdep_map_p;
-#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock)
+#define mt_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))
+
+#define mt_write_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || \
+ lock_is_held_type((mt)->ma_external_lock, 0))
+
#define mt_set_external_lock(mt, lock) \
(mt)->ma_external_lock = &(lock)->dep_map
+
+#define mt_on_stack(mt) (mt).ma_external_lock = NULL
#else
typedef struct { /* nothing */ } lockdep_map_p;
-#define mt_lock_is_held(mt) 1
+#define mt_lock_is_held(mt) 1
+#define mt_write_lock_is_held(mt) 1
#define mt_set_external_lock(mt, lock) do { } while (0)
+#define mt_on_stack(mt) do { } while (0)
#endif
/*
@@ -212,8 +221,8 @@ struct maple_tree {
spinlock_t ma_lock;
lockdep_map_p ma_external_lock;
};
- void __rcu *ma_root;
unsigned int ma_flags;
+ void __rcu *ma_root;
};
/**
@@ -248,6 +257,8 @@ struct maple_tree {
struct maple_tree name = MTREE_INIT(name, 0)
#define mtree_lock(mt) spin_lock((&(mt)->ma_lock))
+#define mtree_lock_nested(mas, subclass) \
+ spin_lock_nested((&(mt)->ma_lock), subclass)
#define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock))
/*
@@ -309,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first,
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
void *entry, unsigned long size, unsigned long min,
unsigned long max, gfp_t gfp);
@@ -319,6 +333,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index,
void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);
+int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
+int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
+
void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);
@@ -337,6 +354,36 @@ static inline bool mtree_empty(const struct maple_tree *mt)
/* Advanced API */
/*
+ * Maple State Status
+ * ma_active means the maple state is pointing to a node and offset and can
+ * continue operating on the tree.
+ * ma_start means we have not searched the tree.
+ * ma_root means we have searched the tree and the entry we found lives in
+ * the root of the tree (ie it has index 0, length 1 and is the only entry in
+ * the tree).
+ * ma_none means we have searched the tree and there is no node in the
+ * tree for this entry. For example, we searched for index 1 in an empty
+ * tree. Or we have a tree which points to a full leaf node and we
+ * searched for an entry which is larger than can be contained in that
+ * leaf node.
+ * ma_pause means the data within the maple state may be stale, restart the
+ * operation
+ * ma_overflow means the search has reached the upper limit of the search
+ * ma_underflow means the search has reached the lower limit of the search
+ * ma_error means there was an error, check the node for the error number.
+ */
+enum maple_status {
+ ma_active,
+ ma_start,
+ ma_root,
+ ma_none,
+ ma_pause,
+ ma_overflow,
+ ma_underflow,
+ ma_error,
+};
+
+/*
* The maple state is defined in the struct ma_state and is used to keep track
* of information during operations, and even between operations when using the
* advanced API.
@@ -368,6 +415,13 @@ static inline bool mtree_empty(const struct maple_tree *mt)
* When returning a value the maple state index and last respectively contain
* the start and end of the range for the entry. Ranges are inclusive in the
* Maple Tree.
+ *
+ * The status of the state is used to determine how the next action should treat
+ * the state. For instance, if the status is ma_start then the next action
+ * should start at the root of the tree and walk down. If the status is
+ * ma_pause then the node may be stale data and should be discarded. If the
+ * status is ma_overflow, then the last action hit the upper limit.
+ *
*/
struct ma_state {
struct maple_tree *tree; /* The tree we're operating in */
@@ -377,9 +431,11 @@ struct ma_state {
unsigned long min; /* The minimum index of this node - implied pivot min */
unsigned long max; /* The maximum index of this node - implied pivot max */
struct maple_alloc *alloc; /* Allocated nodes for this operation */
+ enum maple_status status; /* The status of the state (active, start, none, etc) */
unsigned char depth; /* depth of tree descent during write */
unsigned char offset;
unsigned char mas_flags;
+ unsigned char end; /* The end of the node */
};
struct ma_wr_state {
@@ -389,7 +445,6 @@ struct ma_wr_state {
unsigned long r_max; /* range max */
enum maple_type type; /* mas->node type */
unsigned char offset_end; /* The offset where the write ends */
- unsigned char node_end; /* mas->node end */
unsigned long *pivots; /* mas->node->pivots pointer */
unsigned long end_piv; /* The pivot at the offset end */
void __rcu **slots; /* mas->node->slots pointer */
@@ -398,28 +453,16 @@ struct ma_wr_state {
};
#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock))
+#define mas_lock_nested(mas, subclass) \
+ spin_lock_nested(&((mas)->tree->ma_lock), subclass)
#define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock))
-
/*
* Special values for ma_state.node.
- * MAS_START means we have not searched the tree.
- * MAS_ROOT means we have searched the tree and the entry we found lives in
- * the root of the tree (ie it has index 0, length 1 and is the only entry in
- * the tree).
- * MAS_NONE means we have searched the tree and there is no node in the
- * tree for this entry. For example, we searched for index 1 in an empty
- * tree. Or we have a tree which points to a full leaf node and we
- * searched for an entry which is larger than can be contained in that
- * leaf node.
* MA_ERROR represents an errno. After dropping the lock and attempting
* to resolve the error, the walk would have to be restarted from the
* top of the tree as the tree may have been modified.
*/
-#define MAS_START ((struct maple_enode *)1UL)
-#define MAS_ROOT ((struct maple_enode *)5UL)
-#define MAS_NONE ((struct maple_enode *)9UL)
-#define MAS_PAUSE ((struct maple_enode *)17UL)
#define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
@@ -428,7 +471,8 @@ struct ma_wr_state {
.tree = mt, \
.index = first, \
.last = end, \
- .node = MAS_START, \
+ .node = NULL, \
+ .status = ma_start, \
.min = 0, \
.max = ULONG_MAX, \
.alloc = NULL, \
@@ -458,8 +502,10 @@ void *mas_find(struct ma_state *mas, unsigned long max);
void *mas_find_range(struct ma_state *mas, unsigned long max);
void *mas_find_rev(struct ma_state *mas, unsigned long min);
void *mas_find_range_rev(struct ma_state *mas, unsigned long max);
-int mas_preallocate(struct ma_state *mas, gfp_t gfp);
-bool mas_is_err(struct ma_state *mas);
+int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp);
bool mas_nomem(struct ma_state *mas, gfp_t gfp);
void mas_pause(struct ma_state *mas);
@@ -488,19 +534,18 @@ static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
mas->tree = tree;
mas->index = mas->last = addr;
mas->max = ULONG_MAX;
- mas->node = MAS_START;
+ mas->status = ma_start;
+ mas->node = NULL;
}
-/* Checks if a mas has not found anything */
-static inline bool mas_is_none(const struct ma_state *mas)
+static inline bool mas_is_active(struct ma_state *mas)
{
- return mas->node == MAS_NONE;
+ return mas->status == ma_active;
}
-/* Checks if a mas has been paused */
-static inline bool mas_is_paused(const struct ma_state *mas)
+static inline bool mas_is_err(struct ma_state *mas)
{
- return mas->node == MAS_PAUSE;
+ return mas->status == ma_error;
}
/**
@@ -513,9 +558,10 @@ static inline bool mas_is_paused(const struct ma_state *mas)
*
* Context: Any context.
*/
-static inline void mas_reset(struct ma_state *mas)
+static __always_inline void mas_reset(struct ma_state *mas)
{
- mas->node = MAS_START;
+ mas->status = ma_start;
+ mas->node = NULL;
}
/**
@@ -532,6 +578,150 @@ static inline void mas_reset(struct ma_state *mas)
#define mas_for_each(__mas, __entry, __max) \
while (((__entry) = mas_find((__mas), (__max))) != NULL)
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+enum mt_dump_format {
+ mt_dump_dec,
+ mt_dump_hex,
+};
+
+extern atomic_t maple_tree_tests_run;
+extern atomic_t maple_tree_tests_passed;
+
+void mt_dump(const struct maple_tree *mt, enum mt_dump_format format);
+void mas_dump(const struct ma_state *mas);
+void mas_wr_dump(const struct ma_wr_state *wr_mas);
+void mt_validate(struct maple_tree *mt);
+void mt_cache_shrink(void);
+#define MT_BUG_ON(__tree, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mt_dump(__tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+
+#define MAS_BUG_ON(__mas, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_dump(__mas); \
+ mt_dump((__mas)->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+
+#define MAS_WR_BUG_ON(__wrmas, __x) do { \
+ atomic_inc(&maple_tree_tests_run); \
+ if (__x) { \
+ pr_info("BUG at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_wr_dump(__wrmas); \
+ mas_dump((__wrmas)->mas); \
+ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+} while (0)
+
+#define MT_WARN_ON(__tree, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mt_dump(__tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
+
+#define MAS_WARN_ON(__mas, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_dump(__mas); \
+ mt_dump((__mas)->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
+
+#define MAS_WR_WARN_ON(__wrmas, __x) ({ \
+ int ret = !!(__x); \
+ atomic_inc(&maple_tree_tests_run); \
+ if (ret) { \
+ pr_info("WARN at %s:%d (%u)\n", \
+ __func__, __LINE__, __x); \
+ mas_wr_dump(__wrmas); \
+ mas_dump((__wrmas)->mas); \
+ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
+ pr_info("Pass: %u Run:%u\n", \
+ atomic_read(&maple_tree_tests_passed), \
+ atomic_read(&maple_tree_tests_run)); \
+ dump_stack(); \
+ } else { \
+ atomic_inc(&maple_tree_tests_passed); \
+ } \
+ unlikely(ret); \
+})
+#else
+#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
+#define MAS_BUG_ON(__mas, __x) BUG_ON(__x)
+#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x)
+#define MT_WARN_ON(__tree, __x) WARN_ON(__x)
+#define MAS_WARN_ON(__mas, __x) WARN_ON(__x)
+#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x)
+#endif /* CONFIG_DEBUG_MAPLE_TREE */
+
+/**
+ * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the
+ * current location.
+ * @mas: Maple Tree operation state.
+ * @start: New start of range in the Maple Tree.
+ * @last: New end of range in the Maple Tree.
+ *
+ * set the internal maple state values to a sub-range.
+ * Please use mas_set_range() if you do not know where you are in the tree.
+ */
+static inline void __mas_set_range(struct ma_state *mas, unsigned long start,
+ unsigned long last)
+{
+ /* Ensure the range starts within the current slot */
+ MAS_WARN_ON(mas, mas_is_active(mas) &&
+ (mas->index > start || mas->last < start));
+ mas->index = start;
+ mas->last = last;
+}
+
/**
* mas_set_range() - Set up Maple Tree operation state for a different index.
* @mas: Maple Tree operation state.
@@ -545,9 +735,8 @@ static inline void mas_reset(struct ma_state *mas)
static inline
void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
{
- mas->index = start;
- mas->last = last;
- mas->node = MAS_START;
+ mas_reset(mas);
+ __mas_set_range(mas, start, last);
}
/**
@@ -662,138 +851,14 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max);
* mt_for_each - Iterate over each entry starting at index until max.
* @__tree: The Maple Tree
* @__entry: The current entry
- * @__index: The index to update to track the location in the tree
+ * @__index: The index to start the search from. Subsequently used as iterator.
* @__max: The maximum limit for @index
*
- * Note: Will not return the zero entry.
+ * This iterator skips all entries, which resolve to a NULL pointer,
+ * e.g. entries which has been reserved with XA_ZERO_ENTRY.
*/
#define mt_for_each(__tree, __entry, __index, __max) \
for (__entry = mt_find(__tree, &(__index), __max); \
__entry; __entry = mt_find_after(__tree, &(__index), __max))
-
-#ifdef CONFIG_DEBUG_MAPLE_TREE
-enum mt_dump_format {
- mt_dump_dec,
- mt_dump_hex,
-};
-
-extern atomic_t maple_tree_tests_run;
-extern atomic_t maple_tree_tests_passed;
-
-void mt_dump(const struct maple_tree *mt, enum mt_dump_format format);
-void mas_dump(const struct ma_state *mas);
-void mas_wr_dump(const struct ma_wr_state *wr_mas);
-void mt_validate(struct maple_tree *mt);
-void mt_cache_shrink(void);
-#define MT_BUG_ON(__tree, __x) do { \
- atomic_inc(&maple_tree_tests_run); \
- if (__x) { \
- pr_info("BUG at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mt_dump(__tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
-} while (0)
-
-#define MAS_BUG_ON(__mas, __x) do { \
- atomic_inc(&maple_tree_tests_run); \
- if (__x) { \
- pr_info("BUG at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mas_dump(__mas); \
- mt_dump((__mas)->tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
-} while (0)
-
-#define MAS_WR_BUG_ON(__wrmas, __x) do { \
- atomic_inc(&maple_tree_tests_run); \
- if (__x) { \
- pr_info("BUG at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mas_wr_dump(__wrmas); \
- mas_dump((__wrmas)->mas); \
- mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
-} while (0)
-
-#define MT_WARN_ON(__tree, __x) ({ \
- int ret = !!(__x); \
- atomic_inc(&maple_tree_tests_run); \
- if (ret) { \
- pr_info("WARN at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mt_dump(__tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
- unlikely(ret); \
-})
-
-#define MAS_WARN_ON(__mas, __x) ({ \
- int ret = !!(__x); \
- atomic_inc(&maple_tree_tests_run); \
- if (ret) { \
- pr_info("WARN at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mas_dump(__mas); \
- mt_dump((__mas)->tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
- unlikely(ret); \
-})
-
-#define MAS_WR_WARN_ON(__wrmas, __x) ({ \
- int ret = !!(__x); \
- atomic_inc(&maple_tree_tests_run); \
- if (ret) { \
- pr_info("WARN at %s:%d (%u)\n", \
- __func__, __LINE__, __x); \
- mas_wr_dump(__wrmas); \
- mas_dump((__wrmas)->mas); \
- mt_dump((__wrmas)->mas->tree, mt_dump_hex); \
- pr_info("Pass: %u Run:%u\n", \
- atomic_read(&maple_tree_tests_passed), \
- atomic_read(&maple_tree_tests_run)); \
- dump_stack(); \
- } else { \
- atomic_inc(&maple_tree_tests_passed); \
- } \
- unlikely(ret); \
-})
-#else
-#define MT_BUG_ON(__tree, __x) BUG_ON(__x)
-#define MAS_BUG_ON(__mas, __x) BUG_ON(__x)
-#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x)
-#define MT_WARN_ON(__tree, __x) WARN_ON(__x)
-#define MAS_WARN_ON(__mas, __x) WARN_ON(__x)
-#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x)
-#endif /* CONFIG_DEBUG_MAPLE_TREE */
-
#endif /*_LINUX_MAPLE_TREE_H */
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 0f06c2287b52..693eba9869e4 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -25,6 +25,8 @@
#define MARVELL_PHY_ID_88X3310 0x002b09a0
#define MARVELL_PHY_ID_88E2110 0x002b09b0
#define MARVELL_PHY_ID_88X2222 0x01410f10
+#define MARVELL_PHY_ID_88Q2110 0x002b0980
+#define MARVELL_PHY_ID_88Q2220 0x002b0b20
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
diff --git a/include/linux/math.h b/include/linux/math.h
index 2d388650c556..dd4152711de7 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -156,6 +156,25 @@ __STRUCT_FRACT(u32)
({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)
/**
+ * abs_diff - return absolute value of the difference between the arguments
+ * @a: the first argument
+ * @b: the second argument
+ *
+ * @a and @b have to be of the same type. With this restriction we compare
+ * signed to signed and unsigned to unsigned. The result is the subtraction
+ * the smaller of the two from the bigger, hence result is always a positive
+ * value.
+ *
+ * Return: an absolute value of the difference between the @a and @b.
+ */
+#define abs_diff(a, b) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ (void)(&__a == &__b); \
+ __a > __b ? (__a - __b) : (__b - __a); \
+})
+
+/**
* reciprocal_scale - "scale" a value into range [0, ep_ro)
* @val: value
* @ep_ro: right open interval endpoint
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index b0da04fe087b..34dfcc77f505 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -126,10 +126,11 @@ struct cmos_rtc_board_info {
#endif /* ARCH_RTC_LOCATION */
bool mc146818_does_rtc_work(void);
-int mc146818_get_time(struct rtc_time *time);
+int mc146818_get_time(struct rtc_time *time, int timeout);
int mc146818_set_time(struct rtc_time *time);
bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+ int timeout,
void *param);
#endif /* _MC146818RTC_H */
diff --git a/include/linux/mcb.h b/include/linux/mcb.h
index 1e5893138afe..0b971b24a804 100644
--- a/include/linux/mcb.h
+++ b/include/linux/mcb.h
@@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev)
struct mcb_device {
struct device dev;
struct mcb_bus *bus;
- bool is_added;
struct mcb_driver *driver;
u16 id;
int inst;
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index c1b7008826e5..68f8d2e970d4 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -38,6 +38,7 @@ struct mdio_device {
/* Bus address of the MDIO device (0-31) */
int addr;
int flags;
+ int reset_state;
struct gpio_desc *reset_gpio;
struct reset_control *reset_ctrl;
unsigned int reset_assert_delay;
@@ -372,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l
{
linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
advertising, lpa & MDIO_AN_T1_ADV_M_B10L);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
+ advertising, lpa & MDIO_AN_T1_ADV_M_100BT1);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT,
+ advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1);
}
/**
@@ -408,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising))
result |= MDIO_AN_T1_ADV_M_B10L;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising))
+ result |= MDIO_AN_T1_ADV_M_100BT1;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising))
+ result |= MDIO_AN_T1_ADV_M_1000BT1;
return result;
}
@@ -420,7 +429,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
* A function that translates value of following registers to the linkmode:
* IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20)
* IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60)
- * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61)
+ * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61)
*/
static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
{
@@ -439,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
}
/**
+ * mii_eee_cap2_mod_linkmode_sup_t()
+ * @adv: target the linkmode settings
+ * @val: register value
+ *
+ * A function that translates value of following registers to the linkmode:
+ * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21)
+ */
+static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val)
+{
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ adv, val & MDIO_EEE_2_5GT);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ adv, val & MDIO_EEE_5GT);
+}
+
+/**
+ * mii_eee_cap2_mod_linkmode_adv_t()
+ * @adv: target the linkmode advertisement settings
+ * @val: register value
+ *
+ * A function that translates value of following registers to the linkmode:
+ * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62)
+ * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63)
+ * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t.
+ * For certain, not yet supported, modes however the bits differ.
+ * Therefore create separate functions already.
+ */
+static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val)
+{
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ adv, val & MDIO_EEE_2_5GT);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ adv, val & MDIO_EEE_5GT);
+}
+
+/**
* linkmode_to_mii_eee_cap1_t()
* @adv: the linkmode advertisement settings
*
@@ -466,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv)
}
/**
+ * linkmode_to_mii_eee_cap2_t()
+ * @adv: the linkmode advertisement settings
+ *
+ * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16
+ * "EEE advertisement 2" register (7.62)
+ */
+static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv)
+{
+ u32 result = 0;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv))
+ result |= MDIO_EEE_2_5GT;
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv))
+ result |= MDIO_EEE_5GT;
+
+ return result;
+}
+
+/**
* mii_10base_t1_adv_mod_linkmode_t()
* @adv: linkmode advertisement settings
* @val: register value
@@ -537,6 +601,8 @@ static inline void mii_c73_mod_linkmode(unsigned long *adv, u16 *lpa)
int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
+int __mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
+ u16 set);
int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
u16 mask, u16 set);
@@ -564,6 +630,30 @@ int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum,
int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad,
u32 regnum, u16 mask, u16 set);
+static inline int __mdiodev_read(struct mdio_device *mdiodev, u32 regnum)
+{
+ return __mdiobus_read(mdiodev->bus, mdiodev->addr, regnum);
+}
+
+static inline int __mdiodev_write(struct mdio_device *mdiodev, u32 regnum,
+ u16 val)
+{
+ return __mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val);
+}
+
+static inline int __mdiodev_modify(struct mdio_device *mdiodev, u32 regnum,
+ u16 mask, u16 set)
+{
+ return __mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set);
+}
+
+static inline int __mdiodev_modify_changed(struct mdio_device *mdiodev,
+ u32 regnum, u16 mask, u16 set)
+{
+ return __mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, regnum,
+ mask, set);
+}
+
static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum)
{
return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum);
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index fd6e0620658d..b38a56a13f39 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -31,11 +31,11 @@ typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev);
* @rx_work: async work to execute Rx event callback
* @rx_cb: Drivers register this callback to get asynchronous ME
* Rx buffer pending notifications.
- * @notif_work: async work to execute FW notif event callback
+ * @notif_work: async work to execute FW notify event callback
* @notif_cb: Drivers register this callback to get asynchronous ME
* FW notification pending notifications.
*
- * @do_match: wheather device can be matched with a driver
+ * @do_match: whether the device can be matched with a driver
* @is_added: device is already scanned
* @priv_data: client private data
*/
@@ -94,15 +94,23 @@ void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv);
ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf,
size_t length);
+ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf,
+ size_t length, unsigned long timeout);
ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length);
ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
size_t length);
+ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length,
+ unsigned long timeout);
ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf,
size_t length, u8 vtag);
+ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf,
+ size_t length, u8 vtag, unsigned long timeout);
ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length,
u8 *vtag);
ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf,
size_t length, u8 *vtag);
+ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length,
+ u8 *vtag, unsigned long timeout);
int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb);
int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f71ff9f0ec81..e2082240586d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn;
* via a driver, and never indicated in the firmware-provided memory map as
* system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
* kernel resource tree.
+ * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are
+ * not initialized (only for reserved regions).
*/
enum memblock_flags {
MEMBLOCK_NONE = 0x0, /* No special request */
@@ -47,6 +49,7 @@ enum memblock_flags {
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */
+ MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */
};
/**
@@ -118,13 +121,17 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
#endif
void memblock_trim_memory(phys_addr_t align);
+unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
+ phys_addr_t base2, phys_addr_t size2);
bool memblock_overlaps_region(struct memblock_type *type,
phys_addr_t base, phys_addr_t size);
+bool memblock_validate_numa_coverage(unsigned long threshold_bytes);
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
void memblock_free_all(void);
void memblock_free(void *ptr, size_t size);
@@ -259,6 +266,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
return m->flags & MEMBLOCK_NOMAP;
}
+static inline bool memblock_is_reserved_noinit(struct memblock_region *m)
+{
+ return m->flags & MEMBLOCK_RSRV_NOINIT;
+}
+
static inline bool memblock_is_driver_managed(struct memblock_region *m)
{
return m->flags & MEMBLOCK_DRIVER_MANAGED;
@@ -581,9 +593,7 @@ extern void *alloc_large_system_hash(const char *tablename,
unsigned long high_limit);
#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
-#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min
- * shift passed via *_hash_shift */
-#define HASH_ZERO 0x00000004 /* Zero allocated hash table */
+#define HASH_ZERO 0x00000002 /* Zero allocated hash table */
/* Only NUMA needs hash distribution. 64bit NUMA architectures have
* sufficient vmalloc space.
@@ -596,13 +606,11 @@ extern int hashdist; /* Distribute hashes across NUMA nodes? */
#endif
#ifdef CONFIG_MEMTEST
-extern phys_addr_t early_memtest_bad_size; /* Size of faulty ram found by memtest */
-extern bool early_memtest_done; /* Was early memtest done? */
-extern void early_memtest(phys_addr_t start, phys_addr_t end);
+void early_memtest(phys_addr_t start, phys_addr_t end);
+void memtest_report_meminfo(struct seq_file *m);
#else
-static inline void early_memtest(phys_addr_t start, phys_addr_t end)
-{
-}
+static inline void early_memtest(phys_addr_t start, phys_addr_t end) { }
+static inline void memtest_report_meminfo(struct seq_file *m) { }
#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5818af8eca5a..394fd0a887ae 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -14,6 +14,7 @@
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>
+#include <linux/kernel.h>
#include <linux/page_counter.h>
#include <linux/vmpressure.h>
#include <linux/eventfd.h>
@@ -21,6 +22,7 @@
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>
+#include <linux/shrinker.h>
struct mem_cgroup;
struct obj_cgroup;
@@ -61,7 +63,6 @@ struct mem_cgroup_reclaim_cookie {
#ifdef CONFIG_MEMCG
#define MEM_CGROUP_ID_SHIFT 16
-#define MEM_CGROUP_ID_MAX USHRT_MAX
struct mem_cgroup_id {
int id;
@@ -89,17 +90,6 @@ struct mem_cgroup_reclaim_iter {
unsigned int generation;
};
-/*
- * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
- * shrinkers, which have elements charged to this memcg.
- */
-struct shrinker_info {
- struct rcu_head rcu;
- atomic_long_t *nr_deferred;
- unsigned long *map;
- int map_nr_max;
-};
-
struct lruvec_stats_percpu {
/* Local (CPU and cgroup) state */
long state[NR_VM_NODE_STAT_ITEMS];
@@ -112,6 +102,9 @@ struct lruvec_stats {
/* Aggregated (CPU and subtree) state */
long state[NR_VM_NODE_STAT_ITEMS];
+ /* Non-hierarchical (CPU aggregated) state */
+ long state_local[NR_VM_NODE_STAT_ITEMS];
+
/* Pending child counts during tree propagation */
long state_pending[NR_VM_NODE_STAT_ITEMS];
};
@@ -151,7 +144,7 @@ struct mem_cgroup_threshold_ary {
/* Size of entries[] */
unsigned int size;
/* Array of thresholds */
- struct mem_cgroup_threshold entries[];
+ struct mem_cgroup_threshold entries[] __counted_by(size);
};
struct mem_cgroup_thresholds {
@@ -227,6 +220,12 @@ struct mem_cgroup {
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
unsigned long zswap_max;
+
+ /*
+ * Prevent pages from this memcg from being written back from zswap to
+ * swap, and from being swapped out on zswap store failures.
+ */
+ bool zswap_writeback;
#endif
unsigned long soft_limit;
@@ -284,6 +283,11 @@ struct mem_cgroup {
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
+ /*
+ * Hint of reclaim pressure for socket memroy management. Note
+ * that this indicator should NOT be used in legacy cgroup mode
+ * where socket memory is accounted/charged separately.
+ */
unsigned long socket_pressure;
/* Legacy tcp memory accounting */
@@ -292,7 +296,13 @@ struct mem_cgroup {
#ifdef CONFIG_MEMCG_KMEM
int kmemcg_id;
- struct obj_cgroup __rcu *objcg;
+ /*
+ * memcg->objcg is wiped out as a part of the objcg repaprenting
+ * process. memcg->orig_objcg preserves a pointer (and a reference)
+ * to the original objcg until the end of live of memcg.
+ */
+ struct obj_cgroup __rcu *objcg;
+ struct obj_cgroup *orig_objcg;
/* list of inherited objcgs, protected by objcg_lock */
struct list_head objcg_list;
#endif
@@ -321,7 +331,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
/* per-memcg mm_struct list */
struct lru_gen_mm_list mm_list;
#endif
@@ -583,7 +593,7 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
/*
* There is no reclaim protection applied to a targeted reclaim.
* We are special casing this specific case here because
- * mem_cgroup_protected calculation is not robust enough to keep
+ * mem_cgroup_calculate_protection is not robust enough to keep
* the protection invariant for calculated effective values for
* parallel reclaimers with different reclaim target. This is
* especially a problem for tail memcgs (as they have pages on LRU)
@@ -655,6 +665,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
page_counter_read(&memcg->memory);
}
+void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
+
int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
/**
@@ -679,6 +691,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
return __mem_cgroup_charge(folio, mm, gfp);
}
+int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
+ long nr_pages);
+
int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -698,14 +713,16 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
__mem_cgroup_uncharge(folio);
}
-void __mem_cgroup_uncharge_list(struct list_head *page_list);
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+void __mem_cgroup_uncharge_folios(struct folio_batch *folios);
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
{
if (mem_cgroup_disabled())
return;
- __mem_cgroup_uncharge_list(page_list);
+ __mem_cgroup_uncharge_folios(folios);
}
+void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
+void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
void mem_cgroup_migrate(struct folio *old, struct folio *new);
/**
@@ -762,6 +779,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
+struct mem_cgroup *get_mem_cgroup_from_current(void);
+
struct lruvec *folio_lruvec_lock(struct folio *folio);
struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
@@ -807,6 +826,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
return !memcg || css_tryget(&memcg->css);
}
+static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg)
+{
+ return !memcg || css_tryget_online(&memcg->css);
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
if (memcg)
@@ -861,8 +885,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
* parent_mem_cgroup - find the accounting parent of a memcg
* @memcg: memcg whose parent to find
*
- * Returns the parent memcg, or NULL if this is the root or the memory
- * controller is in legacy no-hierarchy mode.
+ * Returns the parent memcg, or NULL if this is the root.
*/
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
@@ -914,7 +937,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}
-void mem_cgroup_handle_over_high(void);
+void mem_cgroup_handle_over_high(gfp_t gfp_mask);
unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
@@ -1020,14 +1043,12 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
{
struct mem_cgroup_per_node *pn;
long x = 0;
- int cpu;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- for_each_possible_cpu(cpu)
- x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu);
+ x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
@@ -1035,8 +1056,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return x;
}
-void mem_cgroup_flush_stats(void);
-void mem_cgroup_flush_stats_ratelimited(void);
+void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
+void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
@@ -1076,15 +1097,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
local_irq_restore(flags);
}
-static inline void count_memcg_page_event(struct page *page,
- enum vm_event_item idx)
-{
- struct mem_cgroup *memcg = page_memcg(page);
-
- if (memcg)
- count_memcg_events(memcg, idx, 1);
-}
-
static inline void count_memcg_folio_events(struct folio *folio,
enum vm_event_item idx, unsigned long nr)
{
@@ -1149,7 +1161,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
rcu_read_unlock();
}
-void split_page_memcg(struct page *head, unsigned int nr);
+void split_page_memcg(struct page *head, int old_order, int new_order);
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1158,7 +1170,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
-#define MEM_CGROUP_ID_MAX 0
static inline struct mem_cgroup *folio_memcg(struct folio *folio)
{
@@ -1186,6 +1197,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return NULL;
}
+static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
+{
+ return NULL;
+}
+
static inline bool folio_memcg_kmem(struct folio *folio)
{
return false;
@@ -1246,12 +1262,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
return false;
}
+static inline void mem_cgroup_commit_charge(struct folio *folio,
+ struct mem_cgroup *memcg)
+{
+}
+
static inline int mem_cgroup_charge(struct folio *folio,
struct mm_struct *mm, gfp_t gfp)
{
return 0;
}
+static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg,
+ gfp_t gfp, long nr_pages)
+{
+ return 0;
+}
+
static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
@@ -1266,7 +1293,17 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
{
}
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
+{
+}
+
+static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
+ unsigned int nr_pages)
+{
+}
+
+static inline void mem_cgroup_replace_folio(struct folio *old,
+ struct folio *new)
{
}
@@ -1307,6 +1344,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}
+static inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+ return NULL;
+}
+
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
@@ -1322,6 +1364,11 @@ static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
return true;
}
+static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg)
+{
+ return true;
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
@@ -1455,7 +1502,7 @@ static inline void mem_cgroup_unlock_pages(void)
rcu_read_unlock();
}
-static inline void mem_cgroup_handle_over_high(void)
+static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
}
@@ -1521,11 +1568,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return node_page_state(lruvec_pgdat(lruvec), idx);
}
-static inline void mem_cgroup_flush_stats(void)
+static inline void mem_cgroup_flush_stats(struct mem_cgroup *memcg)
{
}
-static inline void mem_cgroup_flush_stats_ratelimited(void)
+static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg)
{
}
@@ -1562,11 +1609,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
{
}
-static inline void count_memcg_page_event(struct page *page,
- int idx)
-{
-}
-
static inline void count_memcg_folio_events(struct folio *folio,
enum vm_event_item idx, unsigned long nr)
{
@@ -1577,7 +1619,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void split_page_memcg(struct page *head, unsigned int nr)
+static inline void split_page_memcg(struct page *head, int old_order, int new_order)
{
}
@@ -1651,18 +1693,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
return folio_lruvec_lock_irq(folio);
}
-/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
- struct lruvec *locked_lruvec, unsigned long *flags)
+/* Don't lock again iff folio's lruvec locked */
+static inline void folio_lruvec_relock_irqsave(struct folio *folio,
+ struct lruvec **lruvecp, unsigned long *flags)
{
- if (locked_lruvec) {
- if (folio_matches_lruvec(folio, locked_lruvec))
- return locked_lruvec;
+ if (*lruvecp) {
+ if (folio_matches_lruvec(folio, *lruvecp))
+ return;
- unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
+ unlock_page_lruvec_irqrestore(*lruvecp, *flags);
}
- return folio_lruvec_lock_irqsave(folio, flags);
+ *lruvecp = folio_lruvec_lock_irqsave(folio, flags);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -1727,8 +1769,8 @@ void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
- return true;
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return !!memcg->tcpmem_pressure;
do {
if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
return true;
@@ -1760,8 +1802,26 @@ bool mem_cgroup_kmem_disabled(void);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);
-struct obj_cgroup *get_obj_cgroup_from_current(void);
-struct obj_cgroup *get_obj_cgroup_from_page(struct page *page);
+/*
+ * The returned objcg pointer is safe to use without additional
+ * protection within a scope. The scope is defined either by
+ * the current task (similar to the "current" global variable)
+ * or by set_active_memcg() pair.
+ * Please, use obj_cgroup_get() to get a reference if the pointer
+ * needs to be used outside of the local scope.
+ */
+struct obj_cgroup *current_obj_cgroup(void);
+struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio);
+
+static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
+{
+ struct obj_cgroup *objcg = current_obj_cgroup();
+
+ if (objcg)
+ obj_cgroup_get(objcg);
+
+ return objcg;
+}
int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
@@ -1845,7 +1905,7 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}
-static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
+static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
{
return NULL;
}
@@ -1886,6 +1946,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
+bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg);
#else
static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
{
@@ -1899,6 +1960,11 @@ static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg,
size_t size)
{
}
+static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
+{
+ /* if zswap is disabled, do not block pages going to the swapping device */
+ return true;
+}
#endif
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index fc9647b1b4f9..69e781900082 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -6,6 +6,7 @@
#include <linux/nodemask.h>
#include <linux/kref.h>
#include <linux/mmzone.h>
+#include <linux/notifier.h>
/*
* Each tier cover a abstrace distance chunk size of 128
*/
@@ -22,7 +23,9 @@
struct memory_tier;
struct memory_dev_type {
/* list of memory types that are part of same tier as this type */
- struct list_head tier_sibiling;
+ struct list_head tier_sibling;
+ /* list of memory types that are managed by one driver */
+ struct list_head list;
/* abstract distance for this specific memory type */
int adistance;
/* Nodes of same abstract distance */
@@ -30,12 +33,21 @@ struct memory_dev_type {
struct kref kref;
};
+struct access_coordinate;
+
#ifdef CONFIG_NUMA
extern bool numa_demotion_enabled;
+extern struct memory_dev_type *default_dram_type;
struct memory_dev_type *alloc_memory_type(int adistance);
-void destroy_memory_type(struct memory_dev_type *memtype);
+void put_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
void clear_node_memory_type(int node, struct memory_dev_type *memtype);
+int register_mt_adistance_algorithm(struct notifier_block *nb);
+int unregister_mt_adistance_algorithm(struct notifier_block *nb);
+int mt_calc_adistance(int node, int *adist);
+int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
+ const char *source);
+int mt_perf_to_adistance(struct access_coordinate *perf, int *adist);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -60,6 +72,7 @@ static inline bool node_is_toptier(int node)
#else
#define numa_demotion_enabled false
+#define default_dram_type NULL
/*
* CONFIG_NUMA implementation returns non NULL error.
*/
@@ -68,7 +81,7 @@ static inline struct memory_dev_type *alloc_memory_type(int adistance)
return NULL;
}
-static inline void destroy_memory_type(struct memory_dev_type *memtype)
+static inline void put_memory_type(struct memory_dev_type *memtype)
{
}
@@ -97,5 +110,31 @@ static inline bool node_is_toptier(int node)
{
return true;
}
+
+static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int mt_calc_adistance(int node, int *adist)
+{
+ return NOTIFY_DONE;
+}
+
+static inline int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
+ const char *source)
+{
+ return -EIO;
+}
+
+static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
+{
+ return -EIO;
+}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 31343566c221..c0afee5d126e 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -77,11 +77,7 @@ struct memory_block {
*/
struct zone *zone;
struct device dev;
- /*
- * Number of vmemmap pages. These pages
- * lay at the beginning of the memory block.
- */
- unsigned long nr_vmemmap_pages;
+ struct vmem_altmap *altmap;
struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -100,8 +96,17 @@ int set_memory_block_size_order(unsigned int order);
#define MEM_GOING_ONLINE (1<<3)
#define MEM_CANCEL_ONLINE (1<<4)
#define MEM_CANCEL_OFFLINE (1<<5)
+#define MEM_PREPARE_ONLINE (1<<6)
+#define MEM_FINISH_OFFLINE (1<<7)
struct memory_notify {
+ /*
+ * The altmap_start_pfn and altmap_nr_pages fields are designated for
+ * specifying the altmap range and are exclusively intended for use in
+ * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
+ */
+ unsigned long altmap_start_pfn;
+ unsigned long altmap_nr_pages;
unsigned long start_pfn;
unsigned long nr_pages;
int status_change_nid_normal;
@@ -118,6 +123,7 @@ struct mem_section;
#define DEFAULT_CALLBACK_PRI 0
#define SLAB_CALLBACK_PRI 1
#define HMAT_CALLBACK_PRI 2
+#define CXL_CALLBACK_PRI 5
#define MM_COMPUTE_BATCH_PRI 10
#define CPUSET_CALLBACK_PRI 10
#define MEMTIER_HOTPLUG_PRI 100
@@ -147,7 +153,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
- unsigned long vmemmap_pages,
+ struct vmem_altmap *altmap,
struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 013c69753c91..7a9ff464608d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -97,6 +97,8 @@ typedef int __bitwise mhp_t;
* To do so, we will use the beginning of the hot-added range to build
* the page tables for the memmap array that describes the entire range.
* Only selected architectures support it with SPARSE_VMEMMAP.
+ * This is only a hint, the core kernel can decide to not do this based on
+ * different alignment checks.
*/
#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1))
/*
@@ -104,6 +106,22 @@ typedef int __bitwise mhp_t;
* implies the node id (nid).
*/
#define MHP_NID_IS_MGID ((__force mhp_t)BIT(2))
+/*
+ * The hotplugged memory is completely inaccessible while the memory is
+ * offline. The memory provider will handle MEM_PREPARE_ONLINE /
+ * MEM_FINISH_OFFLINE notifications and make the memory accessible.
+ *
+ * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY,
+ * because the altmap cannot be written (e.g., poisoned) when adding
+ * memory -- before it is set online.
+ *
+ * This allows for adding memory with an altmap that is not currently
+ * made available by a hypervisor. When onlining that memory, the
+ * hypervisor can be instructed to make that memory available, and
+ * the onlining phase will not require any memory allocations, which is
+ * helpful in low-memory situations.
+ */
+#define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3))
/*
* Extended parameters for memory hotplug:
@@ -119,6 +137,7 @@ struct mhp_params {
bool mhp_range_allowed(u64 start, u64 size, bool need_mapping);
struct range mhp_get_pluggable_range(bool need_mapping);
+bool mhp_supports_memmap_on_memory(void);
/*
* Zone resizing functions
@@ -152,7 +171,7 @@ extern void adjust_present_page_count(struct page *page,
long nr_pages);
/* VM interface that may be used by firmware interface */
extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone);
+ struct zone *zone, bool mhp_off_inaccessible);
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
@@ -260,6 +279,11 @@ static inline bool movable_node_is_enabled(void)
return false;
}
+static inline bool mhp_supports_memmap_on_memory(void)
+{
+ return false;
+}
+
static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {}
@@ -354,7 +378,6 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid,
extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
struct mhp_params *params);
void arch_remove_linear_mapping(u64 start, u64 size);
-extern bool mhp_supports_memmap_on_memory(unsigned long size);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index d232de7cdc56..931b118336f4 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -17,6 +17,8 @@
struct mm_struct;
+#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */
+
#ifdef CONFIG_NUMA
/*
@@ -89,8 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
return pol;
}
-#define vma_policy(vma) ((vma)->vm_policy)
-
static inline void mpol_get(struct mempolicy *pol)
{
if (pol)
@@ -107,35 +107,30 @@ static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
/*
* Tree of shared policies for a shared memory region.
- * Maintain the policies in a pseudo mm that contains vmas. The vmas
- * carry the policy. As a special twist the pseudo mm is indexed in pages, not
- * bytes, so that we can work with shared memory segments bigger than
- * unsigned long.
*/
-
-struct sp_node {
- struct rb_node nd;
- unsigned long start, end;
- struct mempolicy *policy;
-};
-
struct shared_policy {
struct rb_root root;
rwlock_t lock;
};
+struct sp_node {
+ struct rb_node nd;
+ pgoff_t start, end;
+ struct mempolicy *policy;
+};
int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
-int mpol_set_shared_policy(struct shared_policy *info,
- struct vm_area_struct *vma,
- struct mempolicy *new);
-void mpol_free_shared_policy(struct shared_policy *p);
+int mpol_set_shared_policy(struct shared_policy *sp,
+ struct vm_area_struct *vma, struct mempolicy *mpol);
+void mpol_free_shared_policy(struct shared_policy *sp);
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
- unsigned long idx);
+ pgoff_t idx);
struct mempolicy *get_task_policy(struct task_struct *p);
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
- unsigned long addr);
+ unsigned long addr, pgoff_t *ilx);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+ unsigned long addr, int order, pgoff_t *ilx);
bool vma_policy_mof(struct vm_area_struct *vma);
extern void numa_default_policy(void);
@@ -149,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma,
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
const nodemask_t *mask);
-extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
-
extern unsigned int mempolicy_slab_node(void);
extern enum zone_type policy_zone;
@@ -174,7 +167,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
extern bool vma_migratable(struct vm_area_struct *vma);
-extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);
static inline bool mpol_is_preferred_many(struct mempolicy *pol)
@@ -188,12 +181,17 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
struct mempolicy {};
+static inline struct mempolicy *get_task_policy(struct task_struct *p)
+{
+ return NULL;
+}
+
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return true;
}
-static inline void mpol_put(struct mempolicy *p)
+static inline void mpol_put(struct mempolicy *pol)
{
}
@@ -212,17 +210,22 @@ static inline void mpol_shared_policy_init(struct shared_policy *sp,
{
}
-static inline void mpol_free_shared_policy(struct shared_policy *p)
+static inline void mpol_free_shared_policy(struct shared_policy *sp)
{
}
static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
+mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx)
{
return NULL;
}
-#define vma_policy(vma) NULL
+static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+ unsigned long addr, int order, pgoff_t *ilx)
+{
+ *ilx = 0;
+ return NULL;
+}
static inline int
vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
@@ -278,7 +281,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
}
#endif
-static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+static inline int mpol_misplaced(struct folio *folio,
+ struct vm_area_struct *vma,
unsigned long address)
{
return -1; /* no node preference */
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 4aae6c06c5f2..16c5cc807ff6 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -51,6 +51,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
extern int mempool_resize(mempool_t *pool, int new_min_nr);
extern void mempool_destroy(mempool_t *pool);
extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
+extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc;
extern void mempool_free(void *element, mempool_t *pool);
/*
@@ -94,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
(void *) size);
}
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kvfree(void *element, void *pool_data);
+
+static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+ return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
+{
+ return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
/*
* A mempool_alloc_t and mempool_free_t for a simple page allocator that
* allocates pages of the order specified by pool_data
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 1314d9c5f05b..3f7143ade32c 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -25,6 +25,7 @@ struct vmem_altmap {
unsigned long free;
unsigned long align;
unsigned long alloc;
+ bool inaccessible;
};
/*
@@ -108,7 +109,7 @@ struct dev_pagemap_ops {
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @ref: reference count that pins the devm_memremap_pages() mapping
* @done: completion for @ref
- * @type: memory type: see MEMORY_* in memory_hotplug.h
+ * @type: memory type: see MEMORY_* above in memremap.h
* @flags: PGMAP_* flags to specify defailed behavior
* @vmemmap_shift: structural definition of how the vmemmap page metadata
* is populated, specifically the metadata page order.
@@ -196,8 +197,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
unsigned long memremap_compat_align(void);
#else
static inline void *devm_memremap_pages(struct device *dev,
@@ -228,16 +227,6 @@ static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
return false;
}
-static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
- return 0;
-}
-
-static inline void vmem_altmap_free(struct vmem_altmap *altmap,
- unsigned long nr_pfns)
-{
-}
-
/* when memremap_pages() is disabled all archs can remap a single page */
static inline unsigned long memremap_compat_align(void)
{
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 473545a2c425..6fa21791fc85 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *);
extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *);
extern int pm860x_set_bits(struct i2c_client *, int, unsigned char,
unsigned char);
-extern int pm860x_page_reg_read(struct i2c_client *, int);
extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char);
extern int pm860x_page_bulk_read(struct i2c_client *, int, int,
unsigned char *);
-extern int pm860x_page_bulk_write(struct i2c_client *, int, int,
- unsigned char *);
-extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char,
- unsigned char);
-
#endif /* __LINUX_MFD_88PM860X_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 302a330c5c84..76d326ea8eba 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -382,10 +382,6 @@ struct ab8500_platform_data {
struct ab8500_sysctrl_platform_data *sysctrl;
};
-extern int ab8500_init(struct ab8500 *ab8500,
- enum ab8500_version version);
-extern int ab8500_exit(struct ab8500 *ab8500);
-
extern int ab8500_suspend(struct ab8500 *ab8500);
static inline int is_ab8500(struct ab8500 *ab)
@@ -503,13 +499,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab)
void ab8500_override_turn_on_stat(u8 mask, u8 set);
-#ifdef CONFIG_AB8500_DEBUG
-extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size);
-void ab8500_dump_all_banks(struct device *dev);
-void ab8500_debug_register_interrupt(int line);
-#else
static inline void ab8500_dump_all_banks(struct device *dev) {}
static inline void ab8500_debug_register_interrupt(int line) {}
-#endif
#endif /* MFD_AB8500_H */
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 47e7a3a61ce6..e8bcad641d8c 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -92,7 +92,7 @@ struct mfd_cell {
* (above) when matching OF nodes with devices that have identical
* compatible strings
*/
- const u64 of_reg;
+ u64 of_reg;
/* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */
bool use_of_reg;
diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h
new file mode 100644
index 000000000000..c39a49269cb7
--- /dev/null
+++ b/include/linux/mfd/cs42l43-regs.h
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cs42l43 register definitions
+ *
+ * Copyright (c) 2022-2023 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef CS42L43_CORE_REGS_H
+#define CS42L43_CORE_REGS_H
+
+/* Registers */
+#define CS42L43_GEN_INT_STAT_1 0x000000C0
+#define CS42L43_GEN_INT_MASK_1 0x000000C1
+#define CS42L43_DEVID 0x00003000
+#define CS42L43_REVID 0x00003004
+#define CS42L43_RELID 0x0000300C
+#define CS42L43_SFT_RESET 0x00003020
+#define CS42L43_DRV_CTRL1 0x00006004
+#define CS42L43_DRV_CTRL3 0x0000600C
+#define CS42L43_DRV_CTRL4 0x00006010
+#define CS42L43_DRV_CTRL_5 0x00006014
+#define CS42L43_GPIO_CTRL1 0x00006034
+#define CS42L43_GPIO_CTRL2 0x00006038
+#define CS42L43_GPIO_STS 0x0000603C
+#define CS42L43_GPIO_FN_SEL 0x00006040
+#define CS42L43_MCLK_SRC_SEL 0x00007004
+#define CS42L43_CCM_BLK_CLK_CONTROL 0x00007010
+#define CS42L43_SAMPLE_RATE1 0x00007014
+#define CS42L43_SAMPLE_RATE2 0x00007018
+#define CS42L43_SAMPLE_RATE3 0x0000701C
+#define CS42L43_SAMPLE_RATE4 0x00007020
+#define CS42L43_PLL_CONTROL 0x00007034
+#define CS42L43_FS_SELECT1 0x00007038
+#define CS42L43_FS_SELECT2 0x0000703C
+#define CS42L43_FS_SELECT3 0x00007040
+#define CS42L43_FS_SELECT4 0x00007044
+#define CS42L43_PDM_CONTROL 0x0000704C
+#define CS42L43_ASP_CLK_CONFIG1 0x00007058
+#define CS42L43_ASP_CLK_CONFIG2 0x0000705C
+#define CS42L43_OSC_DIV_SEL 0x00007068
+#define CS42L43_ADC_B_CTRL1 0x00008000
+#define CS42L43_ADC_B_CTRL2 0x00008004
+#define CS42L43_DECIM_HPF_WNF_CTRL1 0x0000803C
+#define CS42L43_DECIM_HPF_WNF_CTRL2 0x00008040
+#define CS42L43_DECIM_HPF_WNF_CTRL3 0x00008044
+#define CS42L43_DECIM_HPF_WNF_CTRL4 0x00008048
+#define CS42L43_DMIC_PDM_CTRL 0x0000804C
+#define CS42L43_DECIM_VOL_CTRL_CH1_CH2 0x00008050
+#define CS42L43_DECIM_VOL_CTRL_CH3_CH4 0x00008054
+#define CS42L43_DECIM_VOL_CTRL_UPDATE 0x00008058
+#define CS42L43_INTP_VOLUME_CTRL1 0x00009008
+#define CS42L43_INTP_VOLUME_CTRL2 0x0000900C
+#define CS42L43_AMP1_2_VOL_RAMP 0x00009010
+#define CS42L43_ASP_CTRL 0x0000A000
+#define CS42L43_ASP_FSYNC_CTRL1 0x0000A004
+#define CS42L43_ASP_FSYNC_CTRL2 0x0000A008
+#define CS42L43_ASP_FSYNC_CTRL3 0x0000A00C
+#define CS42L43_ASP_FSYNC_CTRL4 0x0000A010
+#define CS42L43_ASP_DATA_CTRL 0x0000A018
+#define CS42L43_ASP_RX_EN 0x0000A020
+#define CS42L43_ASP_TX_EN 0x0000A024
+#define CS42L43_ASP_RX_CH1_CTRL 0x0000A028
+#define CS42L43_ASP_RX_CH2_CTRL 0x0000A02C
+#define CS42L43_ASP_RX_CH3_CTRL 0x0000A030
+#define CS42L43_ASP_RX_CH4_CTRL 0x0000A034
+#define CS42L43_ASP_RX_CH5_CTRL 0x0000A038
+#define CS42L43_ASP_RX_CH6_CTRL 0x0000A03C
+#define CS42L43_ASP_TX_CH1_CTRL 0x0000A068
+#define CS42L43_ASP_TX_CH2_CTRL 0x0000A06C
+#define CS42L43_ASP_TX_CH3_CTRL 0x0000A070
+#define CS42L43_ASP_TX_CH4_CTRL 0x0000A074
+#define CS42L43_ASP_TX_CH5_CTRL 0x0000A078
+#define CS42L43_ASP_TX_CH6_CTRL 0x0000A07C
+#define CS42L43_OTP_REVISION_ID 0x0000B02C
+#define CS42L43_ASPTX1_INPUT 0x0000C200
+#define CS42L43_ASPTX2_INPUT 0x0000C210
+#define CS42L43_ASPTX3_INPUT 0x0000C220
+#define CS42L43_ASPTX4_INPUT 0x0000C230
+#define CS42L43_ASPTX5_INPUT 0x0000C240
+#define CS42L43_ASPTX6_INPUT 0x0000C250
+#define CS42L43_SWIRE_DP1_CH1_INPUT 0x0000C280
+#define CS42L43_SWIRE_DP1_CH2_INPUT 0x0000C290
+#define CS42L43_SWIRE_DP1_CH3_INPUT 0x0000C2A0
+#define CS42L43_SWIRE_DP1_CH4_INPUT 0x0000C2B0
+#define CS42L43_SWIRE_DP2_CH1_INPUT 0x0000C2C0
+#define CS42L43_SWIRE_DP2_CH2_INPUT 0x0000C2D0
+#define CS42L43_SWIRE_DP3_CH1_INPUT 0x0000C2E0
+#define CS42L43_SWIRE_DP3_CH2_INPUT 0x0000C2F0
+#define CS42L43_SWIRE_DP4_CH1_INPUT 0x0000C300
+#define CS42L43_SWIRE_DP4_CH2_INPUT 0x0000C310
+#define CS42L43_ASRC_INT1_INPUT1 0x0000C400
+#define CS42L43_ASRC_INT2_INPUT1 0x0000C410
+#define CS42L43_ASRC_INT3_INPUT1 0x0000C420
+#define CS42L43_ASRC_INT4_INPUT1 0x0000C430
+#define CS42L43_ASRC_DEC1_INPUT1 0x0000C440
+#define CS42L43_ASRC_DEC2_INPUT1 0x0000C450
+#define CS42L43_ASRC_DEC3_INPUT1 0x0000C460
+#define CS42L43_ASRC_DEC4_INPUT1 0x0000C470
+#define CS42L43_ISRC1INT1_INPUT1 0x0000C500
+#define CS42L43_ISRC1INT2_INPUT1 0x0000C510
+#define CS42L43_ISRC1DEC1_INPUT1 0x0000C520
+#define CS42L43_ISRC1DEC2_INPUT1 0x0000C530
+#define CS42L43_ISRC2INT1_INPUT1 0x0000C540
+#define CS42L43_ISRC2INT2_INPUT1 0x0000C550
+#define CS42L43_ISRC2DEC1_INPUT1 0x0000C560
+#define CS42L43_ISRC2DEC2_INPUT1 0x0000C570
+#define CS42L43_EQ1MIX_INPUT1 0x0000C580
+#define CS42L43_EQ1MIX_INPUT2 0x0000C584
+#define CS42L43_EQ1MIX_INPUT3 0x0000C588
+#define CS42L43_EQ1MIX_INPUT4 0x0000C58C
+#define CS42L43_EQ2MIX_INPUT1 0x0000C590
+#define CS42L43_EQ2MIX_INPUT2 0x0000C594
+#define CS42L43_EQ2MIX_INPUT3 0x0000C598
+#define CS42L43_EQ2MIX_INPUT4 0x0000C59C
+#define CS42L43_SPDIF1_INPUT1 0x0000C600
+#define CS42L43_SPDIF2_INPUT1 0x0000C610
+#define CS42L43_AMP1MIX_INPUT1 0x0000C620
+#define CS42L43_AMP1MIX_INPUT2 0x0000C624
+#define CS42L43_AMP1MIX_INPUT3 0x0000C628
+#define CS42L43_AMP1MIX_INPUT4 0x0000C62C
+#define CS42L43_AMP2MIX_INPUT1 0x0000C630
+#define CS42L43_AMP2MIX_INPUT2 0x0000C634
+#define CS42L43_AMP2MIX_INPUT3 0x0000C638
+#define CS42L43_AMP2MIX_INPUT4 0x0000C63C
+#define CS42L43_AMP3MIX_INPUT1 0x0000C640
+#define CS42L43_AMP3MIX_INPUT2 0x0000C644
+#define CS42L43_AMP3MIX_INPUT3 0x0000C648
+#define CS42L43_AMP3MIX_INPUT4 0x0000C64C
+#define CS42L43_AMP4MIX_INPUT1 0x0000C650
+#define CS42L43_AMP4MIX_INPUT2 0x0000C654
+#define CS42L43_AMP4MIX_INPUT3 0x0000C658
+#define CS42L43_AMP4MIX_INPUT4 0x0000C65C
+#define CS42L43_ASRC_INT_ENABLES 0x0000E000
+#define CS42L43_ASRC_DEC_ENABLES 0x0000E004
+#define CS42L43_PDNCNTL 0x00010000
+#define CS42L43_RINGSENSE_DEB_CTRL 0x0001001C
+#define CS42L43_TIPSENSE_DEB_CTRL 0x00010020
+#define CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS 0x00010028
+#define CS42L43_HS2 0x00010040
+#define CS42L43_HS_STAT 0x00010048
+#define CS42L43_MCU_SW_INTERRUPT 0x00010094
+#define CS42L43_STEREO_MIC_CTRL 0x000100A4
+#define CS42L43_STEREO_MIC_CLAMP_CTRL 0x000100C4
+#define CS42L43_BLOCK_EN2 0x00010104
+#define CS42L43_BLOCK_EN3 0x00010108
+#define CS42L43_BLOCK_EN4 0x0001010C
+#define CS42L43_BLOCK_EN5 0x00010110
+#define CS42L43_BLOCK_EN6 0x00010114
+#define CS42L43_BLOCK_EN7 0x00010118
+#define CS42L43_BLOCK_EN8 0x0001011C
+#define CS42L43_BLOCK_EN9 0x00010120
+#define CS42L43_BLOCK_EN10 0x00010124
+#define CS42L43_BLOCK_EN11 0x00010128
+#define CS42L43_TONE_CH1_CTRL 0x00010134
+#define CS42L43_TONE_CH2_CTRL 0x00010138
+#define CS42L43_MIC_DETECT_CONTROL_1 0x00011074
+#define CS42L43_DETECT_STATUS_1 0x0001107C
+#define CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL 0x00011090
+#define CS42L43_MIC_DETECT_CONTROL_ANDROID 0x000110B0
+#define CS42L43_ISRC1_CTRL 0x00012004
+#define CS42L43_ISRC2_CTRL 0x00013004
+#define CS42L43_CTRL_REG 0x00014000
+#define CS42L43_FDIV_FRAC 0x00014004
+#define CS42L43_CAL_RATIO 0x00014008
+#define CS42L43_SPI_CLK_CONFIG1 0x00016004
+#define CS42L43_SPI_CONFIG1 0x00016010
+#define CS42L43_SPI_CONFIG2 0x00016014
+#define CS42L43_SPI_CONFIG3 0x00016018
+#define CS42L43_SPI_CONFIG4 0x00016024
+#define CS42L43_SPI_STATUS1 0x00016100
+#define CS42L43_SPI_STATUS2 0x00016104
+#define CS42L43_TRAN_CONFIG1 0x00016200
+#define CS42L43_TRAN_CONFIG2 0x00016204
+#define CS42L43_TRAN_CONFIG3 0x00016208
+#define CS42L43_TRAN_CONFIG4 0x0001620C
+#define CS42L43_TRAN_CONFIG5 0x00016220
+#define CS42L43_TRAN_CONFIG6 0x00016224
+#define CS42L43_TRAN_CONFIG7 0x00016228
+#define CS42L43_TRAN_CONFIG8 0x0001622C
+#define CS42L43_TRAN_STATUS1 0x00016300
+#define CS42L43_TRAN_STATUS2 0x00016304
+#define CS42L43_TRAN_STATUS3 0x00016308
+#define CS42L43_TX_DATA 0x00016400
+#define CS42L43_RX_DATA 0x00016600
+#define CS42L43_DACCNFG1 0x00017000
+#define CS42L43_DACCNFG2 0x00017004
+#define CS42L43_HPPATHVOL 0x0001700C
+#define CS42L43_PGAVOL 0x00017014
+#define CS42L43_LOADDETRESULTS 0x00017018
+#define CS42L43_LOADDETENA 0x00017024
+#define CS42L43_CTRL 0x00017028
+#define CS42L43_COEFF_DATA_IN0 0x00018000
+#define CS42L43_COEFF_RD_WR0 0x00018008
+#define CS42L43_INIT_DONE0 0x00018010
+#define CS42L43_START_EQZ0 0x00018014
+#define CS42L43_MUTE_EQ_IN0 0x0001801C
+#define CS42L43_DECIM_INT 0x0001B000
+#define CS42L43_EQ_INT 0x0001B004
+#define CS42L43_ASP_INT 0x0001B008
+#define CS42L43_PLL_INT 0x0001B00C
+#define CS42L43_SOFT_INT 0x0001B010
+#define CS42L43_SWIRE_INT 0x0001B014
+#define CS42L43_MSM_INT 0x0001B018
+#define CS42L43_ACC_DET_INT 0x0001B01C
+#define CS42L43_I2C_TGT_INT 0x0001B020
+#define CS42L43_SPI_MSTR_INT 0x0001B024
+#define CS42L43_SW_TO_SPI_BRIDGE_INT 0x0001B028
+#define CS42L43_OTP_INT 0x0001B02C
+#define CS42L43_CLASS_D_AMP_INT 0x0001B030
+#define CS42L43_GPIO_INT 0x0001B034
+#define CS42L43_ASRC_INT 0x0001B038
+#define CS42L43_HPOUT_INT 0x0001B03C
+#define CS42L43_DECIM_MASK 0x0001B0A0
+#define CS42L43_EQ_MIX_MASK 0x0001B0A4
+#define CS42L43_ASP_MASK 0x0001B0A8
+#define CS42L43_PLL_MASK 0x0001B0AC
+#define CS42L43_SOFT_MASK 0x0001B0B0
+#define CS42L43_SWIRE_MASK 0x0001B0B4
+#define CS42L43_MSM_MASK 0x0001B0B8
+#define CS42L43_ACC_DET_MASK 0x0001B0BC
+#define CS42L43_I2C_TGT_MASK 0x0001B0C0
+#define CS42L43_SPI_MSTR_MASK 0x0001B0C4
+#define CS42L43_SW_TO_SPI_BRIDGE_MASK 0x0001B0C8
+#define CS42L43_OTP_MASK 0x0001B0CC
+#define CS42L43_CLASS_D_AMP_MASK 0x0001B0D0
+#define CS42L43_GPIO_INT_MASK 0x0001B0D4
+#define CS42L43_ASRC_MASK 0x0001B0D8
+#define CS42L43_HPOUT_MASK 0x0001B0DC
+#define CS42L43_DECIM_INT_SHADOW 0x0001B300
+#define CS42L43_EQ_MIX_INT_SHADOW 0x0001B304
+#define CS42L43_ASP_INT_SHADOW 0x0001B308
+#define CS42L43_PLL_INT_SHADOW 0x0001B30C
+#define CS42L43_SOFT_INT_SHADOW 0x0001B310
+#define CS42L43_SWIRE_INT_SHADOW 0x0001B314
+#define CS42L43_MSM_INT_SHADOW 0x0001B318
+#define CS42L43_ACC_DET_INT_SHADOW 0x0001B31C
+#define CS42L43_I2C_TGT_INT_SHADOW 0x0001B320
+#define CS42L43_SPI_MSTR_INT_SHADOW 0x0001B324
+#define CS42L43_SW_TO_SPI_BRIDGE_SHADOW 0x0001B328
+#define CS42L43_OTP_INT_SHADOW 0x0001B32C
+#define CS42L43_CLASS_D_AMP_INT_SHADOW 0x0001B330
+#define CS42L43_GPIO_SHADOW 0x0001B334
+#define CS42L43_ASRC_SHADOW 0x0001B338
+#define CS42L43_HP_OUT_SHADOW 0x0001B33C
+#define CS42L43_BOOT_CONTROL 0x00101000
+#define CS42L43_BLOCK_EN 0x00101008
+#define CS42L43_SHUTTER_CONTROL 0x0010100C
+#define CS42L43_MCU_SW_REV 0x00114000
+#define CS42L43_PATCH_START_ADDR 0x00114004
+#define CS42L43_NEED_CONFIGS 0x0011400C
+#define CS42L43_BOOT_STATUS 0x0011401C
+#define CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS 0x0011F8F8
+#define CS42L43_FW_MISSION_CTRL_NEED_CONFIGS 0x0011FE00
+#define CS42L43_FW_MISSION_CTRL_HAVE_CONFIGS 0x0011FE04
+#define CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION 0x0011FE0C
+#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG 0x0011FE10
+#define CS42L43_MCU_RAM_MAX 0x0011FFFF
+
+/* CS42L43_DEVID */
+#define CS42L43_DEVID_VAL 0x00042A43
+
+/* CS42L43_GEN_INT_STAT_1 */
+#define CS42L43_INT_STAT_GEN1_MASK 0x00000001
+#define CS42L43_INT_STAT_GEN1_SHIFT 0
+
+/* CS42L43_SFT_RESET */
+#define CS42L43_SFT_RESET_MASK 0xFF000000
+#define CS42L43_SFT_RESET_SHIFT 24
+
+#define CS42L43_SFT_RESET_VAL 0x5A000000
+
+/* CS42L43_DRV_CTRL1 */
+#define CS42L43_ASP_DOUT_DRV_MASK 0x00038000
+#define CS42L43_ASP_DOUT_DRV_SHIFT 15
+#define CS42L43_ASP_FSYNC_DRV_MASK 0x00000E00
+#define CS42L43_ASP_FSYNC_DRV_SHIFT 9
+#define CS42L43_ASP_BCLK_DRV_MASK 0x000001C0
+#define CS42L43_ASP_BCLK_DRV_SHIFT 6
+
+/* CS42L43_DRV_CTRL3 */
+#define CS42L43_I2C_ADDR_DRV_MASK 0x30000000
+#define CS42L43_I2C_ADDR_DRV_SHIFT 28
+#define CS42L43_I2C_SDA_DRV_MASK 0x0C000000
+#define CS42L43_I2C_SDA_DRV_SHIFT 26
+#define CS42L43_PDMOUT2_CLK_DRV_MASK 0x00E00000
+#define CS42L43_PDMOUT2_CLK_DRV_SHIFT 21
+#define CS42L43_PDMOUT2_DATA_DRV_MASK 0x001C0000
+#define CS42L43_PDMOUT2_DATA_DRV_SHIFT 18
+#define CS42L43_PDMOUT1_CLK_DRV_MASK 0x00038000
+#define CS42L43_PDMOUT1_CLK_DRV_SHIFT 15
+#define CS42L43_PDMOUT1_DATA_DRV_MASK 0x00007000
+#define CS42L43_PDMOUT1_DATA_DRV_SHIFT 12
+#define CS42L43_SPI_MISO_DRV_MASK 0x00000038
+#define CS42L43_SPI_MISO_DRV_SHIFT 3
+
+/* CS42L43_DRV_CTRL4 */
+#define CS42L43_GPIO3_DRV_MASK 0x00000E00
+#define CS42L43_GPIO3_DRV_SHIFT 9
+#define CS42L43_GPIO2_DRV_MASK 0x000001C0
+#define CS42L43_GPIO2_DRV_SHIFT 6
+#define CS42L43_GPIO1_DRV_MASK 0x00000038
+#define CS42L43_GPIO1_DRV_SHIFT 3
+
+/* CS42L43_DRV_CTRL_5 */
+#define CS42L43_I2C_SCL_DRV_MASK 0x18000000
+#define CS42L43_I2C_SCL_DRV_SHIFT 27
+#define CS42L43_SPI_SCK_DRV_MASK 0x07000000
+#define CS42L43_SPI_SCK_DRV_SHIFT 24
+#define CS42L43_SPI_MOSI_DRV_MASK 0x00E00000
+#define CS42L43_SPI_MOSI_DRV_SHIFT 21
+#define CS42L43_SPI_SSB_DRV_MASK 0x001C0000
+#define CS42L43_SPI_SSB_DRV_SHIFT 18
+#define CS42L43_ASP_DIN_DRV_MASK 0x000001C0
+#define CS42L43_ASP_DIN_DRV_SHIFT 6
+
+/* CS42L43_GPIO_CTRL1 */
+#define CS42L43_GPIO3_POL_MASK 0x00040000
+#define CS42L43_GPIO3_POL_SHIFT 18
+#define CS42L43_GPIO2_POL_MASK 0x00020000
+#define CS42L43_GPIO2_POL_SHIFT 17
+#define CS42L43_GPIO1_POL_MASK 0x00010000
+#define CS42L43_GPIO1_POL_SHIFT 16
+#define CS42L43_GPIO3_LVL_MASK 0x00000400
+#define CS42L43_GPIO3_LVL_SHIFT 10
+#define CS42L43_GPIO2_LVL_MASK 0x00000200
+#define CS42L43_GPIO2_LVL_SHIFT 9
+#define CS42L43_GPIO1_LVL_MASK 0x00000100
+#define CS42L43_GPIO1_LVL_SHIFT 8
+#define CS42L43_GPIO3_DIR_MASK 0x00000004
+#define CS42L43_GPIO3_DIR_SHIFT 2
+#define CS42L43_GPIO2_DIR_MASK 0x00000002
+#define CS42L43_GPIO2_DIR_SHIFT 1
+#define CS42L43_GPIO1_DIR_MASK 0x00000001
+#define CS42L43_GPIO1_DIR_SHIFT 0
+
+/* CS42L43_GPIO_CTRL2 */
+#define CS42L43_GPIO3_DEGLITCH_BYP_MASK 0x00000004
+#define CS42L43_GPIO3_DEGLITCH_BYP_SHIFT 2
+#define CS42L43_GPIO2_DEGLITCH_BYP_MASK 0x00000002
+#define CS42L43_GPIO2_DEGLITCH_BYP_SHIFT 1
+#define CS42L43_GPIO1_DEGLITCH_BYP_MASK 0x00000001
+#define CS42L43_GPIO1_DEGLITCH_BYP_SHIFT 0
+
+/* CS42L43_GPIO_STS */
+#define CS42L43_GPIO3_STS_MASK 0x00000004
+#define CS42L43_GPIO3_STS_SHIFT 2
+#define CS42L43_GPIO2_STS_MASK 0x00000002
+#define CS42L43_GPIO2_STS_SHIFT 1
+#define CS42L43_GPIO1_STS_MASK 0x00000001
+#define CS42L43_GPIO1_STS_SHIFT 0
+
+/* CS42L43_GPIO_FN_SEL */
+#define CS42L43_GPIO3_FN_SEL_MASK 0x00000004
+#define CS42L43_GPIO3_FN_SEL_SHIFT 2
+#define CS42L43_GPIO1_FN_SEL_MASK 0x00000001
+#define CS42L43_GPIO1_FN_SEL_SHIFT 0
+
+/* CS42L43_MCLK_SRC_SEL */
+#define CS42L43_OSC_PLL_MCLK_SEL_MASK 0x00000001
+#define CS42L43_OSC_PLL_MCLK_SEL_SHIFT 0
+
+/* CS42L43_SAMPLE_RATE1..CS42L43_SAMPLE_RATE4 */
+#define CS42L43_SAMPLE_RATE_MASK 0x0000001F
+#define CS42L43_SAMPLE_RATE_SHIFT 0
+
+/* CS42L43_PLL_CONTROL */
+#define CS42L43_PLL_REFCLK_EN_MASK 0x00000008
+#define CS42L43_PLL_REFCLK_EN_SHIFT 3
+#define CS42L43_PLL_REFCLK_DIV_MASK 0x00000006
+#define CS42L43_PLL_REFCLK_DIV_SHIFT 1
+#define CS42L43_PLL_REFCLK_SRC_MASK 0x00000001
+#define CS42L43_PLL_REFCLK_SRC_SHIFT 0
+
+/* CS42L43_FS_SELECT1 */
+#define CS42L43_ASP_RATE_MASK 0x00000003
+#define CS42L43_ASP_RATE_SHIFT 0
+
+/* CS42L43_FS_SELECT2 */
+#define CS42L43_ASRC_DEC_OUT_RATE_MASK 0x000000C0
+#define CS42L43_ASRC_DEC_OUT_RATE_SHIFT 6
+#define CS42L43_ASRC_INT_OUT_RATE_MASK 0x00000030
+#define CS42L43_ASRC_INT_OUT_RATE_SHIFT 4
+#define CS42L43_ASRC_DEC_IN_RATE_MASK 0x0000000C
+#define CS42L43_ASRC_DEC_IN_RATE_SHIFT 2
+#define CS42L43_ASRC_INT_IN_RATE_MASK 0x00000003
+#define CS42L43_ASRC_INT_IN_RATE_SHIFT 0
+
+/* CS42L43_FS_SELECT3 */
+#define CS42L43_HPOUT_RATE_MASK 0x0000C000
+#define CS42L43_HPOUT_RATE_SHIFT 14
+#define CS42L43_EQZ_RATE_MASK 0x00003000
+#define CS42L43_EQZ_RATE_SHIFT 12
+#define CS42L43_DIAGGEN_RATE_MASK 0x00000C00
+#define CS42L43_DIAGGEN_RATE_SHIFT 10
+#define CS42L43_DECIM_CH4_RATE_MASK 0x00000300
+#define CS42L43_DECIM_CH4_RATE_SHIFT 8
+#define CS42L43_DECIM_CH3_RATE_MASK 0x000000C0
+#define CS42L43_DECIM_CH3_RATE_SHIFT 6
+#define CS42L43_DECIM_CH2_RATE_MASK 0x00000030
+#define CS42L43_DECIM_CH2_RATE_SHIFT 4
+#define CS42L43_DECIM_CH1_RATE_MASK 0x0000000C
+#define CS42L43_DECIM_CH1_RATE_SHIFT 2
+#define CS42L43_AMP1_2_RATE_MASK 0x00000003
+#define CS42L43_AMP1_2_RATE_SHIFT 0
+
+/* CS42L43_FS_SELECT4 */
+#define CS42L43_SW_DP7_RATE_MASK 0x00C00000
+#define CS42L43_SW_DP7_RATE_SHIFT 22
+#define CS42L43_SW_DP6_RATE_MASK 0x00300000
+#define CS42L43_SW_DP6_RATE_SHIFT 20
+#define CS42L43_SPDIF_RATE_MASK 0x000C0000
+#define CS42L43_SPDIF_RATE_SHIFT 18
+#define CS42L43_SW_DP5_RATE_MASK 0x00030000
+#define CS42L43_SW_DP5_RATE_SHIFT 16
+#define CS42L43_SW_DP4_RATE_MASK 0x0000C000
+#define CS42L43_SW_DP4_RATE_SHIFT 14
+#define CS42L43_SW_DP3_RATE_MASK 0x00003000
+#define CS42L43_SW_DP3_RATE_SHIFT 12
+#define CS42L43_SW_DP2_RATE_MASK 0x00000C00
+#define CS42L43_SW_DP2_RATE_SHIFT 10
+#define CS42L43_SW_DP1_RATE_MASK 0x00000300
+#define CS42L43_SW_DP1_RATE_SHIFT 8
+#define CS42L43_ISRC2_LOW_RATE_MASK 0x000000C0
+#define CS42L43_ISRC2_LOW_RATE_SHIFT 6
+#define CS42L43_ISRC2_HIGH_RATE_MASK 0x00000030
+#define CS42L43_ISRC2_HIGH_RATE_SHIFT 4
+#define CS42L43_ISRC1_LOW_RATE_MASK 0x0000000C
+#define CS42L43_ISRC1_LOW_RATE_SHIFT 2
+#define CS42L43_ISRC1_HIGH_RATE_MASK 0x00000003
+#define CS42L43_ISRC1_HIGH_RATE_SHIFT 0
+
+/* CS42L43_PDM_CONTROL */
+#define CS42L43_PDM2_CLK_DIV_MASK 0x0000000C
+#define CS42L43_PDM2_CLK_DIV_SHIFT 2
+#define CS42L43_PDM1_CLK_DIV_MASK 0x00000003
+#define CS42L43_PDM1_CLK_DIV_SHIFT 0
+
+/* CS42L43_ASP_CLK_CONFIG1 */
+#define CS42L43_ASP_BCLK_N_MASK 0x03FF0000
+#define CS42L43_ASP_BCLK_N_SHIFT 16
+#define CS42L43_ASP_BCLK_M_MASK 0x000003FF
+#define CS42L43_ASP_BCLK_M_SHIFT 0
+
+/* CS42L43_ASP_CLK_CONFIG2 */
+#define CS42L43_ASP_MASTER_MODE_MASK 0x00000002
+#define CS42L43_ASP_MASTER_MODE_SHIFT 1
+#define CS42L43_ASP_BCLK_INV_MASK 0x00000001
+#define CS42L43_ASP_BCLK_INV_SHIFT 0
+
+/* CS42L43_OSC_DIV_SEL */
+#define CS42L43_OSC_DIV2_EN_MASK 0x00000001
+#define CS42L43_OSC_DIV2_EN_SHIFT 0
+
+/* CS42L43_ADC_B_CTRL1..CS42L43_ADC_B_CTRL1 */
+#define CS42L43_PGA_WIDESWING_MODE_EN_MASK 0x00000080
+#define CS42L43_PGA_WIDESWING_MODE_EN_SHIFT 7
+#define CS42L43_ADC_AIN_SEL_MASK 0x00000010
+#define CS42L43_ADC_AIN_SEL_SHIFT 4
+#define CS42L43_ADC_PGA_GAIN_MASK 0x0000000F
+#define CS42L43_ADC_PGA_GAIN_SHIFT 0
+
+/* CS42L43_DECIM_HPF_WNF_CTRL1..CS42L43_DECIM_HPF_WNF_CTRL4 */
+#define CS42L43_DECIM_WNF_CF_MASK 0x00000070
+#define CS42L43_DECIM_WNF_CF_SHIFT 4
+#define CS42L43_DECIM_WNF_EN_MASK 0x00000008
+#define CS42L43_DECIM_WNF_EN_SHIFT 3
+#define CS42L43_DECIM_HPF_CF_MASK 0x00000006
+#define CS42L43_DECIM_HPF_CF_SHIFT 1
+#define CS42L43_DECIM_HPF_EN_MASK 0x00000001
+#define CS42L43_DECIM_HPF_EN_SHIFT 0
+
+/* CS42L43_DMIC_PDM_CTRL */
+#define CS42L43_PDM2R_INV_MASK 0x00000020
+#define CS42L43_PDM2R_INV_SHIFT 5
+#define CS42L43_PDM2L_INV_MASK 0x00000010
+#define CS42L43_PDM2L_INV_SHIFT 4
+#define CS42L43_PDM1R_INV_MASK 0x00000008
+#define CS42L43_PDM1R_INV_SHIFT 3
+#define CS42L43_PDM1L_INV_MASK 0x00000004
+#define CS42L43_PDM1L_INV_SHIFT 2
+
+/* CS42L43_DECIM_VOL_CTRL_CH1_CH2 */
+#define CS42L43_DECIM2_MUTE_MASK 0x80000000
+#define CS42L43_DECIM2_MUTE_SHIFT 31
+#define CS42L43_DECIM2_VOL_MASK 0x3FC00000
+#define CS42L43_DECIM2_VOL_SHIFT 22
+#define CS42L43_DECIM2_VD_RAMP_MASK 0x00380000
+#define CS42L43_DECIM2_VD_RAMP_SHIFT 19
+#define CS42L43_DECIM2_VI_RAMP_MASK 0x00070000
+#define CS42L43_DECIM2_VI_RAMP_SHIFT 16
+#define CS42L43_DECIM1_MUTE_MASK 0x00008000
+#define CS42L43_DECIM1_MUTE_SHIFT 15
+#define CS42L43_DECIM1_VOL_MASK 0x00003FC0
+#define CS42L43_DECIM1_VOL_SHIFT 6
+#define CS42L43_DECIM1_VD_RAMP_MASK 0x00000038
+#define CS42L43_DECIM1_VD_RAMP_SHIFT 3
+#define CS42L43_DECIM1_VI_RAMP_MASK 0x00000007
+#define CS42L43_DECIM1_VI_RAMP_SHIFT 0
+
+/* CS42L43_DECIM_VOL_CTRL_CH3_CH4 */
+#define CS42L43_DECIM4_MUTE_MASK 0x80000000
+#define CS42L43_DECIM4_MUTE_SHIFT 31
+#define CS42L43_DECIM4_VOL_MASK 0x3FC00000
+#define CS42L43_DECIM4_VOL_SHIFT 22
+#define CS42L43_DECIM4_VD_RAMP_MASK 0x00380000
+#define CS42L43_DECIM4_VD_RAMP_SHIFT 19
+#define CS42L43_DECIM4_VI_RAMP_MASK 0x00070000
+#define CS42L43_DECIM4_VI_RAMP_SHIFT 16
+#define CS42L43_DECIM3_MUTE_MASK 0x00008000
+#define CS42L43_DECIM3_MUTE_SHIFT 15
+#define CS42L43_DECIM3_VOL_MASK 0x00003FC0
+#define CS42L43_DECIM3_VOL_SHIFT 6
+#define CS42L43_DECIM3_VD_RAMP_MASK 0x00000038
+#define CS42L43_DECIM3_VD_RAMP_SHIFT 3
+#define CS42L43_DECIM3_VI_RAMP_MASK 0x00000007
+#define CS42L43_DECIM3_VI_RAMP_SHIFT 0
+
+/* CS42L43_DECIM_VOL_CTRL_UPDATE */
+#define CS42L43_DECIM4_VOL_UPDATE_MASK 0x00000008
+#define CS42L43_DECIM4_VOL_UPDATE_SHIFT 3
+#define CS42L43_DECIM3_VOL_UPDATE_MASK 0x00000004
+#define CS42L43_DECIM3_VOL_UPDATE_SHIFT 2
+#define CS42L43_DECIM2_VOL_UPDATE_MASK 0x00000002
+#define CS42L43_DECIM2_VOL_UPDATE_SHIFT 1
+#define CS42L43_DECIM1_VOL_UPDATE_MASK 0x00000001
+#define CS42L43_DECIM1_VOL_UPDATE_SHIFT 0
+
+/* CS42L43_INTP_VOLUME_CTRL1..CS42L43_INTP_VOLUME_CTRL2 */
+#define CS42L43_AMP1_2_VU_MASK 0x00000200
+#define CS42L43_AMP1_2_VU_SHIFT 9
+#define CS42L43_AMP_MUTE_MASK 0x00000100
+#define CS42L43_AMP_MUTE_SHIFT 8
+#define CS42L43_AMP_VOL_MASK 0x000000FF
+#define CS42L43_AMP_VOL_SHIFT 0
+
+/* CS42L43_AMP1_2_VOL_RAMP */
+#define CS42L43_AMP1_2_VD_RAMP_MASK 0x00000070
+#define CS42L43_AMP1_2_VD_RAMP_SHIFT 4
+#define CS42L43_AMP1_2_VI_RAMP_MASK 0x00000007
+#define CS42L43_AMP1_2_VI_RAMP_SHIFT 0
+
+/* CS42L43_ASP_CTRL */
+#define CS42L43_ASP_FSYNC_MODE_MASK 0x00000004
+#define CS42L43_ASP_FSYNC_MODE_SHIFT 2
+#define CS42L43_ASP_BCLK_EN_MASK 0x00000002
+#define CS42L43_ASP_BCLK_EN_SHIFT 1
+#define CS42L43_ASP_FSYNC_EN_MASK 0x00000001
+#define CS42L43_ASP_FSYNC_EN_SHIFT 0
+
+/* CS42L43_ASP_FSYNC_CTRL1 */
+#define CS42L43_ASP_FSYNC_M_MASK 0x0007FFFF
+#define CS42L43_ASP_FSYNC_M_SHIFT 0
+
+/* CS42L43_ASP_FSYNC_CTRL3 */
+#define CS42L43_ASP_FSYNC_IN_INV_MASK 0x00000002
+#define CS42L43_ASP_FSYNC_IN_INV_SHIFT 1
+#define CS42L43_ASP_FSYNC_OUT_INV_MASK 0x00000001
+#define CS42L43_ASP_FSYNC_OUT_INV_SHIFT 0
+
+/* CS42L43_ASP_FSYNC_CTRL4 */
+#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_MASK 0x00001FFE
+#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_SHIFT 1
+
+/* CS42L43_ASP_DATA_CTRL */
+#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_MASK 0x00000008
+#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_SHIFT 3
+#define CS42L43_ASP_FSYNC_FRAME_START_DLY_MASK 0x00000007
+#define CS42L43_ASP_FSYNC_FRAME_START_DLY_SHIFT 0
+
+/* CS42L43_ASP_RX_EN */
+#define CS42L43_ASP_RX_CH6_EN_MASK 0x00000020
+#define CS42L43_ASP_RX_CH6_EN_SHIFT 5
+#define CS42L43_ASP_RX_CH5_EN_MASK 0x00000010
+#define CS42L43_ASP_RX_CH5_EN_SHIFT 4
+#define CS42L43_ASP_RX_CH4_EN_MASK 0x00000008
+#define CS42L43_ASP_RX_CH4_EN_SHIFT 3
+#define CS42L43_ASP_RX_CH3_EN_MASK 0x00000004
+#define CS42L43_ASP_RX_CH3_EN_SHIFT 2
+#define CS42L43_ASP_RX_CH2_EN_MASK 0x00000002
+#define CS42L43_ASP_RX_CH2_EN_SHIFT 1
+#define CS42L43_ASP_RX_CH1_EN_MASK 0x00000001
+#define CS42L43_ASP_RX_CH1_EN_SHIFT 0
+
+/* CS42L43_ASP_TX_EN */
+#define CS42L43_ASP_TX_CH6_EN_MASK 0x00000020
+#define CS42L43_ASP_TX_CH6_EN_SHIFT 5
+#define CS42L43_ASP_TX_CH5_EN_MASK 0x00000010
+#define CS42L43_ASP_TX_CH5_EN_SHIFT 4
+#define CS42L43_ASP_TX_CH4_EN_MASK 0x00000008
+#define CS42L43_ASP_TX_CH4_EN_SHIFT 3
+#define CS42L43_ASP_TX_CH3_EN_MASK 0x00000004
+#define CS42L43_ASP_TX_CH3_EN_SHIFT 2
+#define CS42L43_ASP_TX_CH2_EN_MASK 0x00000002
+#define CS42L43_ASP_TX_CH2_EN_SHIFT 1
+#define CS42L43_ASP_TX_CH1_EN_MASK 0x00000001
+#define CS42L43_ASP_TX_CH1_EN_SHIFT 0
+
+/* CS42L43_ASP_RX_CH1_CTRL..CS42L43_ASP_TX_CH6_CTRL */
+#define CS42L43_ASP_CH_WIDTH_MASK 0x001F0000
+#define CS42L43_ASP_CH_WIDTH_SHIFT 16
+#define CS42L43_ASP_CH_SLOT_MASK 0x00001FFE
+#define CS42L43_ASP_CH_SLOT_SHIFT 1
+#define CS42L43_ASP_CH_SLOT_PHASE_MASK 0x00000001
+#define CS42L43_ASP_CH_SLOT_PHASE_SHIFT 0
+
+/* CS42L43_ASPTX1_INPUT..CS42L43_AMP4MIX_INPUT4 */
+#define CS42L43_MIXER_VOL_MASK 0x00FE0000
+#define CS42L43_MIXER_VOL_SHIFT 17
+#define CS42L43_MIXER_SRC_MASK 0x000001FF
+#define CS42L43_MIXER_SRC_SHIFT 0
+
+/* CS42L43_ASRC_INT_ENABLES */
+#define CS42L43_ASRC_INT4_EN_MASK 0x00000008
+#define CS42L43_ASRC_INT4_EN_SHIFT 3
+#define CS42L43_ASRC_INT3_EN_MASK 0x00000004
+#define CS42L43_ASRC_INT3_EN_SHIFT 2
+#define CS42L43_ASRC_INT2_EN_MASK 0x00000002
+#define CS42L43_ASRC_INT2_EN_SHIFT 1
+#define CS42L43_ASRC_INT1_EN_MASK 0x00000001
+#define CS42L43_ASRC_INT1_EN_SHIFT 0
+
+/* CS42L43_ASRC_DEC_ENABLES */
+#define CS42L43_ASRC_DEC4_EN_MASK 0x00000008
+#define CS42L43_ASRC_DEC4_EN_SHIFT 3
+#define CS42L43_ASRC_DEC3_EN_MASK 0x00000004
+#define CS42L43_ASRC_DEC3_EN_SHIFT 2
+#define CS42L43_ASRC_DEC2_EN_MASK 0x00000002
+#define CS42L43_ASRC_DEC2_EN_SHIFT 1
+#define CS42L43_ASRC_DEC1_EN_MASK 0x00000001
+#define CS42L43_ASRC_DEC1_EN_SHIFT 0
+
+/* CS42L43_PDNCNTL */
+#define CS42L43_RING_SENSE_EN_MASK 0x00000002
+#define CS42L43_RING_SENSE_EN_SHIFT 1
+
+/* CS42L43_RINGSENSE_DEB_CTRL */
+#define CS42L43_RINGSENSE_INV_MASK 0x00000080
+#define CS42L43_RINGSENSE_INV_SHIFT 7
+#define CS42L43_RINGSENSE_PULLUP_PDNB_MASK 0x00000040
+#define CS42L43_RINGSENSE_PULLUP_PDNB_SHIFT 6
+#define CS42L43_RINGSENSE_FALLING_DB_TIME_MASK 0x00000038
+#define CS42L43_RINGSENSE_FALLING_DB_TIME_SHIFT 3
+#define CS42L43_RINGSENSE_RISING_DB_TIME_MASK 0x00000007
+#define CS42L43_RINGSENSE_RISING_DB_TIME_SHIFT 0
+
+/* CS42L43_TIPSENSE_DEB_CTRL */
+#define CS42L43_TIPSENSE_INV_MASK 0x00000080
+#define CS42L43_TIPSENSE_INV_SHIFT 7
+#define CS42L43_TIPSENSE_FALLING_DB_TIME_MASK 0x00000038
+#define CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT 3
+#define CS42L43_TIPSENSE_RISING_DB_TIME_MASK 0x00000007
+#define CS42L43_TIPSENSE_RISING_DB_TIME_SHIFT 0
+
+/* CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS */
+#define CS42L43_TIPSENSE_UNPLUG_DB_STS_MASK 0x00000008
+#define CS42L43_TIPSENSE_UNPLUG_DB_STS_SHIFT 3
+#define CS42L43_TIPSENSE_PLUG_DB_STS_MASK 0x00000004
+#define CS42L43_TIPSENSE_PLUG_DB_STS_SHIFT 2
+#define CS42L43_RINGSENSE_UNPLUG_DB_STS_MASK 0x00000002
+#define CS42L43_RINGSENSE_UNPLUG_DB_STS_SHIFT 1
+#define CS42L43_RINGSENSE_PLUG_DB_STS_MASK 0x00000001
+#define CS42L43_RINGSENSE_PLUG_DB_STS_SHIFT 0
+
+/* CS42L43_HS2 */
+#define CS42L43_HS_CLAMP_DISABLE_MASK 0x10000000
+#define CS42L43_HS_CLAMP_DISABLE_SHIFT 28
+#define CS42L43_HSBIAS_RAMP_MASK 0x0C000000
+#define CS42L43_HSBIAS_RAMP_SHIFT 26
+#define CS42L43_HSDET_MODE_MASK 0x00018000
+#define CS42L43_HSDET_MODE_SHIFT 15
+#define CS42L43_HSDET_MANUAL_MODE_MASK 0x00006000
+#define CS42L43_HSDET_MANUAL_MODE_SHIFT 13
+#define CS42L43_AUTO_HSDET_TIME_MASK 0x00000700
+#define CS42L43_AUTO_HSDET_TIME_SHIFT 8
+#define CS42L43_AMP3_4_GNDREF_HS3_SEL_MASK 0x00000080
+#define CS42L43_AMP3_4_GNDREF_HS3_SEL_SHIFT 7
+#define CS42L43_AMP3_4_GNDREF_HS4_SEL_MASK 0x00000040
+#define CS42L43_AMP3_4_GNDREF_HS4_SEL_SHIFT 6
+#define CS42L43_HSBIAS_GNDREF_HS3_SEL_MASK 0x00000020
+#define CS42L43_HSBIAS_GNDREF_HS3_SEL_SHIFT 5
+#define CS42L43_HSBIAS_GNDREF_HS4_SEL_MASK 0x00000010
+#define CS42L43_HSBIAS_GNDREF_HS4_SEL_SHIFT 4
+#define CS42L43_HSBIAS_OUT_HS3_SEL_MASK 0x00000008
+#define CS42L43_HSBIAS_OUT_HS3_SEL_SHIFT 3
+#define CS42L43_HSBIAS_OUT_HS4_SEL_MASK 0x00000004
+#define CS42L43_HSBIAS_OUT_HS4_SEL_SHIFT 2
+#define CS42L43_HSGND_HS3_SEL_MASK 0x00000002
+#define CS42L43_HSGND_HS3_SEL_SHIFT 1
+#define CS42L43_HSGND_HS4_SEL_MASK 0x00000001
+#define CS42L43_HSGND_HS4_SEL_SHIFT 0
+
+/* CS42L43_HS_STAT */
+#define CS42L43_HSDET_TYPE_STS_MASK 0x00000007
+#define CS42L43_HSDET_TYPE_STS_SHIFT 0
+
+/* CS42L43_MCU_SW_INTERRUPT */
+#define CS42L43_CONTROL_IND_MASK 0x00000004
+#define CS42L43_CONTROL_IND_SHIFT 2
+#define CS42L43_CONFIGS_IND_MASK 0x00000002
+#define CS42L43_CONFIGS_IND_SHIFT 1
+#define CS42L43_PATCH_IND_MASK 0x00000001
+#define CS42L43_PATCH_IND_SHIFT 0
+
+/* CS42L43_STEREO_MIC_CTRL */
+#define CS42L43_HS2_BIAS_SENSE_EN_MASK 0x00000020
+#define CS42L43_HS2_BIAS_SENSE_EN_SHIFT 5
+#define CS42L43_HS1_BIAS_SENSE_EN_MASK 0x00000010
+#define CS42L43_HS1_BIAS_SENSE_EN_SHIFT 4
+#define CS42L43_HS2_BIAS_EN_MASK 0x00000008
+#define CS42L43_HS2_BIAS_EN_SHIFT 3
+#define CS42L43_HS1_BIAS_EN_MASK 0x00000004
+#define CS42L43_HS1_BIAS_EN_SHIFT 2
+#define CS42L43_JACK_STEREO_CONFIG_MASK 0x00000003
+#define CS42L43_JACK_STEREO_CONFIG_SHIFT 0
+
+/* CS42L43_STEREO_MIC_CLAMP_CTRL */
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK 0x00000002
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_SHIFT 1
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK 0x00000001
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_SHIFT 0
+
+/* CS42L43_BLOCK_EN2 */
+#define CS42L43_SPI_MSTR_EN_MASK 0x00000001
+#define CS42L43_SPI_MSTR_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN3 */
+#define CS42L43_PDM2_DIN_R_EN_MASK 0x00000020
+#define CS42L43_PDM2_DIN_R_EN_SHIFT 5
+#define CS42L43_PDM2_DIN_L_EN_MASK 0x00000010
+#define CS42L43_PDM2_DIN_L_EN_SHIFT 4
+#define CS42L43_PDM1_DIN_R_EN_MASK 0x00000008
+#define CS42L43_PDM1_DIN_R_EN_SHIFT 3
+#define CS42L43_PDM1_DIN_L_EN_MASK 0x00000004
+#define CS42L43_PDM1_DIN_L_EN_SHIFT 2
+#define CS42L43_ADC2_EN_MASK 0x00000002
+#define CS42L43_ADC2_EN_SHIFT 1
+#define CS42L43_ADC1_EN_MASK 0x00000001
+#define CS42L43_ADC1_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN4 */
+#define CS42L43_ASRC_DEC_BANK_EN_MASK 0x00000002
+#define CS42L43_ASRC_DEC_BANK_EN_SHIFT 1
+#define CS42L43_ASRC_INT_BANK_EN_MASK 0x00000001
+#define CS42L43_ASRC_INT_BANK_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN5 */
+#define CS42L43_ISRC2_BANK_EN_MASK 0x00000002
+#define CS42L43_ISRC2_BANK_EN_SHIFT 1
+#define CS42L43_ISRC1_BANK_EN_MASK 0x00000001
+#define CS42L43_ISRC1_BANK_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN6 */
+#define CS42L43_MIXER_EN_MASK 0x00000001
+#define CS42L43_MIXER_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN7 */
+#define CS42L43_EQ_EN_MASK 0x00000001
+#define CS42L43_EQ_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN8 */
+#define CS42L43_HP_EN_MASK 0x00000001
+#define CS42L43_HP_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN9 */
+#define CS42L43_TONE_EN_MASK 0x00000001
+#define CS42L43_TONE_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN10 */
+#define CS42L43_AMP2_EN_MASK 0x00000002
+#define CS42L43_AMP2_EN_SHIFT 1
+#define CS42L43_AMP1_EN_MASK 0x00000001
+#define CS42L43_AMP1_EN_SHIFT 0
+
+/* CS42L43_BLOCK_EN11 */
+#define CS42L43_SPDIF_EN_MASK 0x00000001
+#define CS42L43_SPDIF_EN_SHIFT 0
+
+/* CS42L43_TONE_CH1_CTRL..CS42L43_TONE_CH2_CTRL */
+#define CS42L43_TONE_FREQ_MASK 0x00000070
+#define CS42L43_TONE_FREQ_SHIFT 4
+#define CS42L43_TONE_SEL_MASK 0x0000000F
+#define CS42L43_TONE_SEL_SHIFT 0
+
+/* CS42L43_MIC_DETECT_CONTROL_1 */
+#define CS42L43_BUTTON_DETECT_MODE_MASK 0x00000018
+#define CS42L43_BUTTON_DETECT_MODE_SHIFT 3
+#define CS42L43_HSBIAS_MODE_MASK 0x00000006
+#define CS42L43_HSBIAS_MODE_SHIFT 1
+#define CS42L43_MIC_LVL_DET_DISABLE_MASK 0x00000001
+#define CS42L43_MIC_LVL_DET_DISABLE_SHIFT 0
+
+/* CS42L43_DETECT_STATUS_1 */
+#define CS42L43_HSDET_DC_STS_MASK 0x01FF0000
+#define CS42L43_HSDET_DC_STS_SHIFT 16
+#define CS42L43_JACKDET_STS_MASK 0x00000080
+#define CS42L43_JACKDET_STS_SHIFT 7
+#define CS42L43_HSBIAS_CLAMP_STS_MASK 0x00000040
+#define CS42L43_HSBIAS_CLAMP_STS_SHIFT 6
+
+/* CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL */
+#define CS42L43_JACKDET_MODE_MASK 0xC0000000
+#define CS42L43_JACKDET_MODE_SHIFT 30
+#define CS42L43_JACKDET_INV_MASK 0x20000000
+#define CS42L43_JACKDET_INV_SHIFT 29
+#define CS42L43_JACKDET_DB_TIME_MASK 0x03000000
+#define CS42L43_JACKDET_DB_TIME_SHIFT 24
+#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_MASK 0x00800000
+#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_SHIFT 23
+#define CS42L43_HSBIAS_SENSE_EN_MASK 0x00000080
+#define CS42L43_HSBIAS_SENSE_EN_SHIFT 7
+#define CS42L43_AUTO_HSBIAS_CLAMP_EN_MASK 0x00000040
+#define CS42L43_AUTO_HSBIAS_CLAMP_EN_SHIFT 6
+#define CS42L43_JACKDET_SENSE_EN_MASK 0x00000020
+#define CS42L43_JACKDET_SENSE_EN_SHIFT 5
+#define CS42L43_HSBIAS_SENSE_TRIP_MASK 0x00000007
+#define CS42L43_HSBIAS_SENSE_TRIP_SHIFT 0
+
+/* CS42L43_MIC_DETECT_CONTROL_ANDROID */
+#define CS42L43_HSDET_LVL_COMBWIDTH_MASK 0xC0000000
+#define CS42L43_HSDET_LVL_COMBWIDTH_SHIFT 30
+#define CS42L43_HSDET_LVL2_THRESH_MASK 0x01FF0000
+#define CS42L43_HSDET_LVL2_THRESH_SHIFT 16
+#define CS42L43_HSDET_LVL1_THRESH_MASK 0x000001FF
+#define CS42L43_HSDET_LVL1_THRESH_SHIFT 0
+
+/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */
+#define CS42L43_ISRC_INT2_EN_MASK 0x00000200
+#define CS42L43_ISRC_INT2_EN_SHIFT 9
+#define CS42L43_ISRC_INT1_EN_MASK 0x00000100
+#define CS42L43_ISRC_INT1_EN_SHIFT 8
+#define CS42L43_ISRC_DEC2_EN_MASK 0x00000002
+#define CS42L43_ISRC_DEC2_EN_SHIFT 1
+#define CS42L43_ISRC_DEC1_EN_MASK 0x00000001
+#define CS42L43_ISRC_DEC1_EN_SHIFT 0
+
+/* CS42L43_CTRL_REG */
+#define CS42L43_PLL_MODE_BYPASS_500_MASK 0x00000004
+#define CS42L43_PLL_MODE_BYPASS_500_SHIFT 2
+#define CS42L43_PLL_MODE_BYPASS_1029_MASK 0x00000002
+#define CS42L43_PLL_MODE_BYPASS_1029_SHIFT 1
+#define CS42L43_PLL_EN_MASK 0x00000001
+#define CS42L43_PLL_EN_SHIFT 0
+
+/* CS42L43_FDIV_FRAC */
+#define CS42L43_PLL_DIV_INT_MASK 0xFF000000
+#define CS42L43_PLL_DIV_INT_SHIFT 24
+#define CS42L43_PLL_DIV_FRAC_BYTE2_MASK 0x00FF0000
+#define CS42L43_PLL_DIV_FRAC_BYTE2_SHIFT 16
+#define CS42L43_PLL_DIV_FRAC_BYTE1_MASK 0x0000FF00
+#define CS42L43_PLL_DIV_FRAC_BYTE1_SHIFT 8
+#define CS42L43_PLL_DIV_FRAC_BYTE0_MASK 0x000000FF
+#define CS42L43_PLL_DIV_FRAC_BYTE0_SHIFT 0
+
+/* CS42L43_CAL_RATIO */
+#define CS42L43_PLL_CAL_RATIO_MASK 0x000000FF
+#define CS42L43_PLL_CAL_RATIO_SHIFT 0
+
+/* CS42L43_SPI_CLK_CONFIG1 */
+#define CS42L43_SCLK_DIV_MASK 0x0000000F
+#define CS42L43_SCLK_DIV_SHIFT 0
+
+/* CS42L43_SPI_CONFIG1 */
+#define CS42L43_SPI_SS_IDLE_DUR_MASK 0x0F000000
+#define CS42L43_SPI_SS_IDLE_DUR_SHIFT 24
+#define CS42L43_SPI_SS_DELAY_DUR_MASK 0x000F0000
+#define CS42L43_SPI_SS_DELAY_DUR_SHIFT 16
+#define CS42L43_SPI_THREE_WIRE_MASK 0x00000100
+#define CS42L43_SPI_THREE_WIRE_SHIFT 8
+#define CS42L43_SPI_DPHA_MASK 0x00000040
+#define CS42L43_SPI_DPHA_SHIFT 6
+#define CS42L43_SPI_CPHA_MASK 0x00000020
+#define CS42L43_SPI_CPHA_SHIFT 5
+#define CS42L43_SPI_CPOL_MASK 0x00000010
+#define CS42L43_SPI_CPOL_SHIFT 4
+#define CS42L43_SPI_SS_SEL_MASK 0x00000007
+#define CS42L43_SPI_SS_SEL_SHIFT 0
+
+/* CS42L43_SPI_CONFIG2 */
+#define CS42L43_SPI_SS_FRC_MASK 0x00000001
+#define CS42L43_SPI_SS_FRC_SHIFT 0
+
+/* CS42L43_SPI_CONFIG3 */
+#define CS42L43_SPI_WDT_ENA_MASK 0x00000001
+#define CS42L43_SPI_WDT_ENA_SHIFT 0
+
+/* CS42L43_SPI_CONFIG4 */
+#define CS42L43_SPI_STALL_ENA_MASK 0x00010000
+#define CS42L43_SPI_STALL_ENA_SHIFT 16
+
+/* CS42L43_SPI_STATUS1 */
+#define CS42L43_SPI_ABORT_STS_MASK 0x00000002
+#define CS42L43_SPI_ABORT_STS_SHIFT 1
+#define CS42L43_SPI_DONE_STS_MASK 0x00000001
+#define CS42L43_SPI_DONE_STS_SHIFT 0
+
+/* CS42L43_SPI_STATUS2 */
+#define CS42L43_SPI_RX_DONE_STS_MASK 0x00000010
+#define CS42L43_SPI_RX_DONE_STS_SHIFT 4
+#define CS42L43_SPI_TX_DONE_STS_MASK 0x00000001
+#define CS42L43_SPI_TX_DONE_STS_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG1 */
+#define CS42L43_SPI_START_MASK 0x00000001
+#define CS42L43_SPI_START_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG2 */
+#define CS42L43_SPI_ABORT_MASK 0x00000001
+#define CS42L43_SPI_ABORT_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG3 */
+#define CS42L43_SPI_WORD_SIZE_MASK 0x00070000
+#define CS42L43_SPI_WORD_SIZE_SHIFT 16
+#define CS42L43_SPI_CMD_MASK 0x00000003
+#define CS42L43_SPI_CMD_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG4 */
+#define CS42L43_SPI_TX_LENGTH_MASK 0x0000FFFF
+#define CS42L43_SPI_TX_LENGTH_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG5 */
+#define CS42L43_SPI_RX_LENGTH_MASK 0x0000FFFF
+#define CS42L43_SPI_RX_LENGTH_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG6 */
+#define CS42L43_SPI_TX_BLOCK_LENGTH_MASK 0x0000000F
+#define CS42L43_SPI_TX_BLOCK_LENGTH_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG7 */
+#define CS42L43_SPI_RX_BLOCK_LENGTH_MASK 0x0000000F
+#define CS42L43_SPI_RX_BLOCK_LENGTH_SHIFT 0
+
+/* CS42L43_TRAN_CONFIG8 */
+#define CS42L43_SPI_RX_DONE_MASK 0x00000010
+#define CS42L43_SPI_RX_DONE_SHIFT 4
+#define CS42L43_SPI_TX_DONE_MASK 0x00000001
+#define CS42L43_SPI_TX_DONE_SHIFT 0
+
+/* CS42L43_TRAN_STATUS1 */
+#define CS42L43_SPI_BUSY_STS_MASK 0x00000100
+#define CS42L43_SPI_BUSY_STS_SHIFT 8
+#define CS42L43_SPI_RX_REQUEST_MASK 0x00000010
+#define CS42L43_SPI_RX_REQUEST_SHIFT 4
+#define CS42L43_SPI_TX_REQUEST_MASK 0x00000001
+#define CS42L43_SPI_TX_REQUEST_SHIFT 0
+
+/* CS42L43_TRAN_STATUS2 */
+#define CS42L43_SPI_TX_BYTE_COUNT_MASK 0x0000FFFF
+#define CS42L43_SPI_TX_BYTE_COUNT_SHIFT 0
+
+/* CS42L43_TRAN_STATUS3 */
+#define CS42L43_SPI_RX_BYTE_COUNT_MASK 0x0000FFFF
+#define CS42L43_SPI_RX_BYTE_COUNT_SHIFT 0
+
+/* CS42L43_TX_DATA */
+#define CS42L43_SPI_TX_DATA_MASK 0xFFFFFFFF
+#define CS42L43_SPI_TX_DATA_SHIFT 0
+
+/* CS42L43_RX_DATA */
+#define CS42L43_SPI_RX_DATA_MASK 0xFFFFFFFF
+#define CS42L43_SPI_RX_DATA_SHIFT 0
+
+/* CS42L43_DACCNFG1 */
+#define CS42L43_HP_MSTR_VOL_CTRL_EN_MASK 0x00000008
+#define CS42L43_HP_MSTR_VOL_CTRL_EN_SHIFT 3
+#define CS42L43_AMP4_INV_MASK 0x00000002
+#define CS42L43_AMP4_INV_SHIFT 1
+#define CS42L43_AMP3_INV_MASK 0x00000001
+#define CS42L43_AMP3_INV_SHIFT 0
+
+/* CS42L43_DACCNFG2 */
+#define CS42L43_HP_AUTO_CLAMP_DISABLE_MASK 0x00000002
+#define CS42L43_HP_AUTO_CLAMP_DISABLE_SHIFT 1
+#define CS42L43_HP_HPF_EN_MASK 0x00000001
+#define CS42L43_HP_HPF_EN_SHIFT 0
+
+/* CS42L43_HPPATHVOL */
+#define CS42L43_AMP4_PATH_VOL_MASK 0x01FF0000
+#define CS42L43_AMP4_PATH_VOL_SHIFT 16
+#define CS42L43_AMP3_PATH_VOL_MASK 0x000001FF
+#define CS42L43_AMP3_PATH_VOL_SHIFT 0
+
+/* CS42L43_PGAVOL */
+#define CS42L43_HP_PATH_VOL_RAMP_MASK 0x0003C000
+#define CS42L43_HP_PATH_VOL_RAMP_SHIFT 14
+#define CS42L43_HP_PATH_VOL_ZC_MASK 0x00002000
+#define CS42L43_HP_PATH_VOL_ZC_SHIFT 13
+#define CS42L43_HP_PATH_VOL_SFT_MASK 0x00001000
+#define CS42L43_HP_PATH_VOL_SFT_SHIFT 12
+#define CS42L43_HP_DIG_VOL_RAMP_MASK 0x00000F00
+#define CS42L43_HP_DIG_VOL_RAMP_SHIFT 8
+#define CS42L43_HP_ANA_VOL_RAMP_MASK 0x0000000F
+#define CS42L43_HP_ANA_VOL_RAMP_SHIFT 0
+
+/* CS42L43_LOADDETRESULTS */
+#define CS42L43_AMP3_RES_DET_MASK 0x00000003
+#define CS42L43_AMP3_RES_DET_SHIFT 0
+
+/* CS42L43_LOADDETENA */
+#define CS42L43_HPLOAD_DET_EN_MASK 0x00000001
+#define CS42L43_HPLOAD_DET_EN_SHIFT 0
+
+/* CS42L43_CTRL */
+#define CS42L43_ADPTPWR_MODE_MASK 0x00000007
+#define CS42L43_ADPTPWR_MODE_SHIFT 0
+
+/* CS42L43_COEFF_RD_WR0 */
+#define CS42L43_WRITE_MODE_MASK 0x00000002
+#define CS42L43_WRITE_MODE_SHIFT 1
+
+/* CS42L43_INIT_DONE0 */
+#define CS42L43_INITIALIZE_DONE_MASK 0x00000001
+#define CS42L43_INITIALIZE_DONE_SHIFT 0
+
+/* CS42L43_START_EQZ0 */
+#define CS42L43_START_FILTER_MASK 0x00000001
+#define CS42L43_START_FILTER_SHIFT 0
+
+/* CS42L43_MUTE_EQ_IN0 */
+#define CS42L43_MUTE_EQ_CH2_MASK 0x00000002
+#define CS42L43_MUTE_EQ_CH2_SHIFT 1
+#define CS42L43_MUTE_EQ_CH1_MASK 0x00000001
+#define CS42L43_MUTE_EQ_CH1_SHIFT 0
+
+/* CS42L43_PLL_INT */
+#define CS42L43_PLL_LOST_LOCK_INT_MASK 0x00000002
+#define CS42L43_PLL_LOST_LOCK_INT_SHIFT 1
+#define CS42L43_PLL_READY_INT_MASK 0x00000001
+#define CS42L43_PLL_READY_INT_SHIFT 0
+
+/* CS42L43_SOFT_INT */
+#define CS42L43_CONTROL_APPLIED_INT_MASK 0x00000010
+#define CS42L43_CONTROL_APPLIED_INT_SHIFT 4
+#define CS42L43_CONTROL_WARN_INT_MASK 0x00000008
+#define CS42L43_CONTROL_WARN_INT_SHIFT 3
+#define CS42L43_PATCH_WARN_INT_MASK 0x00000002
+#define CS42L43_PATCH_WARN_INT_SHIFT 1
+#define CS42L43_PATCH_APPLIED_INT_MASK 0x00000001
+#define CS42L43_PATCH_APPLIED_INT_SHIFT 0
+
+/* CS42L43_MSM_INT */
+#define CS42L43_HP_STARTUP_DONE_INT_MASK 0x00000800
+#define CS42L43_HP_STARTUP_DONE_INT_SHIFT 11
+#define CS42L43_HP_SHUTDOWN_DONE_INT_MASK 0x00000400
+#define CS42L43_HP_SHUTDOWN_DONE_INT_SHIFT 10
+#define CS42L43_HSDET_DONE_INT_MASK 0x00000200
+#define CS42L43_HSDET_DONE_INT_SHIFT 9
+#define CS42L43_TIPSENSE_UNPLUG_DB_INT_MASK 0x00000080
+#define CS42L43_TIPSENSE_UNPLUG_DB_INT_SHIFT 7
+#define CS42L43_TIPSENSE_PLUG_DB_INT_MASK 0x00000040
+#define CS42L43_TIPSENSE_PLUG_DB_INT_SHIFT 6
+#define CS42L43_RINGSENSE_UNPLUG_DB_INT_MASK 0x00000020
+#define CS42L43_RINGSENSE_UNPLUG_DB_INT_SHIFT 5
+#define CS42L43_RINGSENSE_PLUG_DB_INT_MASK 0x00000010
+#define CS42L43_RINGSENSE_PLUG_DB_INT_SHIFT 4
+#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_MASK 0x00000008
+#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_SHIFT 3
+#define CS42L43_TIPSENSE_PLUG_PDET_INT_MASK 0x00000004
+#define CS42L43_TIPSENSE_PLUG_PDET_INT_SHIFT 2
+#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_MASK 0x00000002
+#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_SHIFT 1
+#define CS42L43_RINGSENSE_PLUG_PDET_INT_MASK 0x00000001
+#define CS42L43_RINGSENSE_PLUG_PDET_INT_SHIFT 0
+
+/* CS42L43_ACC_DET_INT */
+#define CS42L43_HS2_BIAS_SENSE_INT_MASK 0x00000800
+#define CS42L43_HS2_BIAS_SENSE_INT_SHIFT 11
+#define CS42L43_HS1_BIAS_SENSE_INT_MASK 0x00000400
+#define CS42L43_HS1_BIAS_SENSE_INT_SHIFT 10
+#define CS42L43_DC_DETECT1_FALSE_INT_MASK 0x00000080
+#define CS42L43_DC_DETECT1_FALSE_INT_SHIFT 7
+#define CS42L43_DC_DETECT1_TRUE_INT_MASK 0x00000040
+#define CS42L43_DC_DETECT1_TRUE_INT_SHIFT 6
+#define CS42L43_HSBIAS_CLAMPED_INT_MASK 0x00000008
+#define CS42L43_HSBIAS_CLAMPED_INT_SHIFT 3
+#define CS42L43_HS3_4_BIAS_SENSE_INT_MASK 0x00000001
+#define CS42L43_HS3_4_BIAS_SENSE_INT_SHIFT 0
+
+/* CS42L43_SPI_MSTR_INT */
+#define CS42L43_IRQ_SPI_STALLING_INT_MASK 0x00000004
+#define CS42L43_IRQ_SPI_STALLING_INT_SHIFT 2
+#define CS42L43_IRQ_SPI_STS_INT_MASK 0x00000002
+#define CS42L43_IRQ_SPI_STS_INT_SHIFT 1
+#define CS42L43_IRQ_SPI_BLOCK_INT_MASK 0x00000001
+#define CS42L43_IRQ_SPI_BLOCK_INT_SHIFT 0
+
+/* CS42L43_SW_TO_SPI_BRIDGE_INT */
+#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_MASK 0x00000001
+#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_SHIFT 0
+
+/* CS42L43_CLASS_D_AMP_INT */
+#define CS42L43_AMP2_CLK_STOP_FAULT_INT_MASK 0x00002000
+#define CS42L43_AMP2_CLK_STOP_FAULT_INT_SHIFT 13
+#define CS42L43_AMP1_CLK_STOP_FAULT_INT_MASK 0x00001000
+#define CS42L43_AMP1_CLK_STOP_FAULT_INT_SHIFT 12
+#define CS42L43_AMP2_VDDSPK_FAULT_INT_MASK 0x00000800
+#define CS42L43_AMP2_VDDSPK_FAULT_INT_SHIFT 11
+#define CS42L43_AMP1_VDDSPK_FAULT_INT_MASK 0x00000400
+#define CS42L43_AMP1_VDDSPK_FAULT_INT_SHIFT 10
+#define CS42L43_AMP2_SHUTDOWN_DONE_INT_MASK 0x00000200
+#define CS42L43_AMP2_SHUTDOWN_DONE_INT_SHIFT 9
+#define CS42L43_AMP1_SHUTDOWN_DONE_INT_MASK 0x00000100
+#define CS42L43_AMP1_SHUTDOWN_DONE_INT_SHIFT 8
+#define CS42L43_AMP2_STARTUP_DONE_INT_MASK 0x00000080
+#define CS42L43_AMP2_STARTUP_DONE_INT_SHIFT 7
+#define CS42L43_AMP1_STARTUP_DONE_INT_MASK 0x00000040
+#define CS42L43_AMP1_STARTUP_DONE_INT_SHIFT 6
+#define CS42L43_AMP2_THERM_SHDN_INT_MASK 0x00000020
+#define CS42L43_AMP2_THERM_SHDN_INT_SHIFT 5
+#define CS42L43_AMP1_THERM_SHDN_INT_MASK 0x00000010
+#define CS42L43_AMP1_THERM_SHDN_INT_SHIFT 4
+#define CS42L43_AMP2_THERM_WARN_INT_MASK 0x00000008
+#define CS42L43_AMP2_THERM_WARN_INT_SHIFT 3
+#define CS42L43_AMP1_THERM_WARN_INT_MASK 0x00000004
+#define CS42L43_AMP1_THERM_WARN_INT_SHIFT 2
+#define CS42L43_AMP2_SCDET_INT_MASK 0x00000002
+#define CS42L43_AMP2_SCDET_INT_SHIFT 1
+#define CS42L43_AMP1_SCDET_INT_MASK 0x00000001
+#define CS42L43_AMP1_SCDET_INT_SHIFT 0
+
+/* CS42L43_GPIO_INT */
+#define CS42L43_GPIO3_FALL_INT_MASK 0x00000020
+#define CS42L43_GPIO3_FALL_INT_SHIFT 5
+#define CS42L43_GPIO3_RISE_INT_MASK 0x00000010
+#define CS42L43_GPIO3_RISE_INT_SHIFT 4
+#define CS42L43_GPIO2_FALL_INT_MASK 0x00000008
+#define CS42L43_GPIO2_FALL_INT_SHIFT 3
+#define CS42L43_GPIO2_RISE_INT_MASK 0x00000004
+#define CS42L43_GPIO2_RISE_INT_SHIFT 2
+#define CS42L43_GPIO1_FALL_INT_MASK 0x00000002
+#define CS42L43_GPIO1_FALL_INT_SHIFT 1
+#define CS42L43_GPIO1_RISE_INT_MASK 0x00000001
+#define CS42L43_GPIO1_RISE_INT_SHIFT 0
+
+/* CS42L43_HPOUT_INT */
+#define CS42L43_HP_ILIMIT_INT_MASK 0x00000002
+#define CS42L43_HP_ILIMIT_INT_SHIFT 1
+#define CS42L43_HP_LOADDET_DONE_INT_MASK 0x00000001
+#define CS42L43_HP_LOADDET_DONE_INT_SHIFT 0
+
+/* CS42L43_BOOT_CONTROL */
+#define CS42L43_LOCK_HW_STS_MASK 0x00000002
+#define CS42L43_LOCK_HW_STS_SHIFT 1
+
+/* CS42L43_BLOCK_EN */
+#define CS42L43_MCU_EN_MASK 0x00000001
+#define CS42L43_MCU_EN_SHIFT 0
+
+/* CS42L43_SHUTTER_CONTROL */
+#define CS42L43_STATUS_SPK_SHUTTER_MUTE_MASK 0x00008000
+#define CS42L43_STATUS_SPK_SHUTTER_MUTE_SHIFT 15
+#define CS42L43_SPK_SHUTTER_CFG_MASK 0x00000F00
+#define CS42L43_SPK_SHUTTER_CFG_SHIFT 8
+#define CS42L43_STATUS_MIC_SHUTTER_MUTE_MASK 0x00000080
+#define CS42L43_STATUS_MIC_SHUTTER_MUTE_SHIFT 7
+#define CS42L43_MIC_SHUTTER_CFG_MASK 0x0000000F
+#define CS42L43_MIC_SHUTTER_CFG_SHIFT 0
+
+/* CS42L43_MCU_SW_REV */
+#define CS42L43_BIOS_SUBMINOR_REV_MASK 0xFF000000
+#define CS42L43_BIOS_SUBMINOR_REV_SHIFT 24
+#define CS42L43_BIOS_MINOR_REV_MASK 0x00F00000
+#define CS42L43_BIOS_MINOR_REV_SHIFT 20
+#define CS42L43_BIOS_MAJOR_REV_MASK 0x000F0000
+#define CS42L43_BIOS_MAJOR_REV_SHIFT 16
+#define CS42L43_FW_SUBMINOR_REV_MASK 0x0000FF00
+#define CS42L43_FW_SUBMINOR_REV_SHIFT 8
+#define CS42L43_FW_MINOR_REV_MASK 0x000000F0
+#define CS42L43_FW_MINOR_REV_SHIFT 4
+#define CS42L43_FW_MAJOR_REV_MASK 0x0000000F
+#define CS42L43_FW_MAJOR_REV_SHIFT 0
+
+/* CS42L43_NEED_CONFIGS */
+#define CS42L43_FW_PATCH_NEED_CFG_MASK 0x80000000
+#define CS42L43_FW_PATCH_NEED_CFG_SHIFT 31
+
+/* CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION */
+#define CS42L43_FW_MM_CTRL_MCU_SEL_MASK 0x00000001
+#define CS42L43_FW_MM_CTRL_MCU_SEL_SHIFT 0
+
+/* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */
+#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL 0xF05AA50F
+
+#endif /* CS42L43_CORE_REGS_H */
diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h
new file mode 100644
index 000000000000..2239d8585e78
--- /dev/null
+++ b/include/linux/mfd/cs42l43.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CS42L43 core driver external data
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ * Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef CS42L43_CORE_EXT_H
+#define CS42L43_CORE_EXT_H
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/workqueue.h>
+
+#define CS42L43_N_SUPPLIES 3
+
+struct device;
+struct gpio_desc;
+struct sdw_slave;
+
+enum cs42l43_irq_numbers {
+ CS42L43_PLL_LOST_LOCK,
+ CS42L43_PLL_READY,
+
+ CS42L43_HP_STARTUP_DONE,
+ CS42L43_HP_SHUTDOWN_DONE,
+ CS42L43_HSDET_DONE,
+ CS42L43_TIPSENSE_UNPLUG_DB,
+ CS42L43_TIPSENSE_PLUG_DB,
+ CS42L43_RINGSENSE_UNPLUG_DB,
+ CS42L43_RINGSENSE_PLUG_DB,
+ CS42L43_TIPSENSE_UNPLUG_PDET,
+ CS42L43_TIPSENSE_PLUG_PDET,
+ CS42L43_RINGSENSE_UNPLUG_PDET,
+ CS42L43_RINGSENSE_PLUG_PDET,
+
+ CS42L43_HS2_BIAS_SENSE,
+ CS42L43_HS1_BIAS_SENSE,
+ CS42L43_DC_DETECT1_FALSE,
+ CS42L43_DC_DETECT1_TRUE,
+ CS42L43_HSBIAS_CLAMPED,
+ CS42L43_HS3_4_BIAS_SENSE,
+
+ CS42L43_AMP2_CLK_STOP_FAULT,
+ CS42L43_AMP1_CLK_STOP_FAULT,
+ CS42L43_AMP2_VDDSPK_FAULT,
+ CS42L43_AMP1_VDDSPK_FAULT,
+ CS42L43_AMP2_SHUTDOWN_DONE,
+ CS42L43_AMP1_SHUTDOWN_DONE,
+ CS42L43_AMP2_STARTUP_DONE,
+ CS42L43_AMP1_STARTUP_DONE,
+ CS42L43_AMP2_THERM_SHDN,
+ CS42L43_AMP1_THERM_SHDN,
+ CS42L43_AMP2_THERM_WARN,
+ CS42L43_AMP1_THERM_WARN,
+ CS42L43_AMP2_SCDET,
+ CS42L43_AMP1_SCDET,
+
+ CS42L43_GPIO3_FALL,
+ CS42L43_GPIO3_RISE,
+ CS42L43_GPIO2_FALL,
+ CS42L43_GPIO2_RISE,
+ CS42L43_GPIO1_FALL,
+ CS42L43_GPIO1_RISE,
+
+ CS42L43_HP_ILIMIT,
+ CS42L43_HP_LOADDET_DONE,
+};
+
+struct cs42l43 {
+ struct device *dev;
+ struct regmap *regmap;
+ struct sdw_slave *sdw;
+
+ struct regulator *vdd_p;
+ struct regulator *vdd_d;
+ struct regulator_bulk_data core_supplies[CS42L43_N_SUPPLIES];
+
+ struct gpio_desc *reset;
+
+ int irq;
+ struct regmap_irq_chip irq_chip;
+ struct regmap_irq_chip_data *irq_data;
+
+ struct work_struct boot_work;
+ struct completion device_attach;
+ struct completion device_detach;
+ struct completion firmware_download;
+ int firmware_error;
+
+ unsigned int sdw_freq;
+ /* Lock to gate control of the PLL and its sources. */
+ struct mutex pll_lock;
+
+ bool sdw_pll_active;
+ bool attached;
+ bool hw_lock;
+};
+
+#endif /* CS42L43_CORE_EXT_H */
diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h
index d3f126990ad0..137a2b067512 100644
--- a/include/linux/mfd/da9055/pdata.h
+++ b/include/linux/mfd/da9055/pdata.h
@@ -7,7 +7,6 @@
#define DA9055_MAX_REGULATORS 8
struct da9055;
-struct gpio_desc;
enum gpio_select {
NO_GPIO = 0,
@@ -24,16 +23,6 @@ struct da9055_pdata {
/* Enable RTC in RESET Mode */
bool reset_enable;
/*
- * GPI muxed pin to control
- * regulator state A/B, 0 if not available.
- */
- int *gpio_ren;
- /*
- * GPI muxed pin to control
- * regulator set, 0 if not available.
- */
- int *gpio_rsel;
- /*
* Regulator mode control bits value (GPI offset) that
* controls the regulator state, 0 if not available.
*/
@@ -43,7 +32,5 @@ struct da9055_pdata {
* controls the regulator set A/B, 0 if not available.
*/
enum gpio_select *reg_rsel;
- /* GPIO descriptors to enable regulator, NULL if not available */
- struct gpio_desc **ena_gpiods;
};
#endif /* __DA9055_PDATA_H */
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index e7a7e70fdb38..dd0fc891b228 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits)
#define PRCMU_QOS_ARM_OPP 3
#define PRCMU_QOS_DEFAULT_VALUE -1
-static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void)
-{
- return 0;
-}
-
-static inline int prcmu_qos_requirement(int prcmu_qos_class)
-{
- return 0;
-}
-
static inline int prcmu_qos_add_requirement(int prcmu_qos_class,
char *name, s32 value)
{
@@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name)
{
}
-static inline int prcmu_qos_add_notifier(int prcmu_qos_class,
- struct notifier_block *notifier)
-{
- return 0;
-}
-static inline int prcmu_qos_remove_notifier(int prcmu_qos_class,
- struct notifier_block *notifier)
-{
- return 0;
-}
-
#endif /* __MACH_PRCMU_H */
diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h
index 6a012784dd1b..194556851ccf 100644
--- a/include/linux/mfd/hi655x-pmic.h
+++ b/include/linux/mfd/hi655x-pmic.h
@@ -52,7 +52,6 @@
#define OTMP_D1R_INT_MASK BIT(OTMP_D1R_INT)
struct hi655x_pmic {
- struct resource *res;
struct device *dev;
struct regmap *regmap;
struct gpio_desc *gpio;
diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h
new file mode 100644
index 000000000000..ec11872f51ad
--- /dev/null
+++ b/include/linux/mfd/idtRC38xxx_reg.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21)
+ *
+ * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company.
+ */
+#ifndef MFD_IDTRC38XXX_REG
+#define MFD_IDTRC38XXX_REG
+
+/* GLOBAL */
+#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */
+#define MISC_CTRL (0x14) /* Specific to FC3A */
+#define APLL_REINIT BIT(1)
+#define APLL_REINIT_VFC3A BIT(2)
+
+#define DEVICE_ID (0x2)
+#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */
+#define DEVICE_ID_SHIFT (12)
+
+/* FOD */
+#define FOD_0 (0x300)
+#define FOD_0_VFC3A (0x400)
+#define FOD_1 (0x340)
+#define FOD_1_VFC3A (0x440)
+#define FOD_2 (0x380)
+#define FOD_2_VFC3A (0x480)
+
+/* TDCAPLL */
+#define TDC_CTRL (0x44a) /* Specific to FC3W */
+#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */
+#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */
+#define TDC_EN BIT(0)
+#define TDC_DAC_RECAL_REQ BIT(1)
+#define TDC_DAC_RECAL_REQ_VFC3A BIT(0)
+
+#define TDC_FB_DIV_INT_CNFG (0x442)
+#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162)
+#define TDC_FB_DIV_INT_MASK GENMASK(7, 0)
+#define TDC_REF_DIV_CNFG (0x443)
+#define TDC_REF_DIV_CNFG_VFC3A (0x163)
+#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0)
+
+/* TIME SYNC CHANNEL */
+#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */
+#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */
+#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0)
+
+#define SUB_SYNC_GEN_CNFG (0xa04)
+
+#define TOD_COUNTER_READ_REQ (0xa5f)
+#define TOD_COUNTER_READ_REQ_VFC3A (0x6df)
+#define TOD_SYNC_LOAD_VAL_CTRL (0xa10)
+#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690)
+#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0)
+#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0)
+#define TOD_SYNC_LOAD_REQ_CTRL (0xa21)
+#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1)
+#define SYNC_LOAD_ENABLE BIT(1)
+#define SUB_SYNC_LOAD_ENABLE BIT(0)
+#define SYNC_LOAD_REQ BIT(0)
+
+#define LPF_MODE_CNFG (0xa80)
+#define LPF_MODE_CNFG_VFC3A (0x700)
+enum lpf_mode {
+ LPF_DISABLED = 0,
+ LPF_WP = 1,
+ LPF_HOLDOVER = 2,
+ LPF_WF = 3,
+ LPF_INVALID = 4
+};
+#define LPF_CTRL (0xa98)
+#define LPF_CTRL_VFC3A (0x718)
+#define LPF_EN BIT(0)
+
+#define LPF_BW_CNFG (0xa81)
+#define LPF_BW_SHIFT GENMASK(7, 3)
+#define LPF_BW_MULT GENMASK(2, 0)
+#define LPF_BW_SHIFT_DEFAULT (0xb)
+#define LPF_BW_MULT_DEFAULT (0x0)
+#define LPF_BW_SHIFT_1PPS (0x5)
+
+#define LPF_WR_PHASE_CTRL (0xaa8)
+#define LPF_WR_PHASE_CTRL_VFC3A (0x728)
+#define LPF_WR_FREQ_CTRL (0xab0)
+#define LPF_WR_FREQ_CTRL_VFC3A (0x730)
+
+#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00)
+#define TIME_SYNC_TO_TDC_EN BIT(0)
+#define SIG1_MUX_SEL_MASK GENMASK(7, 4)
+#define SIG2_MUX_SEL_MASK GENMASK(11, 8)
+enum tdc_mux_sel {
+ REF0 = 0,
+ REF1 = 1,
+ REF2 = 2,
+ REF3 = 3,
+ REF_CLK5 = 4,
+ REF_CLK6 = 5,
+ DPLL_FB_TO_TDC = 6,
+ DPLL_FB_DIVIDED_TO_TDC = 7,
+ TIME_CLK_DIVIDED = 8,
+ TIME_SYNC = 9,
+};
+
+#define TIME_CLOCK_MEAS_CNFG (0xB04)
+#define TDC_MEAS_MODE BIT(0)
+enum tdc_meas_mode {
+ CONTINUOUS = 0,
+ ONE_SHOT = 1,
+ MEAS_MODE_INVALID = 2,
+};
+
+#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08)
+#define TIME_REF_DIV_MASK GENMASK(29, 24)
+
+#define TIME_CLOCK_MEAS_CTRL (0xB10)
+#define TDC_MEAS_EN BIT(0)
+#define TDC_MEAS_START BIT(1)
+
+#define TDC_FIFO_READ_REQ (0xB2F)
+#define TDC_FIFO_READ (0xB30)
+#define COARSE_MEAS_MASK GENMASK_ULL(39, 13)
+#define FINE_MEAS_MASK GENMASK(12, 0)
+
+#define TDC_FIFO_CTRL (0xB12)
+#define FIFO_CLEAR BIT(0)
+#define TDC_FIFO_STS (0xB38)
+#define FIFO_FULL BIT(1)
+#define FIFO_EMPTY BIT(0)
+#define TDC_FIFO_EVENT (0xB39)
+#define FIFO_OVERRUN BIT(1)
+
+/* DPLL */
+#define MAX_REFERENCE_INDEX (3)
+#define MAX_NUM_REF_PRIORITY (4)
+
+#define MAX_DPLL_INDEX (2)
+
+#define DPLL_STS (0x580)
+#define DPLL_STS_VFC3A (0x571)
+#define DPLL_STATE_STS_MASK (0x70)
+#define DPLL_STATE_STS_SHIFT (4)
+#define DPLL_REF_SEL_STS_MASK (0x6)
+#define DPLL_REF_SEL_STS_SHIFT (1)
+
+#define DPLL_REF_PRIORITY_CNFG (0x502)
+#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf)
+#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31)
+#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2)
+#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304)
+#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08)
+#define DPLL_REF0_PRIORITY_SHIFT (4)
+#define DPLL_REF1_PRIORITY_SHIFT (6)
+#define DPLL_REF2_PRIORITY_SHIFT (8)
+#define DPLL_REF3_PRIORITY_SHIFT (10)
+
+enum dpll_state {
+ DPLL_STATE_MIN = 0,
+ DPLL_STATE_FREERUN = DPLL_STATE_MIN,
+ DPLL_STATE_LOCKED = 1,
+ DPLL_STATE_HOLDOVER = 2,
+ DPLL_STATE_WRITE_FREQUENCY = 3,
+ DPLL_STATE_ACQUIRE = 4,
+ DPLL_STATE_HITLESS_SWITCH = 5,
+ DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH
+};
+
+/* REFMON */
+#define LOSMON_STS_0 (0x81e)
+#define LOSMON_STS_0_VFC3A (0x18e)
+#define LOSMON_STS_1 (0x82e)
+#define LOSMON_STS_1_VFC3A (0x19e)
+#define LOSMON_STS_2 (0x83e)
+#define LOSMON_STS_2_VFC3A (0x1ae)
+#define LOSMON_STS_3 (0x84e)
+#define LOSMON_STS_3_VFC3A (0x1be)
+#define LOS_STS_MASK (0x1)
+
+#define FREQMON_STS_0 (0x874)
+#define FREQMON_STS_0_VFC3A (0x1d4)
+#define FREQMON_STS_1 (0x894)
+#define FREQMON_STS_1_VFC3A (0x1f4)
+#define FREQMON_STS_2 (0x8b4)
+#define FREQMON_STS_2_VFC3A (0x214)
+#define FREQMON_STS_3 (0x8d4)
+#define FREQMON_STS_3_VFC3A (0x234)
+#define FREQ_FAIL_STS_SHIFT (31)
+
+/* Firmware interface */
+#define TIME_CLK_FREQ_ADDR (0xffa0)
+#define XTAL_FREQ_ADDR (0xffa1)
+
+/*
+ * Return register address and field mask based on passed in firmware version
+ */
+#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER))
+#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? (FIELD) : (FIELD##_##VER))
+enum fw_version {
+ V_DEFAULT = 0,
+ VFC3W = 1,
+ VFC3A = 2
+};
+
+/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */
+enum {
+ FREQ_MIN = 0,
+ FREQ_25M = 1,
+ FREQ_49_152M = 2,
+ FREQ_50M = 3,
+ FREQ_100M = 4,
+ FREQ_125M = 5,
+ FREQ_250M = 6,
+ FREQ_MAX
+};
+
+struct idtfc3_hw_param {
+ u32 xtal_freq;
+ u32 time_clk_freq;
+};
+
+struct idtfc3_fwrc {
+ u8 hiaddr;
+ u8 loaddr;
+ u8 value;
+ u8 reserved;
+} __packed;
+
+static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param)
+{
+ hw_param->xtal_freq = 49152000;
+ hw_param->time_clk_freq = 25000000;
+}
+
+static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param,
+ u16 addr, u8 val)
+{
+ if (addr == XTAL_FREQ_ADDR)
+ switch (val) {
+ case FREQ_49_152M:
+ hw_param->xtal_freq = 49152000;
+ break;
+ case FREQ_50M:
+ hw_param->xtal_freq = 50000000;
+ break;
+ default:
+ return -EINVAL;
+ }
+ else if (addr == TIME_CLK_FREQ_ADDR)
+ switch (val) {
+ case FREQ_25M:
+ hw_param->time_clk_freq = 25000000;
+ break;
+ case FREQ_50M:
+ hw_param->time_clk_freq = 50000000;
+ break;
+ case FREQ_100M:
+ hw_param->time_clk_freq = 100000000;
+ break;
+ case FREQ_125M:
+ hw_param->time_clk_freq = 125000000;
+ break;
+ case FREQ_250M:
+ hw_param->time_clk_freq = 250000000;
+ break;
+ default:
+ return -EINVAL;
+ }
+ else
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h
index 3d5c480d58ea..51b47966a04d 100644
--- a/include/linux/mfd/lp8788.h
+++ b/include/linux/mfd/lp8788.h
@@ -10,7 +10,6 @@
#ifndef __MFD_LP8788_H__
#define __MFD_LP8788_H__
-#include <linux/gpio.h>
#include <linux/irqdomain.h>
#include <linux/pwm.h>
#include <linux/regmap.h>
@@ -159,21 +158,17 @@ struct lp8788;
/*
* lp8788_buck1_dvs
- * @gpio : gpio pin number for dvs control
* @vsel : dvs selector for buck v1 register
*/
struct lp8788_buck1_dvs {
- int gpio;
enum lp8788_dvs_sel vsel;
};
/*
* lp8788_buck2_dvs
- * @gpio : two gpio pin numbers are used for dvs
* @vsel : dvs selector for buck v2 register
*/
struct lp8788_buck2_dvs {
- int gpio[LP8788_NUM_BUCK2_DVS];
enum lp8788_dvs_sel vsel;
};
@@ -268,8 +263,8 @@ struct lp8788_vib_platform_data {
* @buck_data : regulator initial data for buck
* @dldo_data : regulator initial data for digital ldo
* @aldo_data : regulator initial data for analog ldo
- * @buck1_dvs : gpio configurations for buck1 dvs
- * @buck2_dvs : gpio configurations for buck2 dvs
+ * @buck1_dvs : configurations for buck1 dvs
+ * @buck2_dvs : configurations for buck2 dvs
* @chg_pdata : platform data for charger driver
* @alarm_sel : rtc alarm selection (1 or 2)
* @bl_pdata : configurable data for backlight driver
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index ea4a4b1b246a..1fbda1f8967d 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -15,7 +15,7 @@
#define ICH_RES_GPE0 1
/* GPIO compatibility */
-enum {
+enum lpc_gpio_versions {
ICH_I3100_GPIO,
ICH_V5_GPIO,
ICH_V6_GPIO,
@@ -26,11 +26,14 @@ enum {
AVOTON_GPIO,
};
+struct lpc_ich_gpio_info;
+
struct lpc_ich_info {
char name[32];
unsigned int iTCO_version;
- unsigned int gpio_version;
+ enum lpc_gpio_versions gpio_version;
enum intel_spi_type spi_type;
+ const struct lpc_ich_gpio_info *gpio_info;
u8 use_gpio;
};
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 3acceeedbaba..ea635d12a741 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -441,8 +441,4 @@ enum max77686_types {
TYPE_MAX77802,
};
-extern int max77686_irq_init(struct max77686_dev *max77686);
-extern void max77686_irq_exit(struct max77686_dev *max77686);
-extern int max77686_irq_resume(struct max77686_dev *max77686);
-
#endif /* __LINUX_MFD_MAX77686_PRIV_H */
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 311f7d3d2323..54444ff2a5de 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -405,7 +405,7 @@ enum max77693_haptic_reg {
MAX77693_HAPTIC_REG_END,
};
-/* max77693-pmic LSCNFG configuraton register */
+/* max77693-pmic LSCNFG configuration register */
#define MAX77693_PMIC_LOW_SYS_MASK 0x80
#define MAX77693_PMIC_LOW_SYS_SHIFT 7
diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h
index 0bc7454c4dbe..2fb4db67f110 100644
--- a/include/linux/mfd/max77843-private.h
+++ b/include/linux/mfd/max77843-private.h
@@ -198,7 +198,7 @@ enum max77843_irq_muic {
#define MAX77843_MCONFIG_MEN_MASK BIT(MCONFIG_MEN_SHIFT)
#define MAX77843_MCONFIG_PDIV_MASK (0x3 << MCONFIG_PDIV_SHIFT)
-/* Max77843 charger insterrupts */
+/* Max77843 charger interrupts */
#define MAX77843_CHG_BYP_I BIT(0)
#define MAX77843_CHG_BATP_I BIT(2)
#define MAX77843_CHG_BAT_I BIT(3)
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 6193905abbb5..5c2cc1103437 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -178,7 +178,6 @@ struct max8997_platform_data {
*
*/
bool ignore_gpiodvs_side_effect;
- int buck125_gpios[3]; /* GPIO of [0]SET1, [1]SET2, [2]SET3 */
int buck125_default_idx; /* Default value of SET1, 2, 3 */
unsigned int buck1_voltage[8]; /* buckx_voltage in uV */
bool buck1_gpiodvs;
diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index 79c020bd0c70..a054e55c8646 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -65,10 +65,7 @@ struct max8998_regulator_data {
* be other than the preset values.
* @buck1_voltage: BUCK1 DVS mode 1 voltage registers
* @buck2_voltage: BUCK2 DVS mode 2 voltage registers
- * @buck1_set1: BUCK1 gpio pin 1 to set output voltage
- * @buck1_set2: BUCK1 gpio pin 2 to set output voltage
* @buck1_default_idx: Default for BUCK1 gpio pin 1, 2
- * @buck2_set3: BUCK2 gpio pin to set output voltage
* @buck2_default_idx: Default for BUCK2 gpio pin.
* @wakeup: Allow to wake up from suspend
* @rtc_delay: LP3974 RTC chip bug that requires delay after a register
@@ -91,10 +88,7 @@ struct max8998_platform_data {
bool buck_voltage_lock;
int buck1_voltage[4];
int buck2_voltage[2];
- int buck1_set1;
- int buck1_set2;
int buck1_default_idx;
- int buck2_set3;
int buck2_default_idx;
bool wakeup;
bool rtc_delay;
diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h
index 3d33517f178c..d83e87298ac4 100644
--- a/include/linux/mfd/mt6358/registers.h
+++ b/include/linux/mfd/mt6358/registers.h
@@ -262,6 +262,12 @@
#define MT6358_LDO_VBIF28_CON3 0x1db0
#define MT6358_VCAMA1_ANA_CON0 0x1e08
#define MT6358_VCAMA2_ANA_CON0 0x1e0c
+#define MT6358_VFE28_ANA_CON0 0x1e10
+#define MT6358_VCN28_ANA_CON0 0x1e14
+#define MT6358_VBIF28_ANA_CON0 0x1e18
+#define MT6358_VAUD28_ANA_CON0 0x1e1c
+#define MT6358_VAUX18_ANA_CON0 0x1e20
+#define MT6358_VXO22_ANA_CON0 0x1e24
#define MT6358_VCN33_ANA_CON0 0x1e28
#define MT6358_VSIM1_ANA_CON0 0x1e2c
#define MT6358_VSIM2_ANA_CON0 0x1e30
@@ -288,4 +294,21 @@
#define MT6358_AUD_TOP_INT_CON0 0x2228
#define MT6358_AUD_TOP_INT_STATUS0 0x2234
+/*
+ * MT6366 has no VCAM*, but has other regulators in its place. The names
+ * keep the MT6358 prefix for ease of use in the regulator driver.
+ */
+#define MT6358_LDO_VSRAM_CON5 0x1bf8
+#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0
+#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1
+#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2
+#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0
+#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1
+#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2
+#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0
+#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6
+#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8
+#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0
+#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0
+
#endif /* __MFD_MT6358_REGISTERS_H__ */
diff --git a/include/linux/mfd/rz-mtu3.h b/include/linux/mfd/rz-mtu3.h
index c5173bc06270..8421d49500bf 100644
--- a/include/linux/mfd/rz-mtu3.h
+++ b/include/linux/mfd/rz-mtu3.h
@@ -151,7 +151,6 @@ struct rz_mtu3 {
void *priv_data;
};
-#if IS_ENABLED(CONFIG_RZ_MTU3)
static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch)
{
mutex_lock(&ch->lock);
@@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val);
void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val);
void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off,
u16 pos, u8 val);
-#else
-static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch)
-{
- return false;
-}
-
-static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch)
-{
-}
-
-static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch)
-{
- return false;
-}
-
-static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch)
-{
-}
-
-static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch)
-{
- return 0;
-}
-
-static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
- return 0;
-}
-
-static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
- return 0;
-}
-
-static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
- return 0;
-}
-
-static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off)
-{
- return 0;
-}
-
-static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 val)
-{
-}
-
-static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val)
-{
-}
-
-static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val)
-{
-}
-
-static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val)
-{
-}
-
-static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch,
- u16 off, u16 pos, u8 val)
-{
-}
-#endif
#endif /* __MFD_RZ_MTU3_H__ */
diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h
index 18363b773d07..cb99e16ca947 100644
--- a/include/linux/mfd/si476x-platform.h
+++ b/include/linux/mfd/si476x-platform.h
@@ -10,7 +10,7 @@
#ifndef __SI476X_PLATFORM_H__
#define __SI476X_PLATFORM_H__
-/* It is possible to select one of the four adresses using pins A0
+/* It is possible to select one of the four addresses using pins A0
* and A1 on SI476x */
#define SI476X_I2C_ADDR_1 0x60
#define SI476X_I2C_ADDR_2 0x61
diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index 1b94325febb3..ca35af30745f 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -102,6 +102,15 @@ enum stm32_timers_dmas {
STM32_TIMERS_MAX_DMAS,
};
+/* STM32 Timer may have either a unique global interrupt or 4 interrupt lines */
+enum stm32_timers_irqs {
+ STM32_TIMERS_IRQ_GLOBAL_BRK, /* global or brk IRQ */
+ STM32_TIMERS_IRQ_UP,
+ STM32_TIMERS_IRQ_TRG_COM,
+ STM32_TIMERS_IRQ_CC,
+ STM32_TIMERS_MAX_IRQS,
+};
+
/**
* struct stm32_timers_dma - STM32 timer DMA handling.
* @completion: end of DMA transfer completion
@@ -123,6 +132,8 @@ struct stm32_timers {
struct regmap *regmap;
u32 max_arr;
struct stm32_timers_dma dma; /* Only to be used by the parent */
+ unsigned int nr_irqs;
+ int irq[STM32_TIMERS_MAX_IRQS];
};
#if IS_REACHABLE(CONFIG_MFD_STM32_TIMERS)
diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h
index ea0ccf33a459..021f820f9d52 100644
--- a/include/linux/mfd/sun4i-gpadc.h
+++ b/include/linux/mfd/sun4i-gpadc.h
@@ -81,8 +81,8 @@
#define SUN4I_GPADC_TEMP_DATA 0x20
#define SUN4I_GPADC_DATA 0x24
-#define SUN4I_GPADC_IRQ_FIFO_DATA 0
-#define SUN4I_GPADC_IRQ_TEMP_DATA 1
+#define SUN4I_GPADC_IRQ_FIFO_DATA 1
+#define SUN4I_GPADC_IRQ_TEMP_DATA 2
/* 10s delay before suspending the IP */
#define SUN4I_GPADC_AUTOSUSPEND_DELAY 10000
diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index fecc2fa2a364..c315903f6dab 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -17,20 +17,17 @@
struct device_node;
#ifdef CONFIG_MFD_SYSCON
-extern struct regmap *device_node_to_regmap(struct device_node *np);
-extern struct regmap *syscon_node_to_regmap(struct device_node *np);
-extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
-extern struct regmap *syscon_regmap_lookup_by_phandle(
- struct device_node *np,
- const char *property);
-extern struct regmap *syscon_regmap_lookup_by_phandle_args(
- struct device_node *np,
- const char *property,
- int arg_count,
- unsigned int *out_args);
-extern struct regmap *syscon_regmap_lookup_by_phandle_optional(
- struct device_node *np,
- const char *property);
+struct regmap *device_node_to_regmap(struct device_node *np);
+struct regmap *syscon_node_to_regmap(struct device_node *np);
+struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
+struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np,
+ const char *property);
+struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np,
+ const char *property,
+ int arg_count,
+ unsigned int *out_args);
+struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np,
+ const char *property);
#else
static inline struct regmap *device_node_to_regmap(struct device_node *np)
{
diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h
index 16f87cccc003..9185b5cd8371 100644
--- a/include/linux/mfd/tps65086.h
+++ b/include/linux/mfd/tps65086.h
@@ -13,8 +13,9 @@
#include <linux/regmap.h>
/* List of registers for TPS65086 */
-#define TPS65086_DEVICEID 0x01
-#define TPS65086_IRQ 0x02
+#define TPS65086_DEVICEID1 0x00
+#define TPS65086_DEVICEID2 0x01
+#define TPS65086_IRQ 0x02
#define TPS65086_IRQ_MASK 0x03
#define TPS65086_PMICSTAT 0x04
#define TPS65086_SHUTDNSRC 0x05
@@ -75,10 +76,16 @@
#define TPS65086_IRQ_SHUTDN_MASK BIT(3)
#define TPS65086_IRQ_FAULT_MASK BIT(7)
-/* DEVICEID Register field definitions */
-#define TPS65086_DEVICEID_PART_MASK GENMASK(3, 0)
-#define TPS65086_DEVICEID_OTP_MASK GENMASK(5, 4)
-#define TPS65086_DEVICEID_REV_MASK GENMASK(7, 6)
+/* DEVICEID1 Register field definitions */
+#define TPS6508640_ID 0x00
+#define TPS65086401_ID 0x01
+#define TPS6508641_ID 0x10
+#define TPS65086470_ID 0x70
+
+/* DEVICEID2 Register field definitions */
+#define TPS65086_DEVICEID2_PART_MASK GENMASK(3, 0)
+#define TPS65086_DEVICEID2_OTP_MASK GENMASK(5, 4)
+#define TPS65086_DEVICEID2_REV_MASK GENMASK(7, 6)
/* VID Masks */
#define BUCK_VID_MASK GENMASK(7, 1)
@@ -92,6 +99,8 @@ enum tps65086_irqs {
TPS65086_IRQ_FAULT,
};
+struct tps65086_regulator_config;
+
/**
* struct tps65086 - state holder for the tps65086 driver
*
@@ -100,6 +109,8 @@ enum tps65086_irqs {
struct tps65086 {
struct device *dev;
struct regmap *regmap;
+ unsigned int chip_id;
+ const struct tps65086_regulator_config *reg_config;
/* IRQ Data */
int irq;
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 701925db75b3..f67ef0a4e041 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -749,7 +749,7 @@
#define VDDCTRL_ST_SHIFT 0
-/*Register VDDCTRL_OP (0x28) bit definitios */
+/*Register VDDCTRL_OP (0x28) bit definitions */
#define VDDCTRL_OP_CMD_MASK 0x80
#define VDDCTRL_OP_CMD_SHIFT 7
#define VDDCTRL_OP_SEL_MASK 0x7F
diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h
index c062d91a67d9..85dc406173db 100644
--- a/include/linux/mfd/twl.h
+++ b/include/linux/mfd/twl.h
@@ -461,6 +461,7 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot)
#define TWL4030_PM_MASTER_GLOBAL_TST 0xb6
+#define TWL6030_PHOENIX_DEV_ON 0x06
/*----------------------------------------------------------------------*/
/* Power bus message definitions */
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index f6de4b6ecfc7..77b8c0a26674 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -266,6 +266,7 @@ struct mhi_event_config {
* struct mhi_controller_config - Root MHI controller configuration
* @max_channels: Maximum number of channels supported
* @timeout_ms: Timeout value for operations. 0 means use default
+ * @ready_timeout_ms: Timeout value for waiting device to be ready (optional)
* @buf_len: Size of automatically allocated buffers. 0 means use default
* @num_channels: Number of channels defined in @ch_cfg
* @ch_cfg: Array of defined channels
@@ -277,6 +278,7 @@ struct mhi_event_config {
struct mhi_controller_config {
u32 max_channels;
u32 timeout_ms;
+ u32 ready_timeout_ms;
u32 buf_len;
u32 num_channels;
const struct mhi_channel_config *ch_cfg;
@@ -299,6 +301,10 @@ struct mhi_controller_config {
* @iova_start: IOMMU starting address for data (required)
* @iova_stop: IOMMU stop address for data (required)
* @fw_image: Firmware image name for normal booting (optional)
+ * @fw_data: Firmware image data content for normal booting, used only
+ * if fw_image is NULL and fbc_download is true (optional)
+ * @fw_sz: Firmware image data size for normal booting, used only if fw_image
+ * is NULL and fbc_download is true (optional)
* @edl_image: Firmware image name for emergency download mode (optional)
* @rddm_size: RAM dump size that host should allocate for debugging purpose
* @sbl_size: SBL image size downloaded through BHIe (optional)
@@ -314,18 +320,14 @@ struct mhi_controller_config {
* @hw_ev_rings: Number of hardware event rings
* @sw_ev_rings: Number of software event rings
* @nr_irqs: Number of IRQ allocated by bus master (required)
- * @family_number: MHI controller family number
- * @device_number: MHI controller device number
- * @major_version: MHI controller major revision number
- * @minor_version: MHI controller minor revision number
* @serial_number: MHI controller serial number obtained from BHI
- * @oem_pk_hash: MHI controller OEM PK Hash obtained from BHI
* @mhi_event: MHI event ring configurations table
* @mhi_cmd: MHI command ring configurations table
* @mhi_ctxt: MHI device context, shared memory between host and device
* @pm_mutex: Mutex for suspend/resume operation
* @pm_lock: Lock for protecting MHI power management state
* @timeout_ms: Timeout in ms for state transitions
+ * @ready_timeout_ms: Timeout in ms for waiting device to be ready (optional)
* @pm_state: MHI power management state
* @db_access: DB access states
* @ee: MHI device execution environment
@@ -362,15 +364,6 @@ struct mhi_controller_config {
* Fields marked as (required) need to be populated by the controller driver
* before calling mhi_register_controller(). For the fields marked as (optional)
* they can be populated depending on the usecase.
- *
- * The following fields are present for the purpose of implementing any device
- * specific quirks or customizations for specific MHI revisions used in device
- * by the controller drivers. The MHI stack will just populate these fields
- * during mhi_register_controller():
- * family_number
- * device_number
- * major_version
- * minor_version
*/
struct mhi_controller {
struct device *cntrl_dev;
@@ -384,6 +377,8 @@ struct mhi_controller {
dma_addr_t iova_start;
dma_addr_t iova_stop;
const char *fw_image;
+ const u8 *fw_data;
+ size_t fw_sz;
const char *edl_image;
size_t rddm_size;
size_t sbl_size;
@@ -399,12 +394,7 @@ struct mhi_controller {
u32 hw_ev_rings;
u32 sw_ev_rings;
u32 nr_irqs;
- u32 family_number;
- u32 device_number;
- u32 major_version;
- u32 minor_version;
u32 serial_number;
- u32 oem_pk_hash[MHI_MAX_OEM_PK_HASH_SEGMENTS];
struct mhi_event *mhi_event;
struct mhi_cmd *mhi_cmd;
@@ -413,6 +403,7 @@ struct mhi_controller {
struct mutex pm_mutex;
rwlock_t pm_lock;
u32 timeout_ms;
+ u32 ready_timeout_ms;
u32 pm_state;
u32 db_access;
enum mhi_ee_type ee;
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index f198a8ac7ee7..11bf3212f782 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -50,6 +50,27 @@ struct mhi_ep_db_info {
};
/**
+ * struct mhi_ep_buf_info - MHI Endpoint transfer buffer info
+ * @mhi_dev: MHI device associated with this buffer
+ * @dev_addr: Address of the buffer in endpoint
+ * @host_addr: Address of the bufffer in host
+ * @size: Size of the buffer
+ * @code: Transfer completion code
+ * @cb: Callback to be executed by controller drivers after transfer completion (async)
+ * @cb_buf: Opaque buffer to be passed to the callback
+ */
+struct mhi_ep_buf_info {
+ struct mhi_ep_device *mhi_dev;
+ void *dev_addr;
+ u64 host_addr;
+ size_t size;
+ int code;
+
+ void (*cb)(struct mhi_ep_buf_info *buf_info);
+ void *cb_buf;
+};
+
+/**
* struct mhi_ep_cntrl - MHI Endpoint controller structure
* @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI
* Endpoint controller
@@ -82,8 +103,10 @@ struct mhi_ep_db_info {
* @raise_irq: CB function for raising IRQ to the host
* @alloc_map: CB function for allocating memory in endpoint for storing host context and mapping it
* @unmap_free: CB function to unmap and free the allocated memory in endpoint for storing host context
- * @read_from_host: CB function for reading from host memory from endpoint
- * @write_to_host: CB function for writing to host memory from endpoint
+ * @read_sync: CB function for reading from host memory synchronously
+ * @write_sync: CB function for writing to host memory synchronously
+ * @read_async: CB function for reading from host memory asynchronously
+ * @write_async: CB function for writing to host memory asynchronously
* @mhi_state: MHI Endpoint state
* @max_chan: Maximum channels supported by the endpoint controller
* @mru: MRU (Maximum Receive Unit) value of the endpoint controller
@@ -128,14 +151,19 @@ struct mhi_ep_cntrl {
struct work_struct reset_work;
struct work_struct cmd_ring_work;
struct work_struct ch_ring_work;
+ struct kmem_cache *ring_item_cache;
+ struct kmem_cache *ev_ring_el_cache;
+ struct kmem_cache *tre_buf_cache;
void (*raise_irq)(struct mhi_ep_cntrl *mhi_cntrl, u32 vector);
int (*alloc_map)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t *phys_ptr,
void __iomem **virt, size_t size);
void (*unmap_free)(struct mhi_ep_cntrl *mhi_cntrl, u64 pci_addr, phys_addr_t phys,
void __iomem *virt, size_t size);
- int (*read_from_host)(struct mhi_ep_cntrl *mhi_cntrl, u64 from, void *to, size_t size);
- int (*write_to_host)(struct mhi_ep_cntrl *mhi_cntrl, void *from, u64 to, size_t size);
+ int (*read_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
+ int (*write_sync)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
+ int (*read_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
+ int (*write_async)(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_buf_info *buf_info);
enum mhi_state mhi_state;
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 8bef1ab62bba..591bf5b5e8dc 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -41,9 +41,10 @@
#define PHY_ID_KSZ9477 0x00221631
/* struct phy_device dev_flags definitions */
-#define MICREL_PHY_50MHZ_CLK 0x00000001
-#define MICREL_PHY_FXEN 0x00000002
-#define MICREL_KSZ8_P1_ERRATA 0x00000003
+#define MICREL_PHY_50MHZ_CLK BIT(0)
+#define MICREL_PHY_FXEN BIT(1)
+#define MICREL_KSZ8_P1_ERRATA BIT(2)
+#define MICREL_NO_EEE BIT(3)
#define MICREL_KSZ9021_EXTREG_CTRL 0xB
#define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC
@@ -63,6 +64,10 @@
#define KSZ886X_BMCR_DISABLE_TRANSMIT BIT(1)
#define KSZ886X_BMCR_DISABLE_LED BIT(0)
+/* PHY Special Control/Status Register (Reg 31) */
#define KSZ886X_CTRL_MDIX_STAT BIT(4)
+#define KSZ886X_CTRL_FORCE_LINK BIT(3)
+#define KSZ886X_CTRL_PWRSAVE BIT(2)
+#define KSZ886X_CTRL_REMOTE_LOOPBACK BIT(1)
#endif /* _MICREL_PHY_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 711dd9412561..2ce13e8a309b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page)
}
#ifdef CONFIG_NUMA_BALANCING
-int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
int node);
#else
-static inline int migrate_misplaced_page(struct page *page,
+static inline int migrate_misplaced_folio(struct folio *folio,
struct vm_area_struct *vma, int node)
{
return -EAGAIN; /* can't migrate now */
diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h
index fa940bbaf8ae..26b04f73f214 100644
--- a/include/linux/mii_timestamper.h
+++ b/include/linux/mii_timestamper.h
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/net_tstamp.h>
struct phy_device;
@@ -51,7 +52,8 @@ struct mii_timestamper {
struct sk_buff *skb, int type);
int (*hwtstamp)(struct mii_timestamper *mii_ts,
- struct ifreq *ifreq);
+ struct kernel_hwtstamp_config *kernel_config,
+ struct netlink_ext_ack *extack);
void (*link_state)(struct mii_timestamper *mii_ts,
struct phy_device *phydev);
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 44077837385f..d52daf45861b 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -35,31 +35,33 @@ static __always_inline
void min_heapify(struct min_heap *heap, int pos,
const struct min_heap_callbacks *func)
{
- void *left, *right, *parent, *smallest;
+ void *left, *right;
void *data = heap->data;
+ void *root = data + pos * func->elem_size;
+ int i = pos, j;
+ /* Find the sift-down path all the way to the leaves. */
for (;;) {
- if (pos * 2 + 1 >= heap->nr)
+ if (i * 2 + 2 >= heap->nr)
break;
+ left = data + (i * 2 + 1) * func->elem_size;
+ right = data + (i * 2 + 2) * func->elem_size;
+ i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
+ }
- left = data + ((pos * 2 + 1) * func->elem_size);
- parent = data + (pos * func->elem_size);
- smallest = parent;
- if (func->less(left, smallest))
- smallest = left;
-
- if (pos * 2 + 2 < heap->nr) {
- right = data + ((pos * 2 + 2) * func->elem_size);
- if (func->less(right, smallest))
- smallest = right;
- }
- if (smallest == parent)
- break;
- func->swp(smallest, parent);
- if (smallest == left)
- pos = (pos * 2) + 1;
- else
- pos = (pos * 2) + 2;
+ /* Special case for the last leaf with no sibling. */
+ if (i * 2 + 2 == heap->nr)
+ i = i * 2 + 1;
+
+ /* Backtrack to the correct location. */
+ while (i != pos && func->less(root, data + i * func->elem_size))
+ i = (i - 1) / 2;
+
+ /* Shift the element into its correct place. */
+ j = i;
+ while (i != pos) {
+ i = (i - 1) / 2;
+ func->swp(data + i * func->elem_size, data + j * func->elem_size);
}
}
@@ -70,7 +72,7 @@ void min_heapify_all(struct min_heap *heap,
{
int i;
- for (i = heap->nr / 2; i >= 0; i--)
+ for (i = heap->nr / 2 - 1; i >= 0; i--)
min_heapify(heap, i, func);
}
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 396df1121bff..2ec559284a9f 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -2,59 +2,77 @@
#ifndef _LINUX_MINMAX_H
#define _LINUX_MINMAX_H
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
#include <linux/const.h>
+#include <linux/types.h>
/*
* min()/max()/clamp() macros must accomplish three things:
*
- * - avoid multiple evaluations of the arguments (so side-effects like
+ * - Avoid multiple evaluations of the arguments (so side-effects like
* "x++" happen only once) when non-constant.
- * - perform strict type-checking (to generate warnings instead of
- * nasty runtime surprises). See the "unnecessary" pointer comparison
- * in __typecheck().
- * - retain result as a constant expressions when called with only
+ * - Retain result as a constant expressions when called with only
* constant expressions (to avoid tripping VLA warnings in stack
* allocation usage).
+ * - Perform signed v unsigned type-checking (to generate compile
+ * errors instead of nasty runtime surprises).
+ * - Unsigned char/short are always promoted to signed int and can be
+ * compared against signed or unsigned arguments.
+ * - Unsigned arguments can be compared against non-negative signed constants.
+ * - Comparison of a signed argument against an unsigned constant fails
+ * even if the constant is below __INT_MAX__ and could be cast to int.
*/
#define __typecheck(x, y) \
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
-#define __no_side_effects(x, y) \
- (__is_constexpr(x) && __is_constexpr(y))
+/* is_signed_type() isn't a constexpr for pointer types */
+#define __is_signed(x) \
+ __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \
+ is_signed_type(typeof(x)), 0)
-#define __safe_cmp(x, y) \
- (__typecheck(x, y) && __no_side_effects(x, y))
+/* True for a non-negative signed int constant */
+#define __is_noneg_int(x) \
+ (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0)
-#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
+#define __types_ok(x, y) \
+ (__is_signed(x) == __is_signed(y) || \
+ __is_signed((x) + 0) == __is_signed((y) + 0) || \
+ __is_noneg_int(x) || __is_noneg_int(y))
-#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
- typeof(x) unique_x = (x); \
- typeof(y) unique_y = (y); \
- __cmp(unique_x, unique_y, op); })
+#define __cmp_op_min <
+#define __cmp_op_max >
-#define __careful_cmp(x, y, op) \
- __builtin_choose_expr(__safe_cmp(x, y), \
- __cmp(x, y, op), \
- __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
+#define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y))
+
+#define __cmp_once(op, x, y, unique_x, unique_y) ({ \
+ typeof(x) unique_x = (x); \
+ typeof(y) unique_y = (y); \
+ static_assert(__types_ok(x, y), \
+ #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \
+ __cmp(op, unique_x, unique_y); })
+
+#define __careful_cmp(op, x, y) \
+ __builtin_choose_expr(__is_constexpr((x) - (y)), \
+ __cmp(op, x, y), \
+ __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y)))
#define __clamp(val, lo, hi) \
((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
-#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \
- typeof(val) unique_val = (val); \
- typeof(lo) unique_lo = (lo); \
- typeof(hi) unique_hi = (hi); \
- __clamp(unique_val, unique_lo, unique_hi); })
-
-#define __clamp_input_check(lo, hi) \
- (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
- __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \
+ typeof(val) unique_val = (val); \
+ typeof(lo) unique_lo = (lo); \
+ typeof(hi) unique_hi = (hi); \
+ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \
+ (lo) <= (hi), true), \
+ "clamp() low limit " #lo " greater than high limit " #hi); \
+ static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \
+ static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \
+ __clamp(unique_val, unique_lo, unique_hi); })
#define __careful_clamp(val, lo, hi) ({ \
- __clamp_input_check(lo, hi) + \
- __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \
- __typecheck(hi, lo) && __is_constexpr(val) && \
- __is_constexpr(lo) && __is_constexpr(hi), \
+ __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \
__clamp(val, lo, hi), \
__clamp_once(val, lo, hi, __UNIQUE_ID(__val), \
__UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
@@ -64,14 +82,31 @@
* @x: first value
* @y: second value
*/
-#define min(x, y) __careful_cmp(x, y, <)
+#define min(x, y) __careful_cmp(min, x, y)
/**
* max - return maximum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
-#define max(x, y) __careful_cmp(x, y, >)
+#define max(x, y) __careful_cmp(max, x, y)
+
+/**
+ * umin - return minimum of two non-negative values
+ * Signed types are zero extended to match a larger unsigned type.
+ * @x: first value
+ * @y: second value
+ */
+#define umin(x, y) \
+ __careful_cmp(min, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull)
+
+/**
+ * umax - return maximum of two non-negative values
+ * @x: first value
+ * @y: second value
+ */
+#define umax(x, y) \
+ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull)
/**
* min3 - return minimum of three values
@@ -123,7 +158,7 @@
* @x: first value
* @y: second value
*/
-#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
+#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y))
/**
* max_t - return maximum of two values, using the specified type
@@ -131,7 +166,50 @@
* @x: first value
* @y: second value
*/
-#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
+#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y))
+
+/*
+ * Do not check the array parameter using __must_be_array().
+ * In the following legit use-case where the "array" passed is a simple pointer,
+ * __must_be_array() will return a failure.
+ * --- 8< ---
+ * int *buff
+ * ...
+ * min = min_array(buff, nb_items);
+ * --- 8< ---
+ *
+ * The first typeof(&(array)[0]) is needed in order to support arrays of both
+ * 'int *buff' and 'int buff[N]' types.
+ *
+ * The array can be an array of const items.
+ * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order
+ * to discard the const qualifier for the __element variable.
+ */
+#define __minmax_array(op, array, len) ({ \
+ typeof(&(array)[0]) __array = (array); \
+ typeof(len) __len = (len); \
+ __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\
+ while (__len--) \
+ __element = op(__element, __array[__len]); \
+ __element; })
+
+/**
+ * min_array - return minimum of values present in an array
+ * @array: array
+ * @len: array length
+ *
+ * Note that @len must not be zero (empty array).
+ */
+#define min_array(array, len) __minmax_array(min, array, len)
+
+/**
+ * max_array - return maximum of values present in an array
+ * @array: array
+ * @len: array length
+ *
+ * Note that @len must not be zero (empty array).
+ */
+#define max_array(array, len) __minmax_array(max, array, len)
/**
* clamp_t - return a value clamped to a given range using a given type
@@ -158,6 +236,32 @@
*/
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
+static inline bool in_range64(u64 val, u64 start, u64 len)
+{
+ return (val - start) < len;
+}
+
+static inline bool in_range32(u32 val, u32 start, u32 len)
+{
+ return (val - start) < len;
+}
+
+/**
+ * in_range - Determine if a value lies within a range.
+ * @val: Value to test.
+ * @start: First value in range.
+ * @len: Number of values in range.
+ *
+ * This is more efficient than "if (start <= val && val < (start + len))".
+ * It also gives a different answer if @start + @len overflows the size of
+ * the type by a sufficient amount to encompass @val. Decide for yourself
+ * which behaviour you want, or prove that start + len never overflow.
+ * Do not blindly replace one form with the other.
+ */
+#define in_range(val, start, len) \
+ ((sizeof(start) | sizeof(len) | sizeof(val)) <= sizeof(u32) ? \
+ in_range32(val, start, len) : in_range64(val, start, len))
+
/**
* swap - swap values of @a and @b
* @a: first value
diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index c238207d1615..e799b1f8d05b 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -31,17 +31,18 @@ struct misc_cg;
* struct misc_res: Per cgroup per misc type resource
* @max: Maximum limit on the resource.
* @usage: Current usage of the resource.
- * @failed: True if charged failed for the resource in a cgroup.
+ * @events: Number of times, the resource limit exceeded.
*/
struct misc_res {
- unsigned long max;
- atomic_long_t usage;
- atomic_long_t events;
+ u64 max;
+ atomic64_t usage;
+ atomic64_t events;
};
/**
* struct misc_cg - Miscellaneous controller's cgroup structure.
* @css: cgroup subsys state object.
+ * @events_file: Handle for the misc resources events file.
* @res: Array of misc resources usage in the cgroup.
*/
struct misc_cg {
@@ -53,12 +54,10 @@ struct misc_cg {
struct misc_res res[MISC_CG_RES_TYPES];
};
-unsigned long misc_cg_res_total_usage(enum misc_res_type type);
-int misc_cg_set_capacity(enum misc_res_type type, unsigned long capacity);
-int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg,
- unsigned long amount);
-void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg,
- unsigned long amount);
+u64 misc_cg_res_total_usage(enum misc_res_type type);
+int misc_cg_set_capacity(enum misc_res_type type, u64 capacity);
+int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount);
+void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount);
/**
* css_misc() - Get misc cgroup from the css.
@@ -99,27 +98,26 @@ static inline void put_misc_cg(struct misc_cg *cg)
#else /* !CONFIG_CGROUP_MISC */
-static inline unsigned long misc_cg_res_total_usage(enum misc_res_type type)
+static inline u64 misc_cg_res_total_usage(enum misc_res_type type)
{
return 0;
}
-static inline int misc_cg_set_capacity(enum misc_res_type type,
- unsigned long capacity)
+static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity)
{
return 0;
}
static inline int misc_cg_try_charge(enum misc_res_type type,
struct misc_cg *cg,
- unsigned long amount)
+ u64 amount)
{
return 0;
}
static inline void misc_cg_uncharge(enum misc_res_type type,
struct misc_cg *cg,
- unsigned long amount)
+ u64 amount)
{
}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6646634a0b9d..27f42f713c89 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -33,6 +33,7 @@
#ifndef MLX4_DEVICE_H
#define MLX4_DEVICE_H
+#include <linux/auxiliary_bus.h>
#include <linux/if_ether.h>
#include <linux/pci.h>
#include <linux/completion.h>
@@ -889,6 +890,12 @@ struct mlx4_dev {
u8 uar_page_shift;
};
+struct mlx4_adev {
+ struct auxiliary_device adev;
+ struct mlx4_dev *mdev;
+ int idx;
+};
+
struct mlx4_clock_params {
u64 offset;
u8 bar;
@@ -1087,6 +1094,19 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
(offset & (PAGE_SIZE - 1));
}
+static inline int mlx4_is_bonded(struct mlx4_dev *dev)
+{
+ return !!(dev->flags & MLX4_FLAG_BONDED);
+}
+
+static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev)
+{
+ return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev));
+}
+
+int mlx4_queue_bond_work(struct mlx4_dev *dev, int is_bonded, u8 v2p_p1,
+ u8 v2p_p2);
+
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 1834c8fad12e..69825223081f 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -34,8 +34,12 @@
#define MLX4_DRIVER_H
#include <net/devlink.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/notifier.h>
#include <linux/mlx4/device.h>
+#define MLX4_ADEV_NAME "mlx4_core"
+
struct mlx4_dev;
#define MLX4_MAC_MASK 0xffffffffffffULL
@@ -54,41 +58,19 @@ enum {
MLX4_INTFF_BONDING = 1 << 0
};
-struct mlx4_interface {
- void * (*add) (struct mlx4_dev *dev);
- void (*remove)(struct mlx4_dev *dev, void *context);
- void (*event) (struct mlx4_dev *dev, void *context,
- enum mlx4_dev_event event, unsigned long param);
- void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
- void (*activate)(struct mlx4_dev *dev, void *context);
- struct list_head list;
+struct mlx4_adrv {
+ struct auxiliary_driver adrv;
enum mlx4_protocol protocol;
int flags;
};
-int mlx4_register_interface(struct mlx4_interface *intf);
-void mlx4_unregister_interface(struct mlx4_interface *intf);
-
-int mlx4_bond(struct mlx4_dev *dev);
-int mlx4_unbond(struct mlx4_dev *dev);
-static inline int mlx4_is_bonded(struct mlx4_dev *dev)
-{
- return !!(dev->flags & MLX4_FLAG_BONDED);
-}
-
-static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev)
-{
- return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev));
-}
-
-struct mlx4_port_map {
- u8 port1;
- u8 port2;
-};
-
-int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p);
+int mlx4_register_auxiliary_driver(struct mlx4_adrv *madrv);
+void mlx4_unregister_auxiliary_driver(struct mlx4_adrv *madrv);
-void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port);
+int mlx4_register_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb);
+int mlx4_unregister_event_notifier(struct mlx4_dev *dev,
+ struct notifier_block *nb);
struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 80cc12a9a531..01275c6e8468 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -364,6 +364,11 @@ enum mlx5_event {
enum mlx5_driver_event {
MLX5_DRIVER_EVENT_TYPE_TRAP = 0,
MLX5_DRIVER_EVENT_UPLINK_NETDEV,
+ MLX5_DRIVER_EVENT_MACSEC_SA_ADDED,
+ MLX5_DRIVER_EVENT_MACSEC_SA_DELETED,
+ MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
};
enum {
@@ -913,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
}
-static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
+static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe)
{
return cqe->l4_l3_hdr_type & 0x1;
}
@@ -1208,9 +1213,7 @@ enum mlx5_cap_type {
MLX5_CAP_FLOW_TABLE,
MLX5_CAP_ESWITCH_FLOW_TABLE,
MLX5_CAP_ESWITCH,
- MLX5_CAP_RESERVED,
- MLX5_CAP_VECTOR_CALC,
- MLX5_CAP_QOS,
+ MLX5_CAP_QOS = 0xc,
MLX5_CAP_DEBUG,
MLX5_CAP_RESERVED_14,
MLX5_CAP_DEV_MEM,
@@ -1220,7 +1223,6 @@ enum mlx5_cap_type {
MLX5_CAP_DEV_EVENT = 0x14,
MLX5_CAP_IPSEC,
MLX5_CAP_CRYPTO = 0x1a,
- MLX5_CAP_DEV_SHAMPO = 0x1d,
MLX5_CAP_MACSEC = 0x1f,
MLX5_CAP_GENERAL_2 = 0x20,
MLX5_CAP_PORT_SELECTION = 0x25,
@@ -1239,7 +1241,6 @@ enum mlx5_pcam_feature_groups {
enum mlx5_mcam_reg_groups {
MLX5_MCAM_REGS_FIRST_128 = 0x0,
- MLX5_MCAM_REGS_0x9080_0x90FF = 0x1,
MLX5_MCAM_REGS_0x9100_0x917F = 0x2,
MLX5_MCAM_REGS_NUM = 0x3,
};
@@ -1279,10 +1280,6 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(per_protocol_networking_offload_caps,\
mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap)
-#define MLX5_CAP_ETH_MAX(mdev, cap) \
- MLX5_GET(per_protocol_networking_offload_caps,\
- mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap)
-
#define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \
MLX5_GET(per_protocol_networking_offload_caps,\
mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap)
@@ -1305,77 +1302,40 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP64_FLOWTABLE(mdev, cap) \
MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap)
-#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
- MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap)
-
#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap)
-#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
-
#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap)
-#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap)
-
#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
-#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap)
-
#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap)
-#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap)
-
#define MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_rdma.cap)
-#define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap)
-
#define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap)
-#define MLX5_CAP_FLOWTABLE_RDMA_TX_MAX(mdev, cap) \
- MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_rdma.cap)
-
#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
MLX5_GET(flow_table_eswitch_cap, \
mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
-#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
- MLX5_GET(flow_table_eswitch_cap, \
- mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap)
-
#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
-#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \
- MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap)
-
#define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap)
-#define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \
- MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap)
-
#define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap)
-#define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \
- MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap)
-
#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \
MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap)
-#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2_MAX(mdev, cap) \
- MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, ft_field_support_2_esw_fdb.cap)
-
#define MLX5_CAP_ESW(mdev, cap) \
MLX5_GET(e_switch_cap, \
mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap)
@@ -1384,10 +1344,6 @@ enum mlx5_qcam_feature_groups {
MLX5_GET64(flow_table_eswitch_cap, \
(mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
-#define MLX5_CAP_ESW_MAX(mdev, cap) \
- MLX5_GET(e_switch_cap, \
- mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap)
-
#define MLX5_CAP_PORT_SELECTION(mdev, cap) \
MLX5_GET(port_selection_cap, \
mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap)
@@ -1400,26 +1356,15 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(adv_virtualization_cap, \
mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap)
-#define MLX5_CAP_ADV_VIRTUALIZATION_MAX(mdev, cap) \
- MLX5_GET(adv_virtualization_cap, \
- mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->max, cap)
-
#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \
MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap)
-#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \
- MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap)
-
#define MLX5_CAP_ODP(mdev, cap)\
MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap)
#define MLX5_CAP_ODP_MAX(mdev, cap)\
MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap)
-#define MLX5_CAP_VECTOR_CALC(mdev, cap) \
- MLX5_GET(vector_calc_cap, \
- mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap)
-
#define MLX5_CAP_QOS(mdev, cap)\
MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap)
@@ -1436,10 +1381,6 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_FIRST_128], \
mng_access_reg_cap_mask.access_regs.reg)
-#define MLX5_CAP_MCAM_REG1(mdev, reg) \
- MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9080_0x90FF], \
- mng_access_reg_cap_mask.access_regs1.reg)
-
#define MLX5_CAP_MCAM_REG2(mdev, reg) \
MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \
mng_access_reg_cap_mask.access_regs2.reg)
@@ -1485,9 +1426,6 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_CRYPTO(mdev, cap)\
MLX5_GET(crypto_cap, (mdev)->caps.hca[MLX5_CAP_CRYPTO]->cur, cap)
-#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\
- MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap)
-
#define MLX5_CAP_MACSEC(mdev, cap)\
MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 25d0528f9219..bf9324a31ae9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -134,6 +134,7 @@ enum {
MLX5_REG_PCAM = 0x507f,
MLX5_REG_NODE_DESC = 0x6001,
MLX5_REG_HOST_ENDIANNESS = 0x7004,
+ MLX5_REG_MTCAP = 0x9009,
MLX5_REG_MTMP = 0x900A,
MLX5_REG_MCIA = 0x9014,
MLX5_REG_MFRL = 0x9028,
@@ -149,11 +150,14 @@ enum {
MLX5_REG_MTPPSE = 0x9054,
MLX5_REG_MTUTC = 0x9055,
MLX5_REG_MPEGC = 0x9056,
+ MLX5_REG_MPIR = 0x9059,
MLX5_REG_MCQS = 0x9060,
MLX5_REG_MCQI = 0x9061,
MLX5_REG_MCC = 0x9062,
MLX5_REG_MCDA = 0x9063,
MLX5_REG_MCAM = 0x907f,
+ MLX5_REG_MSECQ = 0x9155,
+ MLX5_REG_MSEES = 0x9156,
MLX5_REG_MIRC = 0x9162,
MLX5_REG_SBCAM = 0xB01F,
MLX5_REG_RESOURCE_DUMP = 0xC000,
@@ -287,18 +291,23 @@ struct mlx5_cmd_stats {
struct mlx5_cmd {
struct mlx5_nb nb;
+ /* members which needs to be queried or reinitialized each reload */
+ struct {
+ u16 cmdif_rev;
+ u8 log_sz;
+ u8 log_stride;
+ int max_reg_cmds;
+ unsigned long bitmask;
+ struct semaphore sem;
+ struct semaphore pages_sem;
+ struct semaphore throttle_sem;
+ } vars;
enum mlx5_cmdif_state state;
void *cmd_alloc_buf;
dma_addr_t alloc_dma;
int alloc_size;
void *cmd_buf;
dma_addr_t dma;
- u16 cmdif_rev;
- u8 log_sz;
- u8 log_stride;
- int max_reg_cmds;
- int events;
- u32 __iomem *vector;
/* protect command queue allocations
*/
@@ -308,12 +317,8 @@ struct mlx5_cmd {
*/
spinlock_t token_lock;
u8 token;
- unsigned long bitmask;
char wq_name[MLX5_CMD_WQ_MAX_NAME];
struct workqueue_struct *wq;
- struct semaphore sem;
- struct semaphore pages_sem;
- struct semaphore throttle_sem;
int mode;
u16 allowed_opcode;
struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
@@ -321,7 +326,7 @@ struct mlx5_cmd {
struct mlx5_cmd_debug dbg;
struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
int checksum_disabled;
- struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
+ struct xarray stats;
};
struct mlx5_cmd_mailbox {
@@ -501,7 +506,7 @@ struct mlx5_events;
struct mlx5_mpfs;
struct mlx5_eswitch;
struct mlx5_lag;
-struct mlx5_devcom;
+struct mlx5_devcom_dev;
struct mlx5_fw_reset;
struct mlx5_eq_table;
struct mlx5_irq_table;
@@ -611,6 +616,7 @@ struct mlx5_priv {
int adev_idx;
int sw_vhca_id;
struct mlx5_events *events;
+ struct mlx5_vhca_events *vhca_events;
struct mlx5_flow_steering *steering;
struct mlx5_mpfs *mpfs;
@@ -618,7 +624,8 @@ struct mlx5_priv {
struct mlx5_core_sriov sriov;
struct mlx5_lag *lag;
u32 flags;
- struct mlx5_devcom *devcom;
+ struct mlx5_devcom_dev *devc;
+ struct mlx5_devcom_comp_dev *hca_devcom_comp;
struct mlx5_fw_reset *fw_reset;
struct mlx5_core_roce roce;
struct mlx5_fc_stats fc_stats;
@@ -672,6 +679,9 @@ struct mlx5e_resources {
struct mlx5_td td;
u32 mkey;
struct mlx5_sq_bfreg bfreg;
+#define MLX5_MAX_NUM_TC 8
+ u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC];
+ bool tisn_valid;
} hw_objs;
struct net_device *uplink_netdev;
struct mutex uplink_netdev_lock;
@@ -682,6 +692,7 @@ enum mlx5_sw_icm_type {
MLX5_SW_ICM_TYPE_STEERING,
MLX5_SW_ICM_TYPE_HEADER_MODIFY,
MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN,
+ MLX5_SW_ICM_TYPE_SW_ENCAP,
};
#define MLX5_MAX_RESERVED_GIDS 8
@@ -725,7 +736,6 @@ struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
struct mlx5_hv_vhca;
-struct mlx5_thermal;
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -804,7 +814,16 @@ struct mlx5_core_dev {
struct mlx5_rsc_dump *rsc_dump;
u32 vsc_addr;
struct mlx5_hv_vhca *hv_vhca;
- struct mlx5_thermal *thermal;
+ struct mlx5_hwmon *hwmon;
+ u64 num_block_tc;
+ u64 num_block_ipsec;
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_macsec_fs *macsec_fs;
+ /* MACsec notifier chain to sync MACsec core and IB database */
+ struct blocking_notifier_head macsec_nh;
+#endif
+ u64 num_ipsec_offloads;
+ struct mlx5_sd *sd;
};
struct mlx5_db {
@@ -1018,7 +1037,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev);
void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
+void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data);
+
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
@@ -1029,10 +1049,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node);
void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
-struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
- gfp_t flags, int npages);
-void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
- struct mlx5_cmd_mailbox *head);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
int inlen);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey);
@@ -1046,8 +1062,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev);
-void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
- s32 npages, bool ec_function);
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
@@ -1055,7 +1069,7 @@ void mlx5_unregister_debugfs(void);
void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn);
+int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn);
int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1087,8 +1101,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev);
void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
-int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
- struct mlx5_odp_caps *odp_caps);
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
@@ -1105,9 +1117,8 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
bool map_wc, bool fast_path);
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
-unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev);
-struct cpumask *
-mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector);
+unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev);
+int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector);
unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
u8 roce_version, u8 roce_l3_type, const u8 *gid,
@@ -1191,12 +1202,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
int vf_id,
struct notifier_block *nb);
-#ifdef CONFIG_MLX5_CORE_IPOIB
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
- struct ib_device *ibdev,
- const char *name,
- void (*setup)(struct net_device *));
-#endif /* CONFIG_MLX5_CORE_IPOIB */
int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
struct ib_device *device,
struct rdma_netdev_alloc_params *params);
@@ -1320,6 +1325,52 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
return mlx5_is_roce_on(dev);
}
+#ifdef CONFIG_MLX5_MACSEC
+static inline bool mlx5e_is_macsec_device(const struct mlx5_core_dev *mdev)
+{
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, log_max_macsec_offload))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, macsec_decrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_remove_macsec))
+ return false;
+
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, macsec_encrypt) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_macsec))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_encrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_encrypt))
+ return false;
+
+ if (!MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_128_decrypt) &&
+ !MLX5_CAP_MACSEC(mdev, macsec_crypto_esp_aes_gcm_256_decrypt))
+ return false;
+
+ return true;
+}
+
+#define NIC_RDMA_BOTH_DIRS_CAPS (MLX5_FT_NIC_RX_2_NIC_RX_RDMA | MLX5_FT_NIC_TX_RDMA_2_NIC_TX)
+
+static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev)
+{
+ if (((MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) &
+ NIC_RDMA_BOTH_DIRS_CAPS) != NIC_RDMA_BOTH_DIRS_CAPS) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, max_modify_header_actions) ||
+ !mlx5e_is_macsec_device(mdev) || !mdev->macsec_fs)
+ return false;
+
+ return true;
+}
+#endif
+
enum {
MLX5_OCTWORD = 16,
};
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index e2701ed0200e..df73a2ccc9af 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -7,6 +7,7 @@
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include <net/devlink.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
@@ -144,6 +145,9 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
ESW_TUN_OPTS_OFFSET + 1)
+/* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */
+#define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0)
+
u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
@@ -207,4 +211,11 @@ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
}
+/* The returned number is valid only when the dev is eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
#endif
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 2cb404c7ea13..3fb428ce7d1c 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -67,6 +67,7 @@ enum {
MLX5_FLOW_TABLE_TERMINATION = BIT(2),
MLX5_FLOW_TABLE_UNMANAGED = BIT(3),
MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4),
+ MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5),
};
#define LEFTOVERS_RULE_NUM 2
@@ -105,15 +106,19 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS,
MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC,
MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC,
+ MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC,
+ MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC,
};
enum {
FDB_BYPASS_PATH,
+ FDB_CRYPTO_INGRESS,
FDB_TC_OFFLOAD,
FDB_FT_OFFLOAD,
FDB_TC_MISS,
FDB_BR_OFFLOAD,
FDB_SLOW_PATH,
+ FDB_CRYPTO_EGRESS,
FDB_PER_VPORT,
};
@@ -127,6 +132,7 @@ struct mlx5_flow_handle;
enum {
FLOW_CONTEXT_HAS_TAG = BIT(0),
+ FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1),
};
struct mlx5_flow_context {
diff --git a/include/linux/mlx5/macsec.h b/include/linux/mlx5/macsec.h
new file mode 100644
index 000000000000..f7ff4c2a95d0
--- /dev/null
+++ b/include/linux/mlx5/macsec.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef MLX5_MACSEC_H
+#define MLX5_MACSEC_H
+
+#ifdef CONFIG_MLX5_MACSEC
+struct mlx5_macsec_event_data {
+ struct mlx5_macsec_fs *macsec_fs;
+ void *macdev;
+ u32 fs_id;
+ bool is_tx;
+};
+
+int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx,
+ struct list_head *tx_rules_list, struct list_head *rx_rules_list,
+ struct mlx5_macsec_fs *macsec_fs);
+
+void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs,
+ struct list_head *tx_rules_list, struct list_head *rx_rules_list);
+
+void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx,
+ struct list_head *tx_rules_list,
+ struct list_head *rx_rules_list,
+ struct mlx5_macsec_fs *macsec_fs, bool is_tx);
+
+void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs,
+ struct list_head *tx_rules_list,
+ struct list_head *rx_rules_list, bool is_tx);
+
+#endif
+#endif /* MLX5_MACSEC_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 33344a71c3e3..c940b329a475 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -65,9 +65,11 @@ enum {
enum {
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
+ MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS = 0x1,
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4,
+ MLX5_SET_HCA_CAP_OP_MOD_IPSEC = 0x15,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20,
MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION = 0x25,
};
@@ -310,6 +312,7 @@ enum {
MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d,
MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e,
MLX5_CMD_OP_SYNC_CRYPTO = 0xb12,
+ MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16,
MLX5_CMD_OP_MAX
};
@@ -432,7 +435,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 flow_table_modify[0x1];
u8 reformat[0x1];
u8 decap[0x1];
- u8 reserved_at_9[0x1];
+ u8 reset_root_to_default[0x1];
u8 pop_vlan[0x1];
u8 push_vlan[0x1];
u8 reserved_at_c[0x1];
@@ -464,10 +467,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reformat_add_esp_trasport[0x1];
u8 reformat_l2_to_l3_esp_tunnel[0x1];
- u8 reserved_at_42[0x1];
+ u8 reformat_add_esp_transport_over_udp[0x1];
u8 reformat_del_esp_trasport[0x1];
u8 reformat_l3_esp_tunnel_to_l2[0x1];
- u8 reserved_at_45[0x1];
+ u8 reformat_del_esp_transport_over_udp[0x1];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
@@ -618,7 +621,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
u8 reserved_at_140[0x8];
u8 bth_dst_qp[0x18];
- u8 reserved_at_160[0x20];
+ u8 inner_esp_spi[0x20];
u8 outer_esp_spi[0x20];
u8 reserved_at_1a0[0x60];
};
@@ -1100,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits {
u8 sw_r_roce_src_udp_port[0x1];
u8 fl_rc_qp_when_roce_disabled[0x1];
u8 fl_rc_qp_when_roce_enabled[0x1];
- u8 reserved_at_7[0x1];
+ u8 roce_cc_general[0x1];
u8 qp_ooo_transmit_default[0x1];
u8 reserved_at_9[0x15];
u8 qp_ts_format[0x2];
@@ -1190,7 +1193,8 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 log_sw_icm_alloc_granularity[0x6];
u8 log_steering_sw_icm_size[0x8];
- u8 reserved_at_120[0x18];
+ u8 log_indirect_encap_sw_icm_size[0x8];
+ u8 reserved_at_128[0x10];
u8 log_header_modify_pattern_sw_icm_size[0x8];
u8 header_modify_sw_icm_start_address[0x40];
@@ -1201,7 +1205,11 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 memic_operations[0x20];
- u8 reserved_at_220[0x5e0];
+ u8 reserved_at_220[0x20];
+
+ u8 indirect_encap_sw_icm_start_address[0x40];
+
+ u8 reserved_at_280[0x580];
};
struct mlx5_ifc_device_event_cap_bits {
@@ -1229,7 +1237,14 @@ struct mlx5_ifc_virtio_emulation_cap_bits {
u8 max_emulated_devices[0x8];
u8 max_num_virtio_queues[0x18];
- u8 reserved_at_a0[0x60];
+ u8 reserved_at_a0[0x20];
+
+ u8 reserved_at_c0[0x13];
+ u8 desc_group_mkey_supported[0x1];
+ u8 freeze_to_rdy_supported[0x1];
+ u8 reserved_at_d5[0xb];
+
+ u8 reserved_at_e0[0x20];
u8 umem_1_buffer_param_a[0x20];
@@ -1314,33 +1329,6 @@ struct mlx5_ifc_odp_cap_bits {
u8 reserved_at_120[0x6E0];
};
-struct mlx5_ifc_calc_op {
- u8 reserved_at_0[0x10];
- u8 reserved_at_10[0x9];
- u8 op_swap_endianness[0x1];
- u8 op_min[0x1];
- u8 op_xor[0x1];
- u8 op_or[0x1];
- u8 op_and[0x1];
- u8 op_max[0x1];
- u8 op_add[0x1];
-};
-
-struct mlx5_ifc_vector_calc_cap_bits {
- u8 calc_matrix[0x1];
- u8 reserved_at_1[0x1f];
- u8 reserved_at_20[0x8];
- u8 max_vec_count[0x8];
- u8 reserved_at_30[0xd];
- u8 max_chunk_size[0x3];
- struct mlx5_ifc_calc_op calc0;
- struct mlx5_ifc_calc_op calc1;
- struct mlx5_ifc_calc_op calc2;
- struct mlx5_ifc_calc_op calc3;
-
- u8 reserved_at_c0[0x720];
-};
-
struct mlx5_ifc_tls_cap_bits {
u8 tls_1_2_aes_gcm_128[0x1];
u8 tls_1_3_aes_gcm_128[0x1];
@@ -1819,7 +1807,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 disable_local_lb_uc[0x1];
u8 disable_local_lb_mc[0x1];
u8 log_min_hairpin_wq_data_sz[0x5];
- u8 reserved_at_3e8[0x2];
+ u8 reserved_at_3e8[0x1];
+ u8 silent_mode[0x1];
u8 vhca_state[0x1];
u8 log_max_vlan_list[0x5];
u8 reserved_at_3f0[0x3];
@@ -1836,7 +1825,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_460[0x1];
u8 ats[0x1];
- u8 reserved_at_462[0x1];
+ u8 cross_vhca_rqt[0x1];
u8 log_max_uctx[0x5];
u8 reserved_at_468[0x1];
u8 crypto[0x1];
@@ -1959,6 +1948,15 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 match_definer_format_supported[0x40];
};
+enum {
+ MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000,
+ MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE = (1ULL << 20),
+};
+
+enum {
+ MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200,
+};
+
struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_0[0x80];
@@ -1973,9 +1971,15 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_c0[0x8];
u8 migration_multi_load[0x1];
u8 migration_tracking_state[0x1];
- u8 reserved_at_ca[0x16];
+ u8 reserved_at_ca[0x6];
+ u8 migration_in_chunks[0x1];
+ u8 reserved_at_d1[0xf];
+
+ u8 cross_vhca_object_to_object_supported[0x20];
- u8 reserved_at_e0[0xc0];
+ u8 allowed_object_for_other_vhca_access[0x40];
+
+ u8 reserved_at_140[0x60];
u8 flow_table_type_2_type[0x8];
u8 reserved_at_1a8[0x3];
@@ -1996,7 +2000,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 reserved_at_260[0x120];
u8 reserved_at_380[0x10];
u8 ec_vf_vport_base[0x10];
- u8 reserved_at_3a0[0x460];
+
+ u8 reserved_at_3a0[0x10];
+ u8 max_rqt_vhca_id[0x10];
+
+ u8 reserved_at_3c0[0x440];
};
enum mlx5_ifc_flow_destination_type {
@@ -2155,6 +2163,13 @@ struct mlx5_ifc_rq_num_bits {
u8 rq_num[0x18];
};
+struct mlx5_ifc_rq_vhca_bits {
+ u8 reserved_at_0[0x8];
+ u8 rq_num[0x18];
+ u8 reserved_at_20[0x10];
+ u8 rq_vhca_id[0x10];
+};
+
struct mlx5_ifc_mac_address_layout_bits {
u8 reserved_at_0[0x10];
u8 mac_addr_47_32[0x10];
@@ -3435,20 +3450,6 @@ struct mlx5_ifc_roce_addr_layout_bits {
u8 reserved_at_e0[0x20];
};
-struct mlx5_ifc_shampo_cap_bits {
- u8 reserved_at_0[0x3];
- u8 shampo_log_max_reservation_size[0x5];
- u8 reserved_at_8[0x3];
- u8 shampo_log_min_reservation_size[0x5];
- u8 shampo_min_mss_size[0x10];
-
- u8 reserved_at_20[0x3];
- u8 shampo_max_log_headers_entry_size[0x5];
- u8 reserved_at_28[0x18];
-
- u8 reserved_at_40[0x7c0];
-};
-
struct mlx5_ifc_crypto_cap_bits {
u8 reserved_at_0[0x3];
u8 synchronize_dek[0x1];
@@ -3484,16 +3485,15 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
struct mlx5_ifc_port_selection_cap_bits port_selection_cap;
- struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
struct mlx5_ifc_qos_cap_bits qos_cap;
struct mlx5_ifc_debug_cap_bits debug_cap;
struct mlx5_ifc_fpga_cap_bits fpga_cap;
struct mlx5_ifc_tls_cap_bits tls_cap;
struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
- struct mlx5_ifc_shampo_cap_bits shampo_cap;
struct mlx5_ifc_macsec_cap_bits macsec_cap;
struct mlx5_ifc_crypto_cap_bits crypto_cap;
+ struct mlx5_ifc_ipsec_cap_bits ipsec_cap;
u8 reserved_at_0[0x8000];
};
@@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits {
u8 action[0x10];
u8 extended_destination[0x1];
- u8 reserved_at_81[0x1];
+ u8 uplink_hairpin_en[0x1];
u8 flow_source[0x2];
u8 encrypt_decrypt_type[0x4];
u8 destination_list_size[0x18];
@@ -3920,7 +3920,10 @@ struct mlx5_ifc_rqtc_bits {
u8 reserved_at_e0[0x6a0];
- struct mlx5_ifc_rq_num_bits rq_num[];
+ union {
+ DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_num_bits, rq_num);
+ DECLARE_FLEX_ARRAY(struct mlx5_ifc_rq_vhca_bits, rq_vhca);
+ };
};
enum {
@@ -4033,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits {
u8 affiliation_criteria[0x4];
u8 affiliated_vhca_id[0x10];
- u8 reserved_at_60[0xd0];
+ u8 reserved_at_60[0xa0];
+
+ u8 reserved_at_100[0x1];
+ u8 sd_group[0x3];
+ u8 reserved_at_104[0x1c];
+ u8 reserved_at_120[0x10];
u8 mtu[0x10];
u8 system_image_guid[0x40];
@@ -4763,7 +4771,10 @@ struct mlx5_ifc_set_l2_table_entry_in_bits {
u8 reserved_at_c0[0x20];
- u8 reserved_at_e0[0x13];
+ u8 reserved_at_e0[0x10];
+ u8 silent_mode_valid[0x1];
+ u8 silent_mode[0x1];
+ u8 reserved_at_f2[0x1];
u8 vlan_valid[0x1];
u8 vlan[0xc];
@@ -6409,6 +6420,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_allow_other_vhca_access_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+ u8 reserved_at_40[0x50];
+ u8 object_type_to_be_accessed[0x10];
+ u8 object_id_to_be_accessed[0x20];
+ u8 reserved_at_c0[0x40];
+ union {
+ u8 access_key_raw[0x100];
+ u8 access_key[8][0x20];
+ };
+};
+
+struct mlx5_ifc_allow_other_vhca_access_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+ u8 syndrome[0x20];
+ u8 reserved_at_40[0x40];
+};
+
struct mlx5_ifc_modify_header_arg_bits {
u8 reserved_at_0[0x80];
@@ -6431,6 +6464,24 @@ struct mlx5_ifc_create_match_definer_out_bits {
struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
};
+struct mlx5_ifc_alias_context_bits {
+ u8 vhca_id_to_be_accessed[0x10];
+ u8 reserved_at_10[0xd];
+ u8 status[0x3];
+ u8 object_id_to_be_accessed[0x20];
+ u8 reserved_at_40[0x40];
+ union {
+ u8 access_key_raw[0x100];
+ u8 access_key[8][0x20];
+ };
+ u8 metadata[0x80];
+};
+
+struct mlx5_ifc_create_alias_obj_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
+ struct mlx5_ifc_alias_context_bits alias_ctx;
+};
+
enum {
MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0,
MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1,
@@ -6665,9 +6716,12 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4 = 0x7,
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
+ MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11,
@@ -10065,6 +10119,19 @@ struct mlx5_ifc_mpegc_reg_bits {
u8 reserved_at_60[0x100];
};
+struct mlx5_ifc_mpir_reg_bits {
+ u8 sdm[0x1];
+ u8 reserved_at_1[0x1b];
+ u8 host_buses[0x4];
+
+ u8 reserved_at_20[0x20];
+
+ u8 local_port[0x8];
+ u8 reserved_at_28[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
enum {
MLX5_MTUTC_FREQ_ADJ_UNITS_PPB = 0x0,
MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM = 0x1,
@@ -10079,7 +10146,10 @@ enum {
struct mlx5_ifc_mtutc_reg_bits {
u8 reserved_at_0[0x5];
u8 freq_adj_units[0x3];
- u8 reserved_at_8[0x14];
+ u8 reserved_at_8[0x3];
+ u8 log_max_freq_adjustment[0x5];
+
+ u8 reserved_at_10[0xc];
u8 operation[0x4];
u8 freq_adjustment[0x20];
@@ -10183,7 +10253,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 mcqi[0x1];
u8 mcqs[0x1];
- u8 regs_95_to_87[0x9];
+ u8 regs_95_to_90[0x6];
+ u8 mpir[0x1];
+ u8 regs_88_to_87[0x2];
u8 mpegc[0x1];
u8 mtutc[0x1];
u8 regs_84_to_68[0x11];
@@ -10191,9 +10263,13 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 regs_63_to_46[0x12];
u8 mrtc[0x1];
- u8 regs_44_to_32[0xd];
+ u8 regs_44_to_41[0x4];
+ u8 mfrl[0x1];
+ u8 regs_39_to_32[0x8];
- u8 regs_31_to_0[0x20];
+ u8 regs_31_to_10[0x16];
+ u8 mtmp[0x1];
+ u8 regs_8_to_0[0x9];
};
struct mlx5_ifc_mcam_access_reg_bits1 {
@@ -10211,7 +10287,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 {
u8 mirc[0x1];
u8 regs_97_to_96[0x2];
- u8 regs_95_to_64[0x20];
+ u8 regs_95_to_87[0x09];
+ u8 synce_registers[0x2];
+ u8 regs_84_to_64[0x15];
u8 regs_63_to_32[0x20];
@@ -10587,6 +10665,7 @@ enum {
MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0,
MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1,
MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2,
+ MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7,
};
enum {
@@ -10607,6 +10686,7 @@ enum {
MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe,
MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf,
MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10,
+ MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12,
};
struct mlx5_ifc_initial_seg_bits {
@@ -10853,8 +10933,9 @@ enum {
MLX5_MFRL_REG_RESET_STATE_IDLE = 0,
MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1,
MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2,
- MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3,
+ MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT = 3,
MLX5_MFRL_REG_RESET_STATE_NACK = 4,
+ MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT = 5,
};
enum {
@@ -10946,6 +11027,15 @@ struct mlx5_ifc_mrtc_reg_bits {
u8 time_l[0x20];
};
+struct mlx5_ifc_mtcap_reg_bits {
+ u8 reserved_at_0[0x19];
+ u8 sensor_count[0x7];
+
+ u8 reserved_at_20[0x20];
+
+ u8 sensor_map[0x40];
+};
+
struct mlx5_ifc_mtmp_reg_bits {
u8 reserved_at_0[0x14];
u8 sensor_index[0xc];
@@ -11033,6 +11123,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
+ struct mlx5_ifc_mtcap_reg_bits mtcap_reg;
struct mlx5_ifc_mtmp_reg_bits mtmp_reg;
u8 reserved_at_0[0x60e0];
};
@@ -11941,6 +12032,7 @@ enum {
MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24,
MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27,
MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15,
};
enum {
@@ -11960,6 +12052,13 @@ enum {
MLX5_IPSEC_ASO_INC_SN = 0x2,
};
+enum {
+ MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0,
+ MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1,
+ MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2,
+ MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3,
+};
+
struct mlx5_ifc_ipsec_aso_bits {
u8 valid[0x1];
u8 reserved_at_201[0x1];
@@ -12416,7 +12515,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits {
u8 op_mod[0x10];
u8 incremental[0x1];
- u8 reserved_at_41[0xf];
+ u8 chunk[0x1];
+ u8 reserved_at_42[0xe];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
@@ -12432,7 +12532,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits {
u8 required_umem_size[0x20];
- u8 reserved_at_a0[0x160];
+ u8 reserved_at_a0[0x20];
+
+ u8 remaining_total_size[0x40];
+
+ u8 reserved_at_100[0x100];
};
struct mlx5_ifc_save_vhca_state_in_bits {
@@ -12464,7 +12568,7 @@ struct mlx5_ifc_save_vhca_state_out_bits {
u8 actual_image_size[0x20];
- u8 reserved_at_60[0x20];
+ u8 next_required_umem_size[0x20];
};
struct mlx5_ifc_load_vhca_state_in_bits {
@@ -12573,4 +12677,72 @@ struct mlx5_ifc_modify_page_track_obj_in_bits {
struct mlx5_ifc_page_track_bits obj_context;
};
+struct mlx5_ifc_query_page_track_obj_out_bits {
+ struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+ struct mlx5_ifc_page_track_bits obj_context;
+};
+
+struct mlx5_ifc_msecq_reg_bits {
+ u8 reserved_at_0[0x20];
+
+ u8 reserved_at_20[0x12];
+ u8 network_option[0x2];
+ u8 local_ssm_code[0x4];
+ u8 local_enhanced_ssm_code[0x8];
+
+ u8 local_clock_identity[0x40];
+
+ u8 reserved_at_80[0x180];
+};
+
+enum {
+ MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0),
+ MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1),
+ MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2),
+};
+
+enum mlx5_msees_admin_status {
+ MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0,
+ MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1,
+};
+
+enum mlx5_msees_oper_status {
+ MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0,
+ MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1,
+ MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2,
+ MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3,
+ MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4,
+ MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5,
+};
+
+enum mlx5_msees_failure_reason {
+ MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0,
+ MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1,
+ MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2,
+ MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3,
+ MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4,
+};
+
+struct mlx5_ifc_msees_reg_bits {
+ u8 reserved_at_0[0x8];
+ u8 local_port[0x8];
+ u8 pnat[0x2];
+ u8 lp_msb[0x2];
+ u8 reserved_at_14[0xc];
+
+ u8 field_select[0x20];
+
+ u8 admin_status[0x4];
+ u8 oper_status[0x4];
+ u8 ho_acq[0x1];
+ u8 reserved_at_49[0xc];
+ u8 admin_freq_measure[0x1];
+ u8 oper_freq_measure[0x1];
+ u8 failure_reason[0x9];
+
+ u8 frequency_diff[0x20];
+
+ u8 reserved_at_80[0x180];
+};
+
#endif /* MLX5_IFC_H */
diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index 9becdc3fa503..40371c916cf9 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits {
u8 reserved_at_320[0x8];
u8 pd[0x18];
- u8 reserved_at_340[0xc0];
+ u8 reserved_at_340[0x20];
+
+ u8 desc_group_mkey[0x20];
+
+ u8 reserved_at_380[0x80];
};
struct mlx5_ifc_virtio_net_q_object_bits {
@@ -141,6 +145,11 @@ enum {
MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4,
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6,
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7,
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8,
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11,
+ MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14,
};
enum {
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 98b2e1e149f9..26092c78a985 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -115,8 +115,9 @@ enum mlx5e_ext_link_mode {
MLX5E_100GAUI_1_100GBASE_CR_KR = 11,
MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12,
MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13,
- MLX5E_400GAUI_8 = 15,
+ MLX5E_400GAUI_8_400GBASE_CR8 = 15,
MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16,
+ MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19,
MLX5E_EXT_LINK_MODES_NUMBER,
};
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index bd53cf4be7bd..f0e55bf3ec8b 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg {
union {
struct {
__be16 sz;
- u8 start[2];
+ union {
+ u8 start[2];
+ DECLARE_FLEX_ARRAY(u8, data);
+ };
} inline_hdr;
struct {
__be16 type;
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index fbb9bf447889..c36cc6d82926 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu);
int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
u64 *system_image_guid);
+int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group);
int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
u16 vport, u64 node_guid);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 34f9dba17c1a..b6bdaa18b9e9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,6 +36,7 @@ struct anon_vma;
struct anon_vma_chain;
struct user_struct;
struct pt_regs;
+struct folio_batch;
extern int sysctl_page_lock_unfairness;
@@ -86,7 +87,7 @@ extern int sysctl_legacy_va_layout;
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
extern const int mmap_rnd_bits_min;
-extern const int mmap_rnd_bits_max;
+extern int mmap_rnd_bits_max __ro_after_init;
extern int mmap_rnd_bits __read_mostly;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
@@ -226,7 +227,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
static inline struct folio *lru_to_folio(struct list_head *head)
{
return list_entry((head)->prev, struct folio, lru);
@@ -319,11 +319,13 @@ extern unsigned int kobjsize(const void *objp);
#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3)
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -339,14 +341,27 @@ extern unsigned int kobjsize(const void *objp);
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+/*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace protect
+ * itself from attacks. A single page is enough for current shadow stack archs
+ * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
+ * for more details on the guard size.
+ */
+# define VM_SHADOW_STACK VM_HIGH_ARCH_5
+#else
+# define VM_SHADOW_STACK VM_NONE
+#endif
+
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
#elif defined(CONFIG_PPC)
# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP VM_ARCH_1
-#elif defined(CONFIG_IA64)
-# define VM_GROWSUP VM_ARCH_1
#elif defined(CONFIG_SPARC64)
# define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */
# define VM_ARCH_CLEAR VM_SPARC_ADI
@@ -370,12 +385,26 @@ extern unsigned int kobjsize(const void *objp);
#endif
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT 37
+# define VM_UFFD_MINOR_BIT 38
# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
# define VM_UFFD_MINOR VM_NONE
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+/*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED_BIT 39
+#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#endif
+
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
@@ -397,6 +426,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK VM_GROWSUP
#define VM_STACK_EARLY VM_GROWSDOWN
@@ -532,13 +563,6 @@ struct vm_fault {
*/
};
-/* page entry size for vm->huge_fault() */
-enum page_entry_size {
- PE_SIZE_PTE = 0,
- PE_SIZE_PMD,
- PE_SIZE_PUD,
-};
-
/*
* These are the virtual MM functions - opening of an area, closing and
* unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -562,8 +586,7 @@ struct vm_operations_struct {
int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
unsigned long end, unsigned long newflags);
vm_fault_t (*fault)(struct vm_fault *vmf);
- vm_fault_t (*huge_fault)(struct vm_fault *vmf,
- enum page_entry_size pe_size);
+ vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
unsigned long (*pagesize)(struct vm_area_struct * area);
@@ -608,7 +631,7 @@ struct vm_operations_struct {
* policy.
*/
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
- unsigned long addr);
+ unsigned long addr, pgoff_t *ilx);
#endif
/*
* Called by vm_normal_page() for special PTEs to find the
@@ -679,6 +702,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
rcu_read_unlock();
}
+/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
{
mmap_assert_write_locked(vma->vm_mm);
@@ -691,6 +715,11 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
return (vma->vm_lock_seq == *mm_lock_seq);
}
+/*
+ * Begin writing to a VMA.
+ * Exclude concurrent readers under the per-VMA lock until the currently
+ * write-locked mmap_lock is dropped or downgraded.
+ */
static inline void vma_start_write(struct vm_area_struct *vma)
{
int mm_lock_seq;
@@ -709,26 +738,17 @@ static inline void vma_start_write(struct vm_area_struct *vma)
up_write(&vma->vm_lock->lock);
}
-static inline bool vma_try_start_write(struct vm_area_struct *vma)
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
int mm_lock_seq;
- if (__is_vma_write_locked(vma, &mm_lock_seq))
- return true;
-
- if (!down_write_trylock(&vma->vm_lock->lock))
- return false;
-
- WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
- up_write(&vma->vm_lock->lock);
- return true;
+ VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+static inline void vma_assert_locked(struct vm_area_struct *vma)
{
- int mm_lock_seq;
-
- VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
+ if (!rwsem_is_locked(&vma->vm_lock->lock))
+ vma_assert_write_locked(vma);
}
static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
@@ -739,6 +759,22 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
vma->detached = detached;
}
+static inline void release_fault_lock(struct vm_fault *vmf)
+{
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+ vma_end_read(vmf->vma);
+ else
+ mmap_read_unlock(vmf->vma->vm_mm);
+}
+
+static inline void assert_fault_locked(struct vm_fault *vmf)
+{
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+ vma_assert_locked(vmf->vma);
+ else
+ mmap_assert_locked(vmf->vma->vm_mm);
+}
+
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
@@ -748,25 +784,45 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
{ return false; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
-static inline bool vma_try_start_write(struct vm_area_struct *vma)
- { return true; }
-static inline void vma_assert_write_locked(struct vm_area_struct *vma) {}
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+ { mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_mark_detached(struct vm_area_struct *vma,
bool detached) {}
+static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+ unsigned long address)
+{
+ return NULL;
+}
+
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+}
+
+static inline void release_fault_lock(struct vm_fault *vmf)
+{
+ mmap_read_unlock(vmf->vma->vm_mm);
+}
+
+static inline void assert_fault_locked(struct vm_fault *vmf)
+{
+ mmap_assert_locked(vmf->vma->vm_mm);
+}
+
#endif /* CONFIG_PER_VMA_LOCK */
+extern const struct vm_operations_struct vma_dummy_vm_ops;
+
/*
* WARNING: vma_init does not initialize vma->vm_lock.
* Use vm_area_alloc()/vm_area_free() if vma needs locking.
*/
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
- static const struct vm_operations_struct dummy_vm_ops = {};
-
memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
- vma->vm_ops = &dummy_vm_ops;
+ vma->vm_ops = &vma_dummy_vm_ops;
INIT_LIST_HEAD(&vma->anon_vma_chain);
vma_mark_detached(vma, false);
vma_numab_state_init(vma);
@@ -779,18 +835,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma,
ACCESS_PRIVATE(vma, __vm_flags) = flags;
}
-/* Use when VMA is part of the VMA tree and modifications need coordination */
+/*
+ * Use when VMA is part of the VMA tree and modifications need coordination
+ * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
+ * it should be locked explicitly beforehand.
+ */
static inline void vm_flags_reset(struct vm_area_struct *vma,
vm_flags_t flags)
{
- vma_start_write(vma);
+ vma_assert_write_locked(vma);
vm_flags_init(vma, flags);
}
static inline void vm_flags_reset_once(struct vm_area_struct *vma,
vm_flags_t flags)
{
- vma_start_write(vma);
+ vma_assert_write_locked(vma);
WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
}
@@ -839,6 +899,31 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
return !vma->vm_ops;
}
+/*
+ * Indicate if the VMA is a heap for the given task; for
+ * /proc/PID/maps that is the heap of the main task.
+ */
+static inline bool vma_is_initial_heap(const struct vm_area_struct *vma)
+{
+ return vma->vm_start < vma->vm_mm->brk &&
+ vma->vm_end > vma->vm_mm->start_brk;
+}
+
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static inline bool vma_is_initial_stack(const struct vm_area_struct *vma)
+{
+ /*
+ * We make no effort to guess what a given thread considers to be
+ * its "stack". It's not even well-defined for programs written
+ * languages like Go.
+ */
+ return vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
+}
+
static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
{
int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -869,6 +954,17 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
+static inline bool is_shared_maywrite(vm_flags_t vm_flags)
+{
+ return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
+ (VM_SHARED | VM_MAYWRITE);
+}
+
+static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
+{
+ return is_shared_maywrite(vma->vm_flags);
+}
+
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
@@ -917,6 +1013,17 @@ static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
return mas_expected_entries(&vmi->mas, count);
}
+static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
+ unsigned long start, unsigned long end, gfp_t gfp)
+{
+ __mas_set_range(&vmi->mas, start, end - 1);
+ mas_store_gfp(&vmi->mas, NULL, gfp);
+ if (unlikely(mas_is_err(&vmi->mas)))
+ return -ENOMEM;
+
+ return 0;
+}
+
/* Free any unused preallocations */
static inline void vma_iter_free(struct vma_iterator *vmi)
{
@@ -976,7 +1083,7 @@ struct inode;
* compound_order() can be called without holding a reference, which means
* that niceties like page_folio() don't work. These callers should be
* prepared to handle wild return values. For example, PG_head may be
- * set before _folio_order is initialised, or this may be a tail page.
+ * set before the order is initialised, or this may be a tail page.
* See compaction.c for some good examples.
*/
static inline unsigned int compound_order(struct page *page)
@@ -985,7 +1092,7 @@ static inline unsigned int compound_order(struct page *page)
if (!test_bit(PG_head, &folio->flags))
return 0;
- return folio->_folio_order;
+ return folio->_flags_1 & 0xff;
}
/**
@@ -1001,7 +1108,7 @@ static inline unsigned int folio_order(struct folio *folio)
{
if (!folio_test_large(folio))
return 0;
- return folio->_folio_order;
+ return folio->_flags_1 & 0xff;
}
#include <linux/huge_mm.h>
@@ -1072,11 +1179,6 @@ unsigned long vmalloc_to_pfn(const void *addr);
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
* is no special casing required.
*/
-
-#ifndef is_ioremap_addr
-#define is_ioremap_addr(x) is_vmalloc_addr(x)
-#endif
-
#ifdef CONFIG_MMU
extern bool is_vmalloc_addr(const void *x);
extern int is_vmalloc_or_module_addr(const void *x);
@@ -1095,7 +1197,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
* How many times the entire folio is mapped as a single unit (eg by a
* PMD or PUD entry). This is probably not what you want, except for
* debugging purposes - it does not include PTE-mapped sub-pages; look
- * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
+ * at folio_mapcount() or page_mapcount() instead.
*/
static inline int folio_entire_mapcount(struct folio *folio)
{
@@ -1121,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page)
* a large folio, it includes the number of times this page is mapped
* as part of that folio.
*
- * The result is undefined for pages which cannot be mapped into userspace.
- * For example SLAB or special types of pages. See function page_has_type().
- * They use this field in struct page differently.
+ * Will report 0 for pages which cannot be mapped into userspace, eg
+ * slab, page tables and similar.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < 0)
+ mapcount = 0;
if (unlikely(PageCompound(page)))
mapcount += folio_entire_mapcount(page_folio(page));
@@ -1155,13 +1259,6 @@ static inline int folio_mapcount(struct folio *folio)
return folio_total_mapcount(folio);
}
-static inline int total_mapcount(struct page *page)
-{
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) + 1;
- return folio_total_mapcount(page_folio(page));
-}
-
static inline bool folio_large_is_mapped(struct folio *folio)
{
/*
@@ -1220,33 +1317,6 @@ void folio_copy(struct folio *dst, struct folio *src);
unsigned long nr_free_buffer_pages(void);
-/*
- * Compound pages have a destructor function. Provide a
- * prototype for that function and accessor functions.
- * These are _only_ valid on the head of a compound page.
- */
-typedef void compound_page_dtor(struct page *);
-
-/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */
-enum compound_dtor_id {
- NULL_COMPOUND_DTOR,
- COMPOUND_PAGE_DTOR,
-#ifdef CONFIG_HUGETLB_PAGE
- HUGETLB_PAGE_DTOR,
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- TRANSHUGE_PAGE_DTOR,
-#endif
- NR_COMPOUND_DTORS,
-};
-
-static inline void folio_set_compound_dtor(struct folio *folio,
- enum compound_dtor_id compound_dtor)
-{
- VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio);
- folio->_folio_dtor = compound_dtor;
-}
-
void destroy_large_folio(struct folio *folio);
/* Returns the number of bytes in this potentially compound page. */
@@ -1282,8 +1352,6 @@ static inline unsigned long thp_size(struct page *page)
return PAGE_SIZE << thp_order(page);
}
-void free_compound_page(struct page *page);
-
#ifdef CONFIG_MMU
/*
* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
@@ -1294,15 +1362,15 @@ void free_compound_page(struct page *page);
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
return pte;
}
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
-void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
+void set_pte_range(struct vm_fault *vmf, struct folio *folio,
+ struct page *page, unsigned int nr, unsigned long addr);
vm_fault_t finish_fault(struct vm_fault *vmf);
-vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
/*
@@ -1460,6 +1528,8 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio);
}
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
+
/*
* union release_pages_arg - an array of pages or folios
*
@@ -1482,18 +1552,19 @@ void release_pages(release_pages_arg, int nr);
/**
* folios_put - Decrement the reference count on an array of folios.
* @folios: The folios.
- * @nr: How many folios there are.
*
- * Like folio_put(), but for an array of folios. This is more efficient
- * than writing the loop yourself as it will optimise the locks which
- * need to be taken if the folios are freed.
+ * Like folio_put(), but for a batch of folios. This is more efficient
+ * than writing the loop yourself as it will optimise the locks which need
+ * to be taken if the folios are freed. The folios batch is returned
+ * empty and ready to be reused for another batch; there is no need to
+ * reinitialise it.
*
* Context: May be called in process or interrupt context, but not in NMI
* context. May be called while holding a spinlock.
*/
-static inline void folios_put(struct folio **folios, unsigned int nr)
+static inline void folios_put(struct folio_batch *folios)
{
- release_pages(folios, nr);
+ folios_put_refs(folios, NULL);
}
static inline void put_page(struct page *page)
@@ -1586,13 +1657,11 @@ static inline int page_zone_id(struct page *page)
}
#ifdef NODE_NOT_IN_PAGE_FLAGS
-extern int page_to_nid(const struct page *page);
+int page_to_nid(const struct page *page);
#else
static inline int page_to_nid(const struct page *page)
{
- struct page *p = (struct page *)page;
-
- return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK;
+ return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK;
}
#endif
@@ -1651,26 +1720,26 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
-static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
+static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
{
- return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);
+ return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK);
}
-static inline int page_cpupid_last(struct page *page)
+static inline int folio_last_cpupid(struct folio *folio)
{
- return page->_last_cpupid;
+ return folio->_last_cpupid;
}
static inline void page_cpupid_reset_last(struct page *page)
{
page->_last_cpupid = -1 & LAST_CPUPID_MASK;
}
#else
-static inline int page_cpupid_last(struct page *page)
+static inline int folio_last_cpupid(struct folio *folio)
{
- return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
+ return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
}
-extern int page_cpupid_xchg_last(struct page *page, int cpupid);
+int folio_xchg_last_cpupid(struct folio *folio, int cpupid);
static inline void page_cpupid_reset_last(struct page *page)
{
@@ -1678,11 +1747,12 @@ static inline void page_cpupid_reset_last(struct page *page)
}
#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
-static inline int xchg_page_access_time(struct page *page, int time)
+static inline int folio_xchg_access_time(struct folio *folio, int time)
{
int last_time;
- last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS);
+ last_time = folio_xchg_last_cpupid(folio,
+ time >> PAGE_ACCESS_TIME_BUCKETS);
return last_time << PAGE_ACCESS_TIME_BUCKETS;
}
@@ -1691,24 +1761,24 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
unsigned int pid_bit;
pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
- if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) {
- __set_bit(pid_bit, &vma->numab_state->access_pids[1]);
+ if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) {
+ __set_bit(pid_bit, &vma->numab_state->pids_active[1]);
}
}
#else /* !CONFIG_NUMA_BALANCING */
-static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
+static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
{
- return page_to_nid(page); /* XXX */
+ return folio_nid(folio); /* XXX */
}
-static inline int xchg_page_access_time(struct page *page, int time)
+static inline int folio_xchg_access_time(struct folio *folio, int time)
{
return 0;
}
-static inline int page_cpupid_last(struct page *page)
+static inline int folio_last_cpupid(struct folio *folio)
{
- return page_to_nid(page); /* XXX */
+ return folio_nid(folio); /* XXX */
}
static inline int cpupid_to_nid(int cpupid)
@@ -1760,7 +1830,7 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
static inline u8 page_kasan_tag(const struct page *page)
{
- u8 tag = 0xff;
+ u8 tag = KASAN_TAG_KERNEL;
if (kasan_enabled()) {
tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
@@ -1789,7 +1859,7 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag)
static inline void page_kasan_tag_reset(struct page *page)
{
if (kasan_enabled())
- page_kasan_tag_set(page, 0xff);
+ page_kasan_tag_set(page, KASAN_TAG_KERNEL);
}
#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
@@ -1909,15 +1979,15 @@ static inline bool page_maybe_dma_pinned(struct page *page)
*
* The caller has to hold the PT lock and the vma->vm_mm->->write_protect_seq.
*/
-static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
- struct page *page)
+static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma,
+ struct folio *folio)
{
VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1));
if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
return false;
- return page_maybe_dma_pinned(page);
+ return folio_maybe_dma_pinned(folio);
}
/**
@@ -2006,10 +2076,17 @@ static inline long folio_nr_pages(struct folio *folio)
#ifdef CONFIG_64BIT
return folio->_folio_nr_pages;
#else
- return 1L << folio->_folio_order;
+ return 1L << (folio->_flags_1 & 0xff);
#endif
}
+/* Only hugetlbfs can allocate folios larger than MAX_ORDER */
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER)
+#else
+#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES
+#endif
+
/*
* compound_nr() returns the number of pages in this potentially compound
* page. compound_nr() can be called on a tail page, and is defined to
@@ -2024,7 +2101,7 @@ static inline unsigned long compound_nr(struct page *page)
#ifdef CONFIG_64BIT
return folio->_folio_nr_pages;
#else
- return 1L << folio->_folio_order;
+ return 1L << (folio->_flags_1 & 0xff);
#endif
}
@@ -2132,11 +2209,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
*/
#include <linux/vmstat.h>
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
- return page_to_virt(page);
-}
-
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif
@@ -2159,6 +2231,11 @@ void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+ return page_to_virt(page);
+}
+
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address) do { } while(0)
@@ -2170,7 +2247,6 @@ static inline void *folio_address(const struct folio *folio)
return page_address(&folio->page);
}
-extern void *page_rmapping(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
/*
@@ -2238,18 +2314,6 @@ extern void pagefault_out_of_memory(void);
#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
/*
- * Flags passed to show_mem() and show_free_areas() to suppress output in
- * various contexts.
- */
-#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-
-extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
-static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask)
-{
- __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
-}
-
-/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
@@ -2305,6 +2369,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
+struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t pmd);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t pmd);
@@ -2317,9 +2383,9 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
zap_page_range_single(vma, vma->vm_start,
vma->vm_end - vma->vm_start, NULL);
}
-void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
struct vm_area_struct *start_vma, unsigned long start,
- unsigned long end, bool mm_wr_locked);
+ unsigned long end, unsigned long tree_end, bool mm_wr_locked);
struct mmu_notifier_range;
@@ -2340,7 +2406,8 @@ extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
-int generic_error_remove_page(struct address_space *mapping, struct page *page);
+int generic_error_remove_folio(struct address_space *mapping,
+ struct folio *folio);
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
unsigned long address, struct pt_regs *regs);
@@ -2391,8 +2458,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
-extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
- void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -2403,6 +2468,9 @@ long pin_user_pages_remote(struct mm_struct *mm,
unsigned int gup_flags, struct page **pages,
int *locked);
+/*
+ * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT.
+ */
static inline struct page *get_user_page_vma_remote(struct mm_struct *mm,
unsigned long addr,
int gup_flags,
@@ -2410,12 +2478,15 @@ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm,
{
struct page *page;
struct vm_area_struct *vma;
- int got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL);
+ int got;
+
+ if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT)))
+ return ERR_PTR(-EINVAL);
+
+ got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL);
if (got < 0)
return ERR_PTR(got);
- if (got == 0)
- return NULL;
vma = vma_lookup(mm, addr);
if (WARN_ON_ONCE(!vma)) {
@@ -2458,7 +2529,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
- bool need_rmap_locks);
+ bool need_rmap_locks, bool for_stack);
/*
* Flags used by change_protection(). For now we make it a bitmap so
@@ -2546,19 +2617,19 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member)
mm_trace_rss_stat(mm, member);
}
-/* Optimized variant when page is already known not to be PageAnon */
-static inline int mm_counter_file(struct page *page)
+/* Optimized variant when folio is already known not to be anon */
+static inline int mm_counter_file(struct folio *folio)
{
- if (PageSwapBacked(page))
+ if (folio_test_swapbacked(folio))
return MM_SHMEMPAGES;
return MM_FILEPAGES;
}
-static inline int mm_counter(struct page *page)
+static inline int mm_counter(struct folio *folio)
{
- if (PageAnon(page))
+ if (folio_test_anon(folio))
return MM_ANONPAGES;
- return mm_counter_file(page);
+ return mm_counter_file(folio);
}
static inline unsigned long get_mm_rss(struct mm_struct *mm)
@@ -2606,14 +2677,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
*maxrss = hiwater_rss;
}
-#if defined(SPLIT_RSS_COUNTING)
-void sync_mm_rss(struct mm_struct *mm);
-#else
-static inline void sync_mm_rss(struct mm_struct *mm)
-{
-}
-#endif
-
#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL
static inline int pte_special(pte_t pte)
{
@@ -2766,42 +2829,93 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
}
#endif /* CONFIG_MMU */
+static inline struct ptdesc *virt_to_ptdesc(const void *x)
+{
+ return page_ptdesc(virt_to_page(x));
+}
+
+static inline void *ptdesc_to_virt(const struct ptdesc *pt)
+{
+ return page_to_virt(ptdesc_page(pt));
+}
+
+static inline void *ptdesc_address(const struct ptdesc *pt)
+{
+ return folio_address(ptdesc_folio(pt));
+}
+
+static inline bool pagetable_is_reserved(struct ptdesc *pt)
+{
+ return folio_test_reserved(ptdesc_folio(pt));
+}
+
+/**
+ * pagetable_alloc - Allocate pagetables
+ * @gfp: GFP flags
+ * @order: desired pagetable order
+ *
+ * pagetable_alloc allocates memory for page tables as well as a page table
+ * descriptor to describe that memory.
+ *
+ * Return: The ptdesc describing the allocated page tables.
+ */
+static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order)
+{
+ struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+
+ return page_ptdesc(page);
+}
+
+/**
+ * pagetable_free - Free pagetables
+ * @pt: The page table descriptor
+ *
+ * pagetable_free frees the memory of all page tables described by a page
+ * table descriptor and the memory for the descriptor itself.
+ */
+static inline void pagetable_free(struct ptdesc *pt)
+{
+ struct page *page = ptdesc_page(pt);
+
+ __free_pages(page, compound_order(page));
+}
+
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
void __init ptlock_cache_init(void);
-extern bool ptlock_alloc(struct page *page);
-extern void ptlock_free(struct page *page);
+bool ptlock_alloc(struct ptdesc *ptdesc);
+void ptlock_free(struct ptdesc *ptdesc);
-static inline spinlock_t *ptlock_ptr(struct page *page)
+static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc)
{
- return page->ptl;
+ return ptdesc->ptl;
}
#else /* ALLOC_SPLIT_PTLOCKS */
static inline void ptlock_cache_init(void)
{
}
-static inline bool ptlock_alloc(struct page *page)
+static inline bool ptlock_alloc(struct ptdesc *ptdesc)
{
return true;
}
-static inline void ptlock_free(struct page *page)
+static inline void ptlock_free(struct ptdesc *ptdesc)
{
}
-static inline spinlock_t *ptlock_ptr(struct page *page)
+static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc)
{
- return &page->ptl;
+ return &ptdesc->ptl;
}
#endif /* ALLOC_SPLIT_PTLOCKS */
static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
- return ptlock_ptr(pmd_page(*pmd));
+ return ptlock_ptr(page_ptdesc(pmd_page(*pmd)));
}
-static inline bool ptlock_init(struct page *page)
+static inline bool ptlock_init(struct ptdesc *ptdesc)
{
/*
* prep_new_page() initialize page->private (and therefore page->ptl)
@@ -2810,10 +2924,10 @@ static inline bool ptlock_init(struct page *page)
* It can happen if arch try to use slab for page table allocation:
* slab code uses page->slab_cache, which share storage with page->ptl.
*/
- VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
- if (!ptlock_alloc(page))
+ VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc));
+ if (!ptlock_alloc(ptdesc))
return false;
- spin_lock_init(ptlock_ptr(page));
+ spin_lock_init(ptlock_ptr(ptdesc));
return true;
}
@@ -2826,24 +2940,28 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
return &mm->page_table_lock;
}
static inline void ptlock_cache_init(void) {}
-static inline bool ptlock_init(struct page *page) { return true; }
-static inline void ptlock_free(struct page *page) {}
+static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
+static inline void ptlock_free(struct ptdesc *ptdesc) {}
#endif /* USE_SPLIT_PTE_PTLOCKS */
-static inline bool pgtable_pte_page_ctor(struct page *page)
+static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
{
- if (!ptlock_init(page))
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ if (!ptlock_init(ptdesc))
return false;
- __SetPageTable(page);
- inc_lruvec_page_state(page, NR_PAGETABLE);
+ __folio_set_pgtable(folio);
+ lruvec_stat_add_folio(folio, NR_PAGETABLE);
return true;
}
-static inline void pgtable_pte_page_dtor(struct page *page)
+static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
{
- ptlock_free(page);
- __ClearPageTable(page);
- dec_lruvec_page_state(page, NR_PAGETABLE);
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ ptlock_free(ptdesc);
+ __folio_clear_pgtable(folio);
+ lruvec_stat_sub_folio(folio, NR_PAGETABLE);
}
pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
@@ -2892,28 +3010,33 @@ static inline struct page *pmd_pgtable_page(pmd_t *pmd)
return virt_to_page((void *)((unsigned long) pmd & mask));
}
+static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd)
+{
+ return page_ptdesc(pmd_pgtable_page(pmd));
+}
+
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
- return ptlock_ptr(pmd_pgtable_page(pmd));
+ return ptlock_ptr(pmd_ptdesc(pmd));
}
-static inline bool pmd_ptlock_init(struct page *page)
+static inline bool pmd_ptlock_init(struct ptdesc *ptdesc)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- page->pmd_huge_pte = NULL;
+ ptdesc->pmd_huge_pte = NULL;
#endif
- return ptlock_init(page);
+ return ptlock_init(ptdesc);
}
-static inline void pmd_ptlock_free(struct page *page)
+static inline void pmd_ptlock_free(struct ptdesc *ptdesc)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
+ VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc));
#endif
- ptlock_free(page);
+ ptlock_free(ptdesc);
}
-#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte)
+#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte)
#else
@@ -2922,8 +3045,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
return &mm->page_table_lock;
}
-static inline bool pmd_ptlock_init(struct page *page) { return true; }
-static inline void pmd_ptlock_free(struct page *page) {}
+static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; }
+static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {}
#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
@@ -2936,20 +3059,24 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}
-static inline bool pgtable_pmd_page_ctor(struct page *page)
+static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
{
- if (!pmd_ptlock_init(page))
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ if (!pmd_ptlock_init(ptdesc))
return false;
- __SetPageTable(page);
- inc_lruvec_page_state(page, NR_PAGETABLE);
+ __folio_set_pgtable(folio);
+ lruvec_stat_add_folio(folio, NR_PAGETABLE);
return true;
}
-static inline void pgtable_pmd_page_dtor(struct page *page)
+static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc)
{
- pmd_ptlock_free(page);
- __ClearPageTable(page);
- dec_lruvec_page_state(page, NR_PAGETABLE);
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ pmd_ptlock_free(ptdesc);
+ __folio_clear_pgtable(folio);
+ lruvec_stat_sub_folio(folio, NR_PAGETABLE);
}
/*
@@ -2971,6 +3098,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
return ptl;
}
+static inline void pagetable_pud_ctor(struct ptdesc *ptdesc)
+{
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ __folio_set_pgtable(folio);
+ lruvec_stat_add_folio(folio, NR_PAGETABLE);
+}
+
+static inline void pagetable_pud_dtor(struct ptdesc *ptdesc)
+{
+ struct folio *folio = ptdesc_folio(ptdesc);
+
+ __folio_clear_pgtable(folio);
+ lruvec_stat_sub_folio(folio, NR_PAGETABLE);
+}
+
extern void __init pagecache_init(void);
extern void free_initmem(void);
@@ -3004,6 +3147,11 @@ static inline void mark_page_reserved(struct page *page)
adjust_managed_page_count(page, -1);
}
+static inline void free_reserved_ptdesc(struct ptdesc *pt)
+{
+ free_reserved_page(ptdesc_page(pt));
+}
+
/*
* Default method to free all the __init memory into the buddy system.
* The freed pages will be poisoned with pattern "poison" if it's within
@@ -3069,9 +3217,9 @@ extern void mem_init(void);
extern void __init mmap_init(void);
extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
-static inline void show_mem(unsigned int flags, nodemask_t *nodemask)
+static inline void show_mem(void)
{
- __show_mem(flags, nodemask, MAX_NR_ZONES - 1);
+ __show_mem(0, NULL, MAX_NR_ZONES - 1);
}
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
@@ -3130,22 +3278,73 @@ extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct *next);
extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff);
-extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi,
- struct mm_struct *, struct vm_area_struct *prev, unsigned long addr,
- unsigned long end, unsigned long vm_flags, struct anon_vma *,
- struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx,
- struct anon_vma_name *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *,
- unsigned long addr, int new_below);
-extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *,
- unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+struct vm_area_struct *vma_modify(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long vm_flags,
+ struct mempolicy *policy,
+ struct vm_userfaultfd_ctx uffd_ctx,
+ struct anon_vma_name *anon_name);
+
+/* We are about to modify the VMA's flags. */
+static inline struct vm_area_struct
+*vma_modify_flags(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long new_flags)
+{
+ return vma_modify(vmi, prev, vma, start, end, new_flags,
+ vma_policy(vma), vma->vm_userfaultfd_ctx,
+ anon_vma_name(vma));
+}
+
+/* We are about to modify the VMA's flags and/or anon_name. */
+static inline struct vm_area_struct
+*vma_modify_flags_name(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long new_flags,
+ struct anon_vma_name *new_name)
+{
+ return vma_modify(vmi, prev, vma, start, end, new_flags,
+ vma_policy(vma), vma->vm_userfaultfd_ctx, new_name);
+}
+
+/* We are about to modify the VMA's memory policy. */
+static inline struct vm_area_struct
+*vma_modify_policy(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct mempolicy *new_pol)
+{
+ return vma_modify(vmi, prev, vma, start, end, vma->vm_flags,
+ new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+}
+
+/* We are about to modify the VMA's flags and/or uffd context. */
+static inline struct vm_area_struct
+*vma_modify_flags_uffd(struct vma_iterator *vmi,
+ struct vm_area_struct *prev,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long new_flags,
+ struct vm_userfaultfd_ctx new_ctx)
+{
+ return vma_modify(vmi, prev, vma, start, end, new_flags,
+ vma_policy(vma), new_ctx, anon_vma_name(vma));
+}
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
@@ -3193,7 +3392,8 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate, struct list_head *uf);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf);
extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool unlock);
@@ -3216,8 +3416,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif
-/* These take the mm semaphore themselves */
-extern int __must_check vm_brk(unsigned long, unsigned long);
+/* This takes the mm semaphore itself */
extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
@@ -3281,15 +3480,26 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
return mtree_load(&mm->mm_mt, addr);
}
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_GROWSDOWN)
+ return stack_guard_gap;
+
+ /* See reasoning around the VM_SHADOW_STACK definition */
+ if (vma->vm_flags & VM_SHADOW_STACK)
+ return PAGE_SIZE;
+
+ return 0;
+}
+
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
+ unsigned long gap = stack_guard_start_gap(vma);
unsigned long vm_start = vma->vm_start;
- if (vma->vm_flags & VM_GROWSDOWN) {
- vm_start -= stack_guard_gap;
- if (vm_start > vma->vm_start)
- vm_start = 0;
- }
+ vm_start -= gap;
+ if (vm_start > vma->vm_start)
+ vm_start = 0;
return vm_start;
}
@@ -3403,6 +3613,24 @@ static inline vm_fault_t vmf_error(int err)
return VM_FAULT_SIGBUS;
}
+/*
+ * Convert errno to return value for ->page_mkwrite() calls.
+ *
+ * This should eventually be merged with vmf_error() above, but will need a
+ * careful audit of all vmf_error() callers.
+ */
+static inline vm_fault_t vmf_fs_error(int err)
+{
+ if (err == 0)
+ return VM_FAULT_LOCKED;
+ if (err == -EFAULT || err == -EAGAIN)
+ return VM_FAULT_NOPAGE;
+ if (err == -ENOMEM)
+ return VM_FAULT_OOM;
+ /* -ENOSPC, -EDQUOT, -EIO ... */
+ return VM_FAULT_SIGBUS;
+}
+
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned int foll_flags);
@@ -3509,8 +3737,8 @@ static inline bool debug_pagealloc_enabled(void)
}
/*
- * For use in fast paths after init_debug_pagealloc() has run, or when a
- * false negative result is not harmful when called too early.
+ * For use in fast paths after mem_debugging_and_hardening_init() has run,
+ * or when a false negative result is not harmful when called too early.
*/
static inline bool debug_pagealloc_enabled_static(void)
{
@@ -3665,13 +3893,58 @@ void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap);
#endif
-#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP
-static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
- struct dev_pagemap *pgmap)
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
- return is_power_of_2(sizeof(struct page)) &&
- pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap;
+ /* number of pfns from base where pfn_to_page() is valid */
+ if (altmap)
+ return altmap->reserve + altmap->free;
+ return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+ unsigned long nr_pfns)
+{
+ altmap->alloc -= nr_pfns;
+}
+#else
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+ return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+ unsigned long nr_pfns)
+{
+}
+#endif
+
+#define VMEMMAP_RESERVE_NR 2
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
+{
+ unsigned long nr_pages;
+ unsigned long nr_vmemmap_pages;
+
+ if (!pgmap || !is_power_of_2(sizeof(struct page)))
+ return false;
+
+ nr_pages = pgmap_vmemmap_nr(pgmap);
+ nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT);
+ /*
+ * For vmemmap optimization with DAX we need minimum 2 vmemmap
+ * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst
+ */
+ return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR);
}
+/*
+ * If we don't have an architecture override, use the generic rule
+ */
+#ifndef vmemmap_can_optimize
+#define vmemmap_can_optimize __vmemmap_can_optimize
+#endif
+
#else
static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
@@ -3691,6 +3964,7 @@ enum mf_flags {
MF_UNPOISON = 1 << 4,
MF_SW_SIMULATED = 1 << 5,
MF_NO_RETRY = 1 << 6,
+ MF_MEM_PRE_REMOVE = 1 << 7,
};
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
@@ -3860,25 +4134,26 @@ static inline void mem_dump_obj(void *object) {}
#endif
/**
- * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and
+ * handle them.
* @seals: the seals to check
* @vma: the vma to operate on
*
- * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
- * the vma flags. Return 0 if check pass, or <0 for errors.
+ * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper
+ * check/handling on the vma flags. Return 0 if check pass, or <0 for errors.
*/
-static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+static inline int seal_check_write(int seals, struct vm_area_struct *vma)
{
- if (seals & F_SEAL_FUTURE_WRITE) {
+ if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
/*
* New PROT_WRITE and MAP_SHARED mmaps are not allowed when
- * "future write" seal active.
+ * write seals are active.
*/
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
return -EPERM;
/*
- * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+ * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
* MAP_SHARED and read-only, take care to not allow mprotect to
* revert protections on such mappings. Do this only for shared
* mappings. For private mappings, don't need to mask
@@ -3922,4 +4197,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end)
#endif
+static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
+{
+ phys_addr_t paddr = pfn << PAGE_SHIFT;
+
+ return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
+}
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 21d6c72bcc71..f4fe593c1400 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -4,6 +4,7 @@
#include <linux/atomic.h>
#include <linux/huge_mm.h>
+#include <linux/mm_types.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/userfaultfd_k.h>
@@ -231,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
if (folio_test_unevictable(folio) || !lrugen->enabled)
return false;
/*
- * There are three common cases for this page:
- * 1. If it's hot, e.g., freshly faulted in or previously hot and
- * migrated, add it to the youngest generation.
- * 2. If it's cold but can't be evicted immediately, i.e., an anon page
- * not in swapcache or a dirty page pending writeback, add it to the
- * second oldest generation.
- * 3. Everything else (clean, cold) is added to the oldest generation.
+ * There are four common cases for this page:
+ * 1. If it's hot, i.e., freshly faulted in, add it to the youngest
+ * generation, and it's protected over the rest below.
+ * 2. If it can't be evicted immediately, i.e., a dirty page pending
+ * writeback, add it to the second youngest generation.
+ * 3. If it should be evicted first, e.g., cold and clean from
+ * folio_rotate_reclaimable(), add it to the oldest generation.
+ * 4. Everything else falls between 2 & 3 above and is added to the
+ * second oldest generation if it's considered inactive, or the
+ * oldest generation otherwise. See lru_gen_is_active().
*/
if (folio_test_active(folio))
seq = lrugen->max_seq;
else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
(folio_test_reclaim(folio) &&
(folio_test_dirty(folio) || folio_test_writeback(folio))))
- seq = lrugen->min_seq[type] + 1;
- else
+ seq = lrugen->max_seq - 1;
+ else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq)
seq = lrugen->min_seq[type];
+ else
+ seq = lrugen->min_seq[type] + 1;
gen = lru_gen_from_seq(seq);
flags = (gen + 1UL) << LRU_GEN_PGOFF;
@@ -352,15 +358,6 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
}
#ifdef CONFIG_ANON_VMA_NAME
-/*
- * mmap_lock should be read-locked when calling anon_vma_name(). Caller should
- * either keep holding the lock while using the returned pointer or it should
- * raise anon_vma_name refcount before releasing the lock.
- */
-extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
-extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
-extern void anon_vma_name_free(struct kref *kref);
-
/* mmap_lock should be read-locked */
static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
{
@@ -415,16 +412,6 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
}
#else /* CONFIG_ANON_VMA_NAME */
-static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
-{
- return NULL;
-}
-
-static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
-{
- return NULL;
-}
-
static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {}
static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {}
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
@@ -523,6 +510,27 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
return atomic_read(&mm->tlb_flush_pending) > 1;
}
+#ifdef CONFIG_MMU
+/*
+ * Computes the pte marker to copy from the given source entry into dst_vma.
+ * If no marker should be copied, returns 0.
+ * The caller should insert a new pte created with make_pte_marker().
+ */
+static inline pte_marker copy_pte_marker(
+ swp_entry_t entry, struct vm_area_struct *dst_vma)
+{
+ pte_marker srcm = pte_marker_get(entry);
+ /* Always copy error entries. */
+ pte_marker dstm = srcm & PTE_MARKER_POISONED;
+
+ /* Only copy PTE markers if UFFD register matches. */
+ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
+ dstm |= PTE_MARKER_UFFD_WP;
+
+ return dstm;
+}
+#endif
+
/*
* If this pte is wr-protected by uffd-wp in any form, arm the special pte to
* replace a none pte. NOTE! This should only be called when *pte is already
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7d30dc4ff0ff..5240bd7bca33 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -125,36 +125,11 @@ struct page {
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
- union {
- /**
- * dma_addr_upper: might require a 64-bit
- * value on 32-bit architectures.
- */
- unsigned long dma_addr_upper;
- /**
- * For frag page support, not supported in
- * 32-bit architectures with 64-bit DMA.
- */
- atomic_long_t pp_frag_count;
- };
+ atomic_long_t pp_ref_count;
};
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
};
- struct { /* Page table pages */
- unsigned long _pt_pad_1; /* compound_head */
- pgtable_t pmd_huge_pte; /* protected by page->ptl */
- unsigned long _pt_pad_2; /* mapping */
- union {
- struct mm_struct *pt_mm; /* x86 pgds only */
- atomic_t pt_frag_refcount; /* powerpc */
- };
-#if ALLOC_SPLIT_PTLOCKS
- spinlock_t *ptl;
-#else
- spinlock_t ptl;
-#endif
- };
struct { /* ZONE_DEVICE pages */
/** @pgmap: Points to the hosting device page map. */
struct dev_pagemap *pgmap;
@@ -213,6 +188,10 @@ struct page {
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+ int _last_cpupid;
+#endif
+
#ifdef CONFIG_KMSAN
/*
* KMSAN metadata for this page:
@@ -224,10 +203,6 @@ struct page {
struct page *kmsan_shadow;
struct page *kmsan_origin;
#endif
-
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
- int _last_cpupid;
-#endif
} _struct_page_alignment;
/*
@@ -235,8 +210,8 @@ struct page {
*
* An 'encoded_page' pointer is a pointer to a regular 'struct page', but
* with the low bits of the pointer indicating extra context-dependent
- * information. Not super-common, but happens in mmu_gather and mlock
- * handling, and this acts as a type system check on that use.
+ * information. Only used in mmu_gather handling, and this acts as a type
+ * system check on that use.
*
* We only really have two guaranteed bits in general, although you could
* play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -245,23 +220,56 @@ struct page {
* Use the supplied helper functions to endcode/decode the pointer and bits.
*/
struct encoded_page;
-#define ENCODE_PAGE_BITS 3ul
+
+#define ENCODED_PAGE_BITS 3ul
+
+/* Perform rmap removal after we have flushed the TLB. */
+#define ENCODED_PAGE_BIT_DELAY_RMAP 1ul
+
+/*
+ * The next item in an encoded_page array is the "nr_pages" argument, specifying
+ * the number of consecutive pages starting from this page, that all belong to
+ * the same folio. For example, "nr_pages" corresponds to the number of folio
+ * references that must be dropped. If this bit is not set, "nr_pages" is
+ * implicitly 1.
+ */
+#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul
+
static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
{
- BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
+ BUILD_BUG_ON(flags > ENCODED_PAGE_BITS);
return (struct encoded_page *)(flags | (unsigned long)page);
}
static inline unsigned long encoded_page_flags(struct encoded_page *page)
{
- return ENCODE_PAGE_BITS & (unsigned long)page;
+ return ENCODED_PAGE_BITS & (unsigned long)page;
}
static inline struct page *encoded_page_ptr(struct encoded_page *page)
{
- return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+ return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page);
}
+static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr)
+{
+ VM_WARN_ON_ONCE((nr << 2) >> 2 != nr);
+ return (struct encoded_page *)(nr << 2);
+}
+
+static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page)
+{
+ return ((unsigned long)page) >> 2;
+}
+
+/*
+ * A swap entry has to fit into a "unsigned long", as the entry is hidden
+ * in the "index" field of the swapper address space.
+ */
+typedef struct {
+ unsigned long val;
+} swp_entry_t;
+
/**
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
@@ -272,14 +280,14 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page)
* @index: Offset within the file, in units of pages. For anonymous memory,
* this is the index from the beginning of the mmap.
* @private: Filesystem per-folio data (see folio_attach_private()).
- * Used for swp_entry_t if folio_test_swapcache().
+ * @swap: Used for swp_entry_t if folio_test_swapcache().
* @_mapcount: Do not access this member directly. Use folio_mapcount() to
* find out how many times this folio is mapped by userspace.
* @_refcount: Do not access this member directly. Use folio_ref_count()
* to find how many references there are to this folio.
* @memcg_data: Memory Control Group data.
- * @_folio_dtor: Which destructor to use for this folio.
- * @_folio_order: Do not use directly, call folio_order().
+ * @virtual: Virtual address in the kernel direct map.
+ * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
* @_nr_pages_mapped: Do not use directly, call folio_mapcount().
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
@@ -317,12 +325,21 @@ struct folio {
};
struct address_space *mapping;
pgoff_t index;
- void *private;
+ union {
+ void *private;
+ swp_entry_t swap;
+ };
atomic_t _mapcount;
atomic_t _refcount;
#ifdef CONFIG_MEMCG
unsigned long memcg_data;
#endif
+#if defined(WANT_PAGE_VIRTUAL)
+ void *virtual;
+#endif
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+ int _last_cpupid;
+#endif
/* private: the union with struct page is transitional */
};
struct page page;
@@ -331,9 +348,8 @@ struct folio {
struct {
unsigned long _flags_1;
unsigned long _head_1;
+ unsigned long _folio_avail;
/* public: */
- unsigned char _folio_dtor;
- unsigned char _folio_order;
atomic_t _entire_mapcount;
atomic_t _nr_pages_mapped;
atomic_t _pincount;
@@ -379,6 +395,12 @@ FOLIO_MATCH(_refcount, _refcount);
#ifdef CONFIG_MEMCG
FOLIO_MATCH(memcg_data, memcg_data);
#endif
+#if defined(WANT_PAGE_VIRTUAL)
+FOLIO_MATCH(virtual, virtual);
+#endif
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+FOLIO_MATCH(_last_cpupid, _last_cpupid);
+#endif
#undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl) \
static_assert(offsetof(struct folio, fl) == \
@@ -391,8 +413,92 @@ FOLIO_MATCH(compound_head, _head_1);
offsetof(struct page, pg) + 2 * sizeof(struct page))
FOLIO_MATCH(flags, _flags_2);
FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(flags, _flags_2a);
+FOLIO_MATCH(compound_head, _head_2a);
#undef FOLIO_MATCH
+/**
+ * struct ptdesc - Memory descriptor for page tables.
+ * @__page_flags: Same as page flags. Powerpc only.
+ * @pt_rcu_head: For freeing page table pages.
+ * @pt_list: List of used page tables. Used for s390 and x86.
+ * @_pt_pad_1: Padding that aliases with page's compound head.
+ * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs.
+ * @__page_mapping: Aliases with page->mapping. Unused for page tables.
+ * @pt_index: Used for s390 gmap.
+ * @pt_mm: Used for x86 pgds.
+ * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
+ * @_pt_pad_2: Padding to ensure proper alignment.
+ * @ptl: Lock for the page table.
+ * @__page_type: Same as page->page_type. Unused for page tables.
+ * @__page_refcount: Same as page refcount.
+ * @pt_memcg_data: Memcg data. Tracked for page tables here.
+ *
+ * This struct overlays struct page for now. Do not modify without a good
+ * understanding of the issues.
+ */
+struct ptdesc {
+ unsigned long __page_flags;
+
+ union {
+ struct rcu_head pt_rcu_head;
+ struct list_head pt_list;
+ struct {
+ unsigned long _pt_pad_1;
+ pgtable_t pmd_huge_pte;
+ };
+ };
+ unsigned long __page_mapping;
+
+ union {
+ pgoff_t pt_index;
+ struct mm_struct *pt_mm;
+ atomic_t pt_frag_refcount;
+ };
+
+ union {
+ unsigned long _pt_pad_2;
+#if ALLOC_SPLIT_PTLOCKS
+ spinlock_t *ptl;
+#else
+ spinlock_t ptl;
+#endif
+ };
+ unsigned int __page_type;
+ atomic_t __page_refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long pt_memcg_data;
+#endif
+};
+
+#define TABLE_MATCH(pg, pt) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt))
+TABLE_MATCH(flags, __page_flags);
+TABLE_MATCH(compound_head, pt_list);
+TABLE_MATCH(compound_head, _pt_pad_1);
+TABLE_MATCH(mapping, __page_mapping);
+TABLE_MATCH(index, pt_index);
+TABLE_MATCH(rcu_head, pt_rcu_head);
+TABLE_MATCH(page_type, __page_type);
+TABLE_MATCH(_refcount, __page_refcount);
+#ifdef CONFIG_MEMCG
+TABLE_MATCH(memcg_data, pt_memcg_data);
+#endif
+#undef TABLE_MATCH
+static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
+
+#define ptdesc_page(pt) (_Generic((pt), \
+ const struct ptdesc *: (const struct page *)(pt), \
+ struct ptdesc *: (struct page *)(pt)))
+
+#define ptdesc_folio(pt) (_Generic((pt), \
+ const struct ptdesc *: (const struct folio *)(pt), \
+ struct ptdesc *: (struct folio *)(pt)))
+
+#define page_ptdesc(p) (_Generic((p), \
+ const struct page *: (const struct ptdesc *)(p), \
+ struct page *: (struct ptdesc *)(p)))
+
/*
* Used for sizing the vmemmap region on some architectures
*/
@@ -471,14 +577,65 @@ struct anon_vma_name {
char name[];
};
+#ifdef CONFIG_ANON_VMA_NAME
+/*
+ * mmap_lock should be read-locked when calling anon_vma_name(). Caller should
+ * either keep holding the lock while using the returned pointer or it should
+ * raise anon_vma_name refcount before releasing the lock.
+ */
+struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
+struct anon_vma_name *anon_vma_name_alloc(const char *name);
+void anon_vma_name_free(struct kref *kref);
+#else /* CONFIG_ANON_VMA_NAME */
+static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+
+static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
+{
+ return NULL;
+}
+#endif
+
struct vma_lock {
struct rw_semaphore lock;
};
struct vma_numab_state {
+ /*
+ * Initialised as time in 'jiffies' after which VMA
+ * should be scanned. Delays first scan of new VMA by at
+ * least sysctl_numa_balancing_scan_delay:
+ */
unsigned long next_scan;
- unsigned long next_pid_reset;
- unsigned long access_pids[2];
+
+ /*
+ * Time in jiffies when pids_active[] is reset to
+ * detect phase change behaviour:
+ */
+ unsigned long pids_active_reset;
+
+ /*
+ * Approximate tracking of PIDs that trapped a NUMA hinting
+ * fault. May produce false positives due to hash collisions.
+ *
+ * [0] Previous PID tracking
+ * [1] Current PID tracking
+ *
+ * Window moves after next_pid_reset has expired approximately
+ * every VMA_PID_RESET_PERIOD jiffies:
+ */
+ unsigned long pids_active[2];
+
+ /* MM scan sequence ID when scan first started after VMA creation */
+ int start_scan_seq;
+
+ /*
+ * MM scan sequence ID when the VMA was last completely scanned.
+ * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq
+ */
+ int prev_scan_seq;
};
/*
@@ -587,6 +744,12 @@ struct vm_area_struct {
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
+#ifdef CONFIG_NUMA
+#define vma_policy(vma) ((vma)->vm_policy)
+#else
+#define vma_policy(vma) NULL
+#endif
+
#ifdef CONFIG_SCHED_MM_CID
struct mm_cid {
u64 time;
@@ -595,6 +758,7 @@ struct mm_cid {
#endif
struct kioctx_table;
+struct iommu_mm_data;
struct mm_struct {
struct {
/*
@@ -806,13 +970,13 @@ struct mm_struct {
#endif
struct work_struct async_put_work;
-#ifdef CONFIG_IOMMU_SVA
- u32 pasid;
+#ifdef CONFIG_IOMMU_MM_DATA
+ struct iommu_mm_data *iommu_mm;
#endif
#ifdef CONFIG_KSM
/*
* Represent how many pages of this process are involved in KSM
- * merging.
+ * merging (not including ksm_zero_pages).
*/
unsigned long ksm_merging_pages;
/*
@@ -820,8 +984,13 @@ struct mm_struct {
* including merged and not merged.
*/
unsigned long ksm_rmap_items;
-#endif
-#ifdef CONFIG_LRU_GEN
+ /*
+ * Represent how many empty pages are merged with kernel zero
+ * pages when enabling KSM use_zero_pages.
+ */
+ unsigned long ksm_zero_pages;
+#endif /* CONFIG_KSM */
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
struct {
/* this mm_struct is on lru_gen_mm_list */
struct list_head list;
@@ -836,7 +1005,7 @@ struct mm_struct {
struct mem_cgroup *memcg;
#endif
} lru_gen;
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_GEN_WALKS_MMU */
} __randomize_layout;
/*
@@ -874,11 +1043,13 @@ struct lru_gen_mm_list {
spinlock_t lock;
};
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
+
void lru_gen_add_mm(struct mm_struct *mm);
void lru_gen_del_mm(struct mm_struct *mm);
-#ifdef CONFIG_MEMCG
void lru_gen_migrate_mm(struct mm_struct *mm);
-#endif
static inline void lru_gen_init_mm(struct mm_struct *mm)
{
@@ -899,7 +1070,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
WRITE_ONCE(mm->lru_gen.bitmap, -1);
}
-#else /* !CONFIG_LRU_GEN */
+#else /* !CONFIG_LRU_GEN_WALKS_MMU */
static inline void lru_gen_add_mm(struct mm_struct *mm)
{
@@ -909,11 +1080,9 @@ static inline void lru_gen_del_mm(struct mm_struct *mm)
{
}
-#ifdef CONFIG_MEMCG
static inline void lru_gen_migrate_mm(struct mm_struct *mm)
{
}
-#endif
static inline void lru_gen_init_mm(struct mm_struct *mm)
{
@@ -923,7 +1092,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
{
}
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_GEN_WALKS_MMU */
struct vma_iterator {
struct ma_state mas;
@@ -934,7 +1103,8 @@ struct vma_iterator {
.mas = { \
.tree = &(__mm)->mm_mt, \
.index = __addr, \
- .node = MAS_START, \
+ .node = NULL, \
+ .status = ma_start, \
}, \
}
@@ -1105,7 +1275,8 @@ enum vm_fault_reason {
{ VM_FAULT_RETRY, "RETRY" }, \
{ VM_FAULT_FALLBACK, "FALLBACK" }, \
{ VM_FAULT_DONE_COW, "DONE_COW" }, \
- { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }
+ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \
+ { VM_FAULT_COMPLETED, "COMPLETED" }
struct vm_special_mapping {
const char *name; /* The name, e.g. "[vdso]". */
@@ -1139,14 +1310,6 @@ enum tlb_flush_reason {
NR_TLB_FLUSH_REASONS,
};
- /*
- * A swap entry has to fit into a "unsigned long", as the entry is hidden
- * in the "index" field of the swapper address space.
- */
-typedef struct {
- unsigned long val;
-} swp_entry_t;
-
/**
* enum fault_flag - Fault flag definitions.
* @FAULT_FLAG_WRITE: Fault was a write fault.
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 5414b5c6a103..a2f6179b672b 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -9,9 +9,6 @@
*/
#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/atomic.h>
-#include <linux/cpumask.h>
#include <asm/page.h>
@@ -36,6 +33,8 @@ enum {
NR_MM_COUNTERS
};
+struct page;
+
struct page_frag {
struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
@@ -52,8 +51,8 @@ struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/*
* The arch code makes the following promise: generic code can modify a
- * PTE, then call arch_tlbbatch_add_mm() (which internally provides all
- * needed barriers), then call arch_tlbbatch_flush(), and the entries
+ * PTE, then call arch_tlbbatch_add_pending() (which internally provides
+ * all needed barriers), then call arch_tlbbatch_flush(), and the entries
* will be flushed on all CPUs by the time that arch_tlbbatch_flush()
* returns.
*/
diff --git a/include/linux/mman.h b/include/linux/mman.h
index cee1e4b566d8..bcb201ab7a41 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -15,6 +15,9 @@
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
+#ifndef MAP_ABOVE4G
+#define MAP_ABOVE4G 0
+#endif
#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB 0
#endif
@@ -50,6 +53,7 @@
| MAP_STACK \
| MAP_HUGETLB \
| MAP_32BIT \
+ | MAP_ABOVE4G \
| MAP_HUGE_2MB \
| MAP_HUGE_1GB)
@@ -152,11 +156,20 @@ calc_vm_flag_bits(unsigned long flags)
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
+ _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
arch_calc_vm_flag_bits(flags);
}
unsigned long vm_commit_limit(void);
+#ifndef arch_memory_deny_write_exec_supported
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return true;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+#endif
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index e05e167dbd16..8d38dcb6d044 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -73,6 +73,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
}
#ifdef CONFIG_PER_VMA_LOCK
+/*
+ * Drop all currently-held per-VMA locks.
+ * This is called from the mmap_lock implementation directly before releasing
+ * a write-locked mmap_lock (or downgrading it to read-locked).
+ * This should normally NOT be called manually from other places.
+ * If you want to call this manually anyway, keep in mind that this will release
+ * *all* VMA write locks, including ones from further up the stack.
+ */
static inline void vma_end_write_all(struct mm_struct *mm)
{
mmap_assert_write_locked(mm);
@@ -118,16 +126,6 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
return ret;
}
-static inline bool mmap_write_trylock(struct mm_struct *mm)
-{
- bool ret;
-
- __mmap_lock_trace_start_locking(mm, true);
- ret = down_write_trylock(&mm->mmap_lock) != 0;
- __mmap_lock_trace_acquire_returned(mm, true, ret);
- return ret;
-}
-
static inline void mmap_write_unlock(struct mm_struct *mm)
{
__mmap_lock_trace_released(mm, true);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index daa2f40d9ce6..f34407cc2788 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -32,6 +32,7 @@ struct mmc_csd {
unsigned int r2w_factor;
unsigned int max_dtr;
unsigned int erase_size; /* In sectors */
+ unsigned int wp_grp_size;
unsigned int read_blkbits;
unsigned int write_blkbits;
unsigned int capacity;
@@ -52,9 +53,6 @@ struct mmc_ext_csd {
u8 part_config;
u8 cache_ctrl;
u8 rst_n_function;
- u8 max_packed_writes;
- u8 max_packed_reads;
- u8 packed_event_en;
unsigned int part_time; /* Units: ms */
unsigned int sa_timeout; /* Units: 100ns */
unsigned int generic_cmd6_time; /* Units: 10ms */
@@ -295,7 +293,9 @@ struct mmc_card {
#define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */
#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */
#define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */
+#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */
+ bool written_flag; /* Indicates eMMC has been written since power on */
bool reenable_cmdq; /* Re-enable Command Queue */
unsigned int erase_size; /* erase size in sectors */
@@ -304,6 +304,7 @@ struct mmc_card {
unsigned int eg_boundary; /* don't cross erase-group boundaries */
unsigned int erase_arg; /* erase / trim / discard */
u8 erased_byte; /* value of erased bytes */
+ unsigned int wp_grp_size; /* write group size in sectors */
u32 raw_cid[4]; /* raw card CID */
u32 raw_csd[4]; /* raw card CSD */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 6efec0b9820c..2c7928a50907 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -27,7 +27,6 @@ struct mmc_command {
u32 opcode;
u32 arg;
#define MMC_CMD23_ARG_REL_WR (1 << 31)
-#define MMC_CMD23_ARG_PACKED ((0 << 31) | (1 << 30))
#define MMC_CMD23_ARG_TAG_REQ (1 << 29)
u32 resp[4];
unsigned int flags; /* expected response type */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 461d1543893b..5894bf912f7b 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -184,6 +184,12 @@ struct mmc_host_ops {
/* Execute HS400 tuning depending host driver */
int (*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card);
+ /* Optional callback to prepare for SD high-speed tuning */
+ int (*prepare_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card);
+
+ /* Optional callback to execute SD high-speed tuning */
+ int (*execute_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card);
+
/* Prepare switch to DDR during the HS400 init sequence */
int (*hs400_prepare_ddr)(struct mmc_host *host);
@@ -520,6 +526,7 @@ struct mmc_host {
/* Host Software Queue support */
bool hsq_enabled;
+ int hsq_depth;
u32 err_stats[MMC_ERR_MAX];
unsigned long private[] ____cacheline_aligned;
@@ -532,7 +539,7 @@ struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra);
int mmc_add_host(struct mmc_host *);
void mmc_remove_host(struct mmc_host *);
void mmc_free_host(struct mmc_host *);
-void mmc_of_parse_clk_phase(struct mmc_host *host,
+void mmc_of_parse_clk_phase(struct device *dev,
struct mmc_clk_phase_map *map);
int mmc_of_parse(struct mmc_host *host);
int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask);
@@ -665,6 +672,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host,
host->err_stats[stat] += 1;
}
+int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp);
+int mmc_send_status(struct mmc_card *card, u32 *status);
int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 6f7993803ee7..cf2bcb5da30d 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -257,8 +257,6 @@ static inline bool mmc_ready_for_data(u32 status)
#define EXT_CSD_FLUSH_CACHE 32 /* W */
#define EXT_CSD_CACHE_CTRL 33 /* R/W */
#define EXT_CSD_POWER_OFF_NOTIFICATION 34 /* R/W */
-#define EXT_CSD_PACKED_FAILURE_INDEX 35 /* RO */
-#define EXT_CSD_PACKED_CMD_STATUS 36 /* RO */
#define EXT_CSD_EXP_EVENTS_STATUS 54 /* RO, 2 bytes */
#define EXT_CSD_EXP_EVENTS_CTRL 56 /* R/W, 2 bytes */
#define EXT_CSD_DATA_SECTOR_SIZE 61 /* R */
@@ -321,8 +319,6 @@ static inline bool mmc_ready_for_data(u32 status)
#define EXT_CSD_SUPPORTED_MODE 493 /* RO */
#define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */
#define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */
-#define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */
-#define EXT_CSD_MAX_PACKED_READS 501 /* RO */
#define EXT_CSD_BKOPS_SUPPORT 502 /* RO */
#define EXT_CSD_HPI_FEATURES 503 /* RO */
@@ -402,18 +398,12 @@ static inline bool mmc_ready_for_data(u32 status)
#define EXT_CSD_PWR_CL_8BIT_SHIFT 4
#define EXT_CSD_PWR_CL_4BIT_SHIFT 0
-#define EXT_CSD_PACKED_EVENT_EN BIT(3)
-
/*
* EXCEPTION_EVENT_STATUS field
*/
#define EXT_CSD_URGENT_BKOPS BIT(0)
#define EXT_CSD_DYNCAP_NEEDED BIT(1)
#define EXT_CSD_SYSPOOL_EXHAUSTED BIT(2)
-#define EXT_CSD_PACKED_FAILURE BIT(3)
-
-#define EXT_CSD_PACKED_GENERIC_ERROR BIT(0)
-#define EXT_CSD_PACKED_INDEXED_ERROR BIT(1)
/*
* BKOPS status level
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 7c3e7b0b0e8f..39a7714605a7 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -10,7 +10,7 @@ struct vm_area_struct;
struct mm_struct;
struct vma_iterator;
-void dump_page(struct page *page, const char *reason);
+void dump_page(const struct page *page, const char *reason);
void dump_vma(const struct vm_area_struct *vma);
void dump_mm(const struct mm_struct *mm);
void vma_iter_dump_tree(const struct vma_iterator *vmi);
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index f2b7a3f04099..bbaec80c78c5 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -11,7 +11,7 @@
#endif
#ifndef leave_mm
-static inline void leave_mm(int cpu) { }
+static inline void leave_mm(void) { }
#endif
/*
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 64a3e051c3c4..f349e08a9dfe 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -187,27 +187,27 @@ struct mmu_notifier_ops {
const struct mmu_notifier_range *range);
/*
- * invalidate_range() is either called between
- * invalidate_range_start() and invalidate_range_end() when the
- * VM has to free pages that where unmapped, but before the
- * pages are actually freed, or outside of _start()/_end() when
- * a (remote) TLB is necessary.
+ * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB
+ * which shares page-tables with the CPU. The
+ * invalidate_range_start()/end() callbacks should not be implemented as
+ * invalidate_secondary_tlbs() already catches the points in time when
+ * an external TLB needs to be flushed.
*
- * If invalidate_range() is used to manage a non-CPU TLB with
- * shared page-tables, it not necessary to implement the
- * invalidate_range_start()/end() notifiers, as
- * invalidate_range() already catches the points in time when an
- * external TLB range needs to be flushed. For more in depth
- * discussion on this see Documentation/mm/mmu_notifier.rst
+ * This requires arch_invalidate_secondary_tlbs() to be called while
+ * holding the ptl spin-lock and therefore this callback is not allowed
+ * to sleep.
*
- * Note that this function might be called with just a sub-range
- * of what was passed to invalidate_range_start()/end(), if
- * called between those functions.
+ * This is called by architecture code whenever invalidating a TLB
+ * entry. It is assumed that any secondary TLB has the same rules for
+ * when invalidations are required. If this is not the case architecture
+ * code will need to call this explicitly when required for secondary
+ * TLB invalidation.
*/
- void (*invalidate_range)(struct mmu_notifier *subscription,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end);
+ void (*arch_invalidate_secondary_tlbs)(
+ struct mmu_notifier *subscription,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end);
/*
* These callbacks are used with the get/put interface to manage the
@@ -395,10 +395,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
-extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r,
- bool only_end);
-extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
- unsigned long start, unsigned long end);
+extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
+extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
extern bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range);
@@ -460,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}
-static inline int
+/*
+ * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it
+ * can return an error if a notifier can't proceed without blocking, in which
+ * case you're not allowed to modify PTEs in the specified range.
+ *
+ * This is mainly intended for OOM handling.
+ */
+static inline int __must_check
mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
{
int ret = 0;
@@ -481,21 +487,14 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
might_sleep();
if (mm_has_notifiers(range->mm))
- __mmu_notifier_invalidate_range_end(range, false);
-}
-
-static inline void
-mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
-{
- if (mm_has_notifiers(range->mm))
- __mmu_notifier_invalidate_range_end(range, true);
+ __mmu_notifier_invalidate_range_end(range);
}
-static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
- __mmu_notifier_invalidate_range(mm, start, end);
+ __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
@@ -582,45 +581,6 @@ static inline void mmu_notifier_range_init_owner(
__young; \
})
-#define ptep_clear_flush_notify(__vma, __address, __ptep) \
-({ \
- unsigned long ___addr = __address & PAGE_MASK; \
- struct mm_struct *___mm = (__vma)->vm_mm; \
- pte_t ___pte; \
- \
- ___pte = ptep_clear_flush(__vma, __address, __ptep); \
- mmu_notifier_invalidate_range(___mm, ___addr, \
- ___addr + PAGE_SIZE); \
- \
- ___pte; \
-})
-
-#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \
-({ \
- unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
- struct mm_struct *___mm = (__vma)->vm_mm; \
- pmd_t ___pmd; \
- \
- ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \
- mmu_notifier_invalidate_range(___mm, ___haddr, \
- ___haddr + HPAGE_PMD_SIZE); \
- \
- ___pmd; \
-})
-
-#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \
-({ \
- unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \
- struct mm_struct *___mm = (__vma)->vm_mm; \
- pud_t ___pud; \
- \
- ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \
- mmu_notifier_invalidate_range(___mm, ___haddr, \
- ___haddr + HPAGE_PUD_SIZE); \
- \
- ___pud; \
-})
-
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -711,12 +671,7 @@ void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
}
-static inline void
-mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
-{
-}
-
-static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5e50b78d58ea..c11b7cde81ef 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -22,18 +22,21 @@
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/local_lock.h>
+#include <linux/zswap.h>
#include <asm/page.h>
/* Free memory management - zoned buddy allocator. */
#ifndef CONFIG_ARCH_FORCE_MAX_ORDER
-#define MAX_ORDER 10
+#define MAX_PAGE_ORDER 10
#else
-#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
+#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
#endif
-#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER)
+#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER)
#define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES)
+#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)
+
/*
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
* costly to service. That is between allocation orders which should
@@ -73,9 +76,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
+# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \
+ get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK))
#else
# define is_migrate_cma(migratetype) false
# define is_migrate_cma_page(_page) false
+# define is_migrate_cma_folio(folio, pfn) false
#endif
static inline bool is_migrate_movable(int mt)
@@ -95,7 +101,7 @@ static inline bool migratetype_is_mergeable(int mt)
}
#define for_each_migratetype_order(order, type) \
- for (order = 0; order <= MAX_ORDER; order++) \
+ for (order = 0; order < NR_PAGE_ORDERS; order++) \
for (type = 0; type < MIGRATE_TYPES; type++)
extern int page_group_by_mobility_disabled;
@@ -207,6 +213,10 @@ enum node_stat_item {
PGPROMOTE_SUCCESS, /* promote successfully */
PGPROMOTE_CANDIDATE, /* candidate pages to promote */
#endif
+ /* PGDEMOTE_*: pages demoted */
+ PGDEMOTE_KSWAPD,
+ PGDEMOTE_DIRECT,
+ PGDEMOTE_KHUGEPAGED,
NR_VM_NODE_STAT_ITEMS
};
@@ -435,14 +445,12 @@ struct lru_gen_folio {
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* whether the multi-gen LRU is enabled */
bool enabled;
-#ifdef CONFIG_MEMCG
/* the memcg generation this lru_gen_folio belongs to */
u8 gen;
/* the list segment this lru_gen_folio belongs to */
u8 seg;
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_node list;
-#endif
};
enum {
@@ -459,7 +467,7 @@ enum {
#define NR_BLOOM_FILTERS 2
struct lru_gen_mm_state {
- /* set to max_seq after each iteration */
+ /* synced with max_seq after each iteration */
unsigned long seq;
/* where the current iteration continues after */
struct list_head *head;
@@ -474,8 +482,8 @@ struct lru_gen_mm_state {
struct lru_gen_mm_walk {
/* the lruvec under reclaim */
struct lruvec *lruvec;
- /* unstable max_seq from lru_gen_folio */
- unsigned long max_seq;
+ /* max_seq from lru_gen_folio: can be out of date */
+ unsigned long seq;
/* the next address within an mm to scan */
unsigned long next_addr;
/* to batch promoted pages */
@@ -488,11 +496,6 @@ struct lru_gen_mm_walk {
bool force_scan;
};
-void lru_gen_init_lruvec(struct lruvec *lruvec);
-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
-
-#ifdef CONFIG_MEMCG
-
/*
* For each node, memcgs are divided into two generations: the old and the
* young. For each generation, memcgs are randomly sharded into multiple bins
@@ -505,33 +508,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
* the old generation, is incremented when all its bins become empty.
*
* There are four operations:
- * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its
+ * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its
* current generation (old or young) and updates its "seg" to "head";
- * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its
+ * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its
* current generation (old or young) and updates its "seg" to "tail";
- * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old
+ * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old
* generation, updates its "gen" to "old" and resets its "seg" to "default";
- * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the
+ * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the
* young generation, updates its "gen" to "young" and resets its "seg" to
* "default".
*
* The events that trigger the above operations are:
* 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
- * 2. The first attempt to reclaim an memcg below low, which triggers
+ * 2. The first attempt to reclaim a memcg below low, which triggers
* MEMCG_LRU_TAIL;
- * 3. The first attempt to reclaim an memcg below reclaimable size threshold,
- * which triggers MEMCG_LRU_TAIL;
- * 4. The second attempt to reclaim an memcg below reclaimable size threshold,
- * which triggers MEMCG_LRU_YOUNG;
- * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG;
+ * 3. The first attempt to reclaim a memcg offlined or below reclaimable size
+ * threshold, which triggers MEMCG_LRU_TAIL;
+ * 4. The second attempt to reclaim a memcg offlined or below reclaimable size
+ * threshold, which triggers MEMCG_LRU_YOUNG;
+ * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG;
* 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG;
- * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD.
+ * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD.
*
- * Note that memcg LRU only applies to global reclaim, and the round-robin
- * incrementing of their max_seq counters ensures the eventual fairness to all
- * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
+ * Notes:
+ * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing
+ * of their max_seq counters ensures the eventual fairness to all eligible
+ * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
+ * 2. There are only two valid generations: old (seq) and young (seq+1).
+ * MEMCG_NR_GENS is set to three so that when reading the generation counter
+ * locklessly, a stale value (seq-1) does not wraparound to young.
*/
-#define MEMCG_NR_GENS 2
+#define MEMCG_NR_GENS 3
#define MEMCG_NR_BINS 8
struct lru_gen_memcg {
@@ -546,6 +553,8 @@ struct lru_gen_memcg {
};
void lru_gen_init_pgdat(struct pglist_data *pgdat);
+void lru_gen_init_lruvec(struct lruvec *lruvec);
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@@ -554,19 +563,6 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid);
-#else /* !CONFIG_MEMCG */
-
-#define MEMCG_NR_GENS 1
-
-struct lru_gen_memcg {
-};
-
-static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
-{
-}
-
-#endif /* CONFIG_MEMCG */
-
#else /* !CONFIG_LRU_GEN */
static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
@@ -581,8 +577,6 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
}
-#ifdef CONFIG_MEMCG
-
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
}
@@ -607,8 +601,6 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
{
}
-#endif /* CONFIG_MEMCG */
-
#endif /* CONFIG_LRU_GEN */
struct lruvec {
@@ -631,16 +623,17 @@ struct lruvec {
#ifdef CONFIG_LRU_GEN
/* evictable pages divided into generations */
struct lru_gen_folio lrugen;
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
/* to concurrently iterate lru_gen_mm_list */
struct lru_gen_mm_state mm_state;
#endif
+#endif /* CONFIG_LRU_GEN */
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
#endif
+ struct zswap_lruvec_state zswap_lruvec_state;
};
-/* Isolate unmapped pages */
-#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
@@ -676,16 +669,34 @@ enum zone_watermarks {
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
-/* Fields and list protected by pagesets local_lock in page_alloc.c */
+/*
+ * Flags used in pcp->flags field.
+ *
+ * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the
+ * previous page freeing. To avoid to drain PCP for an accident
+ * high-order page freeing.
+ *
+ * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before
+ * draining PCP for consecutive high-order pages freeing without
+ * allocation if data cache slice of CPU is large enough. To reduce
+ * zone lock contention and keep cache-hot pages reusing.
+ */
+#define PCPF_PREV_FREE_HIGH_ORDER BIT(0)
+#define PCPF_FREE_HIGH_BATCH BIT(1)
+
struct per_cpu_pages {
spinlock_t lock; /* Protects lists field */
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
+ int high_min; /* min high watermark */
+ int high_max; /* max high watermark */
int batch; /* chunk size for buddy add/remove */
- short free_factor; /* batch scaling factor during free */
+ u8 flags; /* protected by pcp->lock */
+ u8 alloc_factor; /* batch scaling factor during allocate */
#ifdef CONFIG_NUMA
- short expire; /* When 0, remote pagesets are drained */
+ u8 expire; /* When 0, remote pagesets are drained */
#endif
+ short free_count; /* consecutive free count */
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
@@ -838,7 +849,8 @@ struct zone {
* the high and batch values are copied to individual pagesets for
* faster access
*/
- int pageset_high;
+ int pageset_high_min;
+ int pageset_high_max;
int pageset_batch;
#ifndef CONFIG_SPARSEMEM
@@ -926,10 +938,10 @@ struct zone {
CACHELINE_PADDING(_pad1_);
/* free areas of different sizes */
- struct free_area free_area[MAX_ORDER + 1];
+ struct free_area free_area[NR_PAGE_ORDERS];
#ifdef CONFIG_UNACCEPTED_MEMORY
- /* Pages to be accepted. All pages on the list are MAX_ORDER */
+ /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */
struct list_head unaccepted_pages;
#endif
@@ -999,6 +1011,7 @@ enum zone_flags {
* Cleared when kswapd is woken.
*/
ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */
+ ZONE_BELOW_HIGH, /* zone is below high watermark. */
};
static inline unsigned long zone_managed_pages(struct zone *zone)
@@ -1738,8 +1751,8 @@ static inline bool movable_only_nodes(nodemask_t *nodes)
#define SECTION_BLOCKFLAGS_BITS \
((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
-#if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS
-#error Allocator MAX_ORDER exceeds SECTION_SIZE
+#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS
+#error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE
#endif
static inline unsigned long pfn_to_section_nr(unsigned long pfn)
@@ -1771,6 +1784,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+ struct rcu_head rcu;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
@@ -1964,7 +1978,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
- return test_bit(idx, ms->usage->subsection_map);
+ return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
@@ -1988,6 +2002,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
static inline int pfn_valid(unsigned long pfn)
{
struct mem_section *ms;
+ int ret;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
@@ -2001,13 +2016,19 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __pfn_to_section(pfn);
- if (!valid_section(ms))
+ rcu_read_lock_sched();
+ if (!valid_section(ms)) {
+ rcu_read_unlock_sched();
return 0;
+ }
/*
* Traditionally early sections always returned pfn_valid() for
* the entire section-sized span.
*/
- return early_section(ms) || pfn_section_valid(ms, pfn);
+ ret = early_section(ms) || pfn_section_valid(ms, pfn);
+ rcu_read_unlock_sched();
+
+ return ret;
}
#endif
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 057c89867aa2..cd4d5c8781f5 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
int vfsgid_in_group_p(vfsgid_t vfsgid);
+struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
+void mnt_idmap_put(struct mnt_idmap *idmap);
+
vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, kuid_t kuid);
@@ -241,7 +244,4 @@ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid()));
}
-bool check_fsmapping(const struct mnt_idmap *idmap,
- const struct super_block *sb);
-
#endif /* _LINUX_MNT_IDMAPPING_H */
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b0678b093cb2..7a9a07ea451b 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -935,6 +935,12 @@ enum {
* struct cdx_device_id - CDX device identifier
* @vendor: Vendor ID
* @device: Device ID
+ * @subvendor: Subsystem vendor ID (or CDX_ANY_ID)
+ * @subdevice: Subsystem device ID (or CDX_ANY_ID)
+ * @class: Device class
+ * Most drivers do not need to specify class/class_mask
+ * as vendor/device is normally sufficient.
+ * @class_mask: Limit which sub-fields of the class field are compared.
* @override_only: Match only when dev->driver_override is this driver.
*
* Type of entries in the "device Id" table for CDX devices supported by
@@ -943,7 +949,25 @@ enum {
struct cdx_device_id {
__u16 vendor;
__u16 device;
+ __u16 subvendor;
+ __u16 subdevice;
+ __u32 class;
+ __u32 class_mask;
__u32 override_only;
};
+struct vchiq_device_id {
+ char name[32];
+};
+
+/**
+ * struct coreboot_device_id - Identifies a coreboot table entry
+ * @tag: tag ID
+ * @driver_data: driver specific data
+ */
+struct coreboot_device_id {
+ __u32 tag;
+ kernel_ulong_t driver_data;
+};
+
#endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index a98e188cf37b..1153b0d99a80 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -540,6 +540,8 @@ struct module {
struct static_call_site *static_call_sites;
#endif
#if IS_ENABLED(CONFIG_KUNIT)
+ int num_kunit_init_suites;
+ struct kunit_suite **kunit_init_suites;
int num_kunit_suites;
struct kunit_suite **kunit_suites;
#endif
@@ -668,7 +670,7 @@ extern void __module_get(struct module *module);
* @module: the module we should check for
*
* Only try to get a module reference count if the module is not being removed.
- * This call will fail if the module is already being removed.
+ * This call will fail if the module is in the process of being removed.
*
* Care must also be taken to ensure the module exists and is alive prior to
* usage of this call. This can be gauranteed through two means:
@@ -883,7 +885,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
static inline void module_bug_cleanup(struct module *mod) {}
#endif /* CONFIG_GENERIC_BUG */
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
extern bool retpoline_module_ok(bool has_retpoline);
#else
static inline bool retpoline_module_ok(bool has_retpoline)
diff --git a/include/linux/module_symbol.h b/include/linux/module_symbol.h
index 7ace7ba30203..77c9895b9ddb 100644
--- a/include/linux/module_symbol.h
+++ b/include/linux/module_symbol.h
@@ -3,15 +3,13 @@
#define _LINUX_MODULE_SYMBOL_H
/* This ignores the intensely annoying "mapping symbols" found in ELF files. */
-static inline int is_mapping_symbol(const char *str)
+static inline bool is_mapping_symbol(const char *str)
{
if (str[0] == '.' && str[1] == 'L')
return true;
if (str[0] == 'L' && str[1] == '0')
return true;
- return str[0] == '$' &&
- (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x')
- && (str[2] == '\0' || str[2] == '.');
+ return str[0] == '$';
}
#endif /* _LINUX_MODULE_SYMBOL_H */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 03be088fb439..89b1e0ed9811 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -42,6 +42,11 @@ bool module_init_section(const char *name);
*/
bool module_exit_section(const char *name);
+/* Describes whether within_module_init() will consider this an init section
+ * or not. This behaviour changes with CONFIG_MODULE_UNLOAD.
+ */
+bool module_init_layout_section(const char *sname);
+
/*
* Apply the given relocation to the (simplified) ELF. Return -error
* or 0.
@@ -110,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
+#ifdef CONFIG_MODULES
+void flush_module_init_free_work(void);
+#else
+static inline void flush_module_init_free_work(void)
+{
+}
+#endif
+
/* Any cleanup needed when module leaves. */
void module_arch_cleanup(struct module *mod);
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 962cd41a2cb5..bfb85fd13e1f 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -276,7 +276,7 @@ struct kparam_array
read-only sections (which is part of respective UNIX ABI on these
platforms). So 'const' makes no sense and even causes compile failures
with some compilers. */
-#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64)
+#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64)
#define __moduleparam_const
#else
#define __moduleparam_const const
@@ -293,7 +293,11 @@ struct kparam_array
= { __param_str_##name, THIS_MODULE, ops, \
VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
-/* Obsolete - use module_param_cb() */
+/*
+ * Useful for describing a set/get pair used only once (i.e. for this
+ * parameter). For repeated set/get pairs (i.e. the same struct
+ * kernel_param_ops), use module_param_cb() instead.
+ */
#define module_param_call(name, _set, _get, arg, perm) \
static const struct kernel_param_ops __param_ops_##name = \
{ .flags = 0, .set = _set, .get = _get }; \
@@ -381,6 +385,8 @@ extern bool parameq(const char *name1, const char *name2);
*/
extern bool parameqn(const char *name1, const char *name2, size_t n);
+typedef int (*parse_unknown_fn)(char *param, char *val, const char *doing, void *arg);
+
/* Called on module insert or kernel boot */
extern char *parse_args(const char *name,
char *args,
@@ -388,9 +394,7 @@ extern char *parse_args(const char *name,
unsigned num,
s16 level_min,
s16 level_max,
- void *arg,
- int (*unknown)(char *param, char *val,
- const char *doing, void *arg));
+ void *arg, parse_unknown_fn unknown);
/* Called by module remove. */
#ifdef CONFIG_SYSFS
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 4f40b40306d0..c34c18b4e8f3 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -50,8 +50,7 @@ struct path;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
- MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
- MNT_CURSOR)
+ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
#define MNT_INTERNAL 0x4000
@@ -65,7 +64,7 @@ struct path;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
-#define MNT_CURSOR 0x10000000
+#define MNT_ONRB 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
@@ -92,8 +91,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);
extern struct vfsmount *clone_private_mount(const struct path *path);
-extern int __mnt_want_write(struct vfsmount *);
-extern void __mnt_drop_write(struct vfsmount *);
+int mnt_get_write_access(struct vfsmount *mnt);
+void mnt_put_write_access(struct vfsmount *mnt);
extern struct vfsmount *fc_mount(struct fs_context *fc);
extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h
index 79184948fab4..ac577699edfd 100644
--- a/include/linux/moxtet.h
+++ b/include/linux/moxtet.h
@@ -35,8 +35,6 @@ enum turris_mox_module_id {
#define MOXTET_NIRQS 16
-extern struct bus_type moxtet_type;
-
struct moxtet {
struct device *dev;
struct mutex lock;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a50ea79522f8..26d07e23052e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev);
struct irq_domain;
struct irq_domain_ops;
struct irq_chip;
+struct irq_fwspec;
struct device_node;
struct fwnode_handle;
struct msi_domain_info;
@@ -431,6 +432,8 @@ struct msi_domain_info;
* function.
* @msi_post_free: Optional function which is invoked after freeing
* all interrupts.
+ * @msi_translate: Optional translate callback to support the odd wire to
+ * MSI bridges, e.g. MBIGEN
*
* @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
* irqdomain.
@@ -468,6 +471,8 @@ struct msi_domain_ops {
struct device *dev);
void (*msi_post_free)(struct irq_domain *domain,
struct device *dev);
+ int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq, unsigned int *type);
};
/**
@@ -547,12 +552,10 @@ enum {
MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5),
/* Free MSI descriptors */
MSI_FLAG_FREE_MSI_DESCS = (1 << 6),
- /*
- * Quirk to handle MSI implementations which do not provide
- * masking. Currently known to affect x86, but has to be partially
- * handled in the core MSI code.
- */
- MSI_FLAG_NOMASK_QUIRK = (1 << 7),
+ /* Use dev->fwnode for MSI device domain creation */
+ MSI_FLAG_USE_DEV_FWNODE = (1 << 7),
+ /* Set parent->dev into domain->pm_dev on device domain creation */
+ MSI_FLAG_PARENT_PM_DEV = (1 << 8),
/* Mask for the generic functionality */
MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
@@ -578,6 +581,11 @@ enum {
* struct msi_parent_ops - MSI parent domain callbacks and configuration info
*
* @supported_flags: Required: The supported MSI flags of the parent domain
+ * @required_flags: Optional: The required MSI flags of the parent MSI domain
+ * @bus_select_token: Optional: The bus token of the real parent domain for
+ * irq_domain::select()
+ * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for
+ * irq_domain::select()
* @prefix: Optional: Prefix for the domain and chip name
* @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent
* domain specific domain flags, domain ops and interrupt chip
@@ -585,6 +593,9 @@ enum {
*/
struct msi_parent_ops {
u32 supported_flags;
+ u32 required_flags;
+ u32 bus_select_token;
+ u32 bus_select_mask;
const char *prefix;
bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain,
struct irq_domain *msi_parent_domain,
@@ -633,9 +644,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent);
-int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
- irq_write_msi_msg_t write_msi_msg);
-void platform_msi_domain_free_irqs(struct device *dev);
/* When an MSI domain is used as an intermediate domain */
int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -662,6 +670,10 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain);
+/* Per device platform MSI */
+int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg);
+void platform_device_msi_free_irqs_all(struct device *dev);
bool msi_device_has_isolated_msi(struct device *dev);
#else /* CONFIG_GENERIC_MSI_IRQ */
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d88bb56c18e2..947410faf9e2 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -287,7 +287,7 @@ struct cfi_private {
unsigned long chipshift; /* Because they're of the same type */
const char *im_name; /* inter_module name for cmdset_setup */
unsigned long quirks;
- struct flchip chips[]; /* per-chip data structure for each chip */
+ struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */
};
uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index c04f690871ca..9798c1a1d3b6 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -13,6 +13,7 @@
*/
#include <linux/sched.h>
#include <linux/mutex.h>
+#include <linux/wait.h>
typedef enum {
FL_READY,
diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h
index 0b6b59f7cfbd..56047a4e54c9 100644
--- a/include/linux/mtd/jedec.h
+++ b/include/linux/mtd/jedec.h
@@ -21,6 +21,9 @@ struct jedec_ecc_info {
/* JEDEC features */
#define JEDEC_FEATURE_16_BIT_BUS (1 << 0)
+/* JEDEC Optional Commands */
+#define JEDEC_OPT_CMD_READ_CACHE BIT(1)
+
struct nand_jedec_params {
/* rev info and features block */
/* 'J' 'E' 'S' 'D' */
diff --git a/include/linux/mtd/lpc32xx_mlc.h b/include/linux/mtd/lpc32xx_mlc.h
index d168c628c0d5..35e971be0950 100644
--- a/include/linux/mtd/lpc32xx_mlc.h
+++ b/include/linux/mtd/lpc32xx_mlc.h
@@ -11,7 +11,7 @@
#include <linux/dmaengine.h>
struct lpc32xx_mlc_platform_data {
- bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+ dma_filter_fn dma_filter;
};
#endif /* __LINUX_MTD_LPC32XX_MLC_H */
diff --git a/include/linux/mtd/lpc32xx_slc.h b/include/linux/mtd/lpc32xx_slc.h
index cf54a9f80460..a044b806566b 100644
--- a/include/linux/mtd/lpc32xx_slc.h
+++ b/include/linux/mtd/lpc32xx_slc.h
@@ -11,7 +11,7 @@
#include <linux/dmaengine.h>
struct lpc32xx_slc_platform_data {
- bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+ dma_filter_fn dma_filter;
};
#endif /* __LINUX_MTD_LPC32XX_SLC_H */
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7c58c44662b8..8d10d9d2e830 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -223,7 +223,7 @@ struct mtd_part {
* @partitions_lock: lock protecting accesses to the partition list. Protects
* not only the master partition list, but also all
* sub-partitions.
- * @suspended: et to 1 when the device is suspended, 0 otherwise
+ * @suspended: set to 1 when the device is suspended, 0 otherwise
*
* This struct is embedded in mtd_info and contains master-specific
* properties/fields. The master is the root MTD device from the MTD partition
@@ -379,7 +379,7 @@ struct mtd_info {
struct module *owner;
struct device dev;
- int usecount;
+ struct kref refcnt;
struct mtd_debug_info dbg;
struct nvmem_device *nvmem;
struct nvmem_device *otp_user_nvmem;
diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h
index a7376f9beddf..55ab2e4d62f9 100644
--- a/include/linux/mtd/onfi.h
+++ b/include/linux/mtd/onfi.h
@@ -55,6 +55,7 @@
#define ONFI_SUBFEATURE_PARAM_LEN 4
/* ONFI optional commands SET/GET FEATURES supported? */
+#define ONFI_OPT_CMD_READ_CACHE BIT(1)
#define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2)
struct nand_onfi_params {
diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h
index 2e3f43788d48..0421f12156b5 100644
--- a/include/linux/mtd/qinfo.h
+++ b/include/linux/mtd/qinfo.h
@@ -24,7 +24,7 @@ struct lpddr_private {
struct qinfo_chip *qinfo;
int numchips;
unsigned long chipshift;
- struct flchip chips[];
+ struct flchip chips[] __counted_by(numchips);
};
/* qinfo_query_info structure contains request information for
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5159d692f9ce..e84522e31301 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -225,6 +225,7 @@ struct gpio_desc;
* struct nand_parameters - NAND generic parameters from the parameter page
* @model: Model name
* @supports_set_get_features: The NAND chip supports setting/getting features
+ * @supports_read_cache: The NAND chip supports read cache operations
* @set_feature_list: Bitmap of features that can be set
* @get_feature_list: Bitmap of features that can be get
* @onfi: ONFI specific parameters
@@ -233,6 +234,7 @@ struct nand_parameters {
/* Generic parameters */
const char *model;
bool supports_set_get_features;
+ bool supports_read_cache;
DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER);
DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER);
@@ -1001,6 +1003,8 @@ struct nand_op_parser {
/**
* struct nand_operation - NAND operation descriptor
* @cs: the CS line to select for this NAND operation
+ * @deassert_wp: set to true when the operation requires the WP pin to be
+ * de-asserted (ERASE, PROG, ...)
* @instrs: array of instructions to execute
* @ninstrs: length of the @instrs array
*
@@ -1008,6 +1012,7 @@ struct nand_op_parser {
*/
struct nand_operation {
unsigned int cs;
+ bool deassert_wp;
const struct nand_op_instr *instrs;
unsigned int ninstrs;
};
@@ -1019,6 +1024,14 @@ struct nand_operation {
.ninstrs = ARRAY_SIZE(_instrs), \
}
+#define NAND_DESTRUCTIVE_OPERATION(_cs, _instrs) \
+ { \
+ .cs = _cs, \
+ .deassert_wp = true, \
+ .instrs = _instrs, \
+ .ninstrs = ARRAY_SIZE(_instrs), \
+ }
+
int nand_op_parser_exec_op(struct nand_chip *chip,
const struct nand_op_parser *parser,
const struct nand_operation *op, bool check_only);
@@ -1102,6 +1115,7 @@ struct nand_controller_ops {
* the bus without restarting an entire read operation nor
* changing the column.
* @supported_op.cont_read: The controller supports sequential cache reads.
+ * @controller_wp: the controller is in charge of handling the WP pin.
*/
struct nand_controller {
struct mutex lock;
@@ -1110,6 +1124,7 @@ struct nand_controller {
unsigned int data_only_read: 1;
unsigned int cont_read: 1;
} supported_op;
+ bool controller_wp;
};
static inline void nand_controller_init(struct nand_controller *nfc)
@@ -1263,6 +1278,7 @@ struct nand_secure_region {
* @cont_read: Sequential page read internals
* @cont_read.ongoing: Whether a continuous read is ongoing or not
* @cont_read.first_page: Start of the continuous read operation
+ * @cont_read.pause_page: End of the current sequential cache read operation
* @cont_read.last_page: End of the continuous read operation
* @controller: The hardware controller structure which is shared among multiple
* independent devices
@@ -1319,6 +1335,7 @@ struct nand_chip {
struct {
bool ongoing;
unsigned int first_page;
+ unsigned int pause_page;
unsigned int last_page;
} cont_read;
@@ -1540,6 +1557,7 @@ int nand_reset_op(struct nand_chip *chip);
int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
unsigned int len);
int nand_status_op(struct nand_chip *chip, u8 *status);
+int nand_exit_status_op(struct nand_chip *chip);
int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock);
int nand_read_page_op(struct nand_chip *chip, unsigned int page,
unsigned int offset_in_page, void *buf, unsigned int len);
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 3e285c09d16d..5c19ead60499 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -169,7 +169,7 @@
struct spinand_op;
struct spinand_device;
-#define SPINAND_MAX_ID_LEN 4
+#define SPINAND_MAX_ID_LEN 5
/*
* For erase, write and read operation, we got the following timings :
* tBERS (erase) 1ms to 4ms
@@ -263,6 +263,7 @@ struct spinand_manufacturer {
extern const struct spinand_manufacturer alliancememory_spinand_manufacturer;
extern const struct spinand_manufacturer ato_spinand_manufacturer;
extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer;
+extern const struct spinand_manufacturer foresee_spinand_manufacturer;
extern const struct spinand_manufacturer gigadevice_spinand_manufacturer;
extern const struct spinand_manufacturer macronix_spinand_manufacturer;
extern const struct spinand_manufacturer micron_spinand_manufacturer;
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index a529347fd75b..562f92504f2b 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -192,6 +192,7 @@ struct ubi_device_info {
* or a volume was removed)
* @UBI_VOLUME_RESIZED: a volume has been re-sized
* @UBI_VOLUME_RENAMED: a volume has been re-named
+ * @UBI_VOLUME_SHUTDOWN: a volume is going to removed, shutdown users
* @UBI_VOLUME_UPDATED: data has been written to a volume
*
* These constants define which type of event has happened when a volume
@@ -202,6 +203,7 @@ enum {
UBI_VOLUME_REMOVED,
UBI_VOLUME_RESIZED,
UBI_VOLUME_RENAMED,
+ UBI_VOLUME_SHUTDOWN,
UBI_VOLUME_UPDATED,
};
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a33aa9eb9fc3..67edc4ca2bee 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,7 @@
#include <linux/osq_lock.h>
#include <linux/debug_locks.h>
#include <linux/cleanup.h>
+#include <linux/mutex_types.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
@@ -31,54 +32,9 @@
# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
#endif
-#ifndef CONFIG_PREEMPT_RT
-
-/*
- * Simple, straightforward mutexes with strict semantics:
- *
- * - only one task can hold the mutex at a time
- * - only the owner can unlock the mutex
- * - multiple unlocks are not permitted
- * - recursive locking is not permitted
- * - a mutex object must be initialized via the API
- * - a mutex object must not be initialized via memset or copying
- * - task may not exit with mutex held
- * - memory areas where held locks reside must not be freed
- * - held mutexes must not be reinitialized
- * - mutexes may not be used in hardware or software interrupt
- * contexts such as tasklets and timers
- *
- * These semantics are fully enforced when DEBUG_MUTEXES is
- * enabled. Furthermore, besides enforcing the above rules, the mutex
- * debugging code also implements a number of additional features
- * that make lock debugging easier and faster:
- *
- * - uses symbolic names of mutexes, whenever they are printed in debug output
- * - point-of-acquire tracking, symbolic lookup of function names
- * - list of all locks held in the system, printout of them
- * - owner tracking
- * - detects self-recursing locks and prints out all relevant info
- * - detects multi-task circular deadlocks and prints out all affected
- * locks and tasks (and only those tasks)
- */
-struct mutex {
- atomic_long_t owner;
- raw_spinlock_t wait_lock;
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
- struct optimistic_spin_queue osq; /* Spinner MCS lock */
-#endif
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_MUTEXES
- void *magic;
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifdef CONFIG_DEBUG_MUTEXES
-#define __DEBUG_MUTEX_INITIALIZER(lockname) \
+# define __DEBUG_MUTEX_INITIALIZER(lockname) \
, .magic = &lockname
extern void mutex_destroy(struct mutex *lock);
@@ -91,6 +47,7 @@ static inline void mutex_destroy(struct mutex *lock) {}
#endif
+#ifndef CONFIG_PREEMPT_RT
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
@@ -131,14 +88,6 @@ extern bool mutex_is_locked(struct mutex *lock);
/*
* Preempt-RT variant based on rtmutexes.
*/
-#include <linux/rtmutex.h>
-
-struct mutex {
- struct rt_mutex_base rtmutex;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
#define __MUTEX_INITIALIZER(mutexname) \
{ \
@@ -151,9 +100,6 @@ struct mutex {
extern void __mutex_rt_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
-extern int mutex_trylock(struct mutex *lock);
-
-static inline void mutex_destroy(struct mutex *lock) { }
#define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex)
@@ -221,6 +167,7 @@ extern void mutex_unlock(struct mutex *lock);
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
-DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T))
+DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T))
+DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0)
#endif /* __LINUX_MUTEX_H */
diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h
new file mode 100644
index 000000000000..fdf7f515fde8
--- /dev/null
+++ b/include/linux/mutex_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MUTEX_TYPES_H
+#define __LINUX_MUTEX_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/lockdep_types.h>
+#include <linux/osq_lock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#ifndef CONFIG_PREEMPT_RT
+
+/*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in hardware or software interrupt
+ * contexts such as tasklets and timers
+ *
+ * These semantics are fully enforced when DEBUG_MUTEXES is
+ * enabled. Furthermore, besides enforcing the above rules, the mutex
+ * debugging code also implements a number of additional features
+ * that make lock debugging easier and faster:
+ *
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ * locks and tasks (and only those tasks)
+ */
+struct mutex {
+ atomic_long_t owner;
+ raw_spinlock_t wait_lock;
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+ struct optimistic_spin_queue osq; /* Spinner MCS lock */
+#endif
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_MUTEXES
+ void *magic;
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#else /* !CONFIG_PREEMPT_RT */
+/*
+ * Preempt-RT variant based on rtmutexes.
+ */
+#include <linux/rtmutex.h>
+
+struct mutex {
+ struct rt_mutex_base rtmutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#endif /* CONFIG_PREEMPT_RT */
+
+#endif /* __LINUX_MUTEX_TYPES_H */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1463cbda4888..74e0cc14ebf8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -66,6 +66,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
extern void done_path_create(struct path *, struct dentry *);
extern struct dentry *kern_path_locked(const char *, struct path *);
+extern struct dentry *user_path_locked_at(int , const char __user *, struct path *);
int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
struct path *parent, struct qstr *last, int *type,
const struct path *root);
@@ -92,6 +93,30 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
+/**
+ * mode_strip_umask - handle vfs umask stripping
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode to be created in @dir
+ *
+ * In most filesystems, umask stripping depends on whether or not the
+ * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask
+ * stripping is done directly in here. If the filesystem does support POSIX
+ * ACLs umask stripping is deferred until the filesystem calls
+ * posix_acl_create().
+ *
+ * Some filesystems (like NFSv4) also want to avoid umask stripping by the
+ * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK
+ * to get this effect without declaring that they support POSIX ACLs.
+ *
+ * Returns: mode
+ */
+static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode)
+{
+ if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK))
+ mode &= ~current_umask();
+ return mode;
+}
+
extern int __must_check nd_jump_link(const struct path *path);
static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
@@ -112,7 +137,7 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
static inline bool
retry_estale(const long error, const unsigned int flags)
{
- return error == -ESTALE && !(flags & LOOKUP_REVAL);
+ return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL));
}
#endif /* _LINUX_NAMEI_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 41c608c1b02c..15df6d5f27a7 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -123,7 +123,7 @@ struct socket {
struct file *file;
struct sock *sk;
- const struct proto_ops *ops;
+ const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */
struct socket_wq wq;
};
@@ -299,10 +299,7 @@ do { \
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
#else
#define net_dbg_ratelimited(fmt, ...) \
- do { \
- if (0) \
- no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
- } while (0)
+ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
#define net_get_random_once(buf, nbytes) \
diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
index ed42bd5f639f..0aa4411528fc 100644
--- a/include/linux/net/intel/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -45,7 +45,7 @@ struct i40e_qv_info {
struct i40e_qvlist_info {
u32 num_vectors;
- struct i40e_qv_info qv_info[];
+ struct i40e_qv_info qv_info[] __counted_by(num_vectors);
};
diff --git a/include/linux/net_mm.h b/include/linux/net_mm.h
deleted file mode 100644
index b298998bd5a0..000000000000
--- a/include/linux/net_mm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifdef CONFIG_MMU
-
-#ifdef CONFIG_INET
-extern const struct vm_operations_struct tcp_vm_ops;
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
- return vma->vm_ops == &tcp_vm_ops;
-}
-#else
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
- return false;
-}
-#endif /* CONFIG_INET*/
-
-#endif /* CONFIG_MMU */
diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index fd67f3cc0c4b..eb01c37e71e0 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -5,12 +5,23 @@
#include <uapi/linux/net_tstamp.h>
+enum hwtstamp_source {
+ HWTSTAMP_SOURCE_NETDEV,
+ HWTSTAMP_SOURCE_PHYLIB,
+};
+
/**
* struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config
*
* @flags: see struct hwtstamp_config
* @tx_type: see struct hwtstamp_config
* @rx_filter: see struct hwtstamp_config
+ * @ifr: pointer to ifreq structure from the original ioctl request, to pass to
+ * a legacy implementation of a lower driver
+ * @copied_to_user: request was passed to a legacy implementation which already
+ * copied the ioctl request back to user space
+ * @source: indication whether timestamps should come from the netdev or from
+ * an attached phylib PHY
*
* Prefer using this structure for in-kernel processing of hardware
* timestamping configuration, over the inextensible struct hwtstamp_config
@@ -20,6 +31,9 @@ struct kernel_hwtstamp_config {
int flags;
int tx_type;
int rx_filter;
+ struct ifreq *ifr;
+ bool copied_to_user;
+ enum hwtstamp_source source;
};
static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg,
@@ -30,4 +44,20 @@ static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kern
kernel_cfg->rx_filter = cfg->rx_filter;
}
+static inline void hwtstamp_config_from_kernel(struct hwtstamp_config *cfg,
+ const struct kernel_hwtstamp_config *kernel_cfg)
+{
+ cfg->flags = kernel_cfg->flags;
+ cfg->tx_type = kernel_cfg->tx_type;
+ cfg->rx_filter = kernel_cfg->rx_filter;
+}
+
+static inline bool kernel_hwtstamp_config_changed(const struct kernel_hwtstamp_config *a,
+ const struct kernel_hwtstamp_config *b)
+{
+ return a->flags != b->flags ||
+ a->tx_type != b->tx_type ||
+ a->rx_filter != b->rx_filter;
+}
+
#endif /* _LINUX_NET_TIMESTAMPING_H_ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b828c7a75be2..cb37817d6382 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -40,7 +40,6 @@
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>
-#include <net/xdp.h>
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
@@ -57,6 +56,7 @@
struct netpoll_info;
struct device;
struct ethtool_ops;
+struct kernel_hwtstamp_config;
struct phy_device;
struct dsa_port;
struct ip_tunnel_parm;
@@ -76,8 +76,12 @@ struct udp_tunnel_nic_info;
struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
+struct xdp_frame;
+struct xdp_metadata_ops;
struct xdp_md;
+typedef u32 xdp_features_t;
+
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
@@ -221,12 +225,6 @@ struct net_device_core_stats {
#include <linux/cache.h>
#include <linux/skbuff.h>
-#ifdef CONFIG_RPS
-#include <linux/static_key.h>
-extern struct static_key_false rps_needed;
-extern struct static_key_false rfs_needed;
-#endif
-
struct neighbour;
struct neigh_parms;
struct sk_buff;
@@ -376,6 +374,7 @@ struct napi_struct {
/* control-path-only fields follow */
struct list_head dev_list;
struct hlist_node napi_hash_node;
+ int irq;
};
enum {
@@ -476,6 +475,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n)
return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
}
+/**
+ * napi_is_scheduled - test if NAPI is scheduled
+ * @n: NAPI context
+ *
+ * This check is "best-effort". With no locking implemented,
+ * a NAPI can be scheduled or terminate right after this check
+ * and produce not precise results.
+ *
+ * NAPI_STATE_SCHED is an internal state, napi_is_scheduled
+ * should not be used normally and napi_schedule should be
+ * used instead.
+ *
+ * Use only if the driver really needs to check if a NAPI
+ * is scheduled for example in the context of delayed timer
+ * that can be skipped if a NAPI is already scheduled.
+ *
+ * Return True if NAPI is scheduled, False otherwise.
+ */
+static inline bool napi_is_scheduled(struct napi_struct *n)
+{
+ return test_bit(NAPI_STATE_SCHED, &n->state);
+}
+
bool napi_schedule_prep(struct napi_struct *n);
/**
@@ -484,11 +506,18 @@ bool napi_schedule_prep(struct napi_struct *n);
*
* Schedule NAPI poll routine to be called if it is not already
* running.
+ * Return true if we schedule a NAPI or false if not.
+ * Refer to napi_schedule_prep() for additional reason on why
+ * a NAPI might not be scheduled.
*/
-static inline void napi_schedule(struct napi_struct *n)
+static inline bool napi_schedule(struct napi_struct *n)
{
- if (napi_schedule_prep(n))
+ if (napi_schedule_prep(n)) {
__napi_schedule(n);
+ return true;
+ }
+
+ return false;
}
/**
@@ -503,16 +532,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
__napi_schedule_irqoff(n);
}
-/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */
-static inline bool napi_reschedule(struct napi_struct *napi)
-{
- if (napi_schedule_prep(napi)) {
- __napi_schedule(napi);
- return true;
- }
- return false;
-}
-
/**
* napi_complete_done - NAPI processing complete
* @n: NAPI context
@@ -639,6 +658,10 @@ struct netdev_queue {
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
+ /* NAPI instance for the queue
+ * Readers and writers must hold RTNL
+ */
+ struct napi_struct *napi;
/*
* write-mostly part
*/
@@ -701,112 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
#endif
}
-#ifdef CONFIG_RPS
-/*
- * This structure holds an RPS map which can be of variable length. The
- * map is an array of CPUs.
- */
-struct rps_map {
- unsigned int len;
- struct rcu_head rcu;
- u16 cpus[];
-};
-#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
-
-/*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
- * tail pointer for that CPU's input queue at the time of last enqueue, and
- * a hardware filter index.
- */
-struct rps_dev_flow {
- u16 cpu;
- u16 filter;
- unsigned int last_qtail;
-};
-#define RPS_NO_FILTER 0xffff
-
-/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
- unsigned int mask;
- struct rcu_head rcu;
- struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
- ((_num) * sizeof(struct rps_dev_flow)))
-
-/*
- * The rps_sock_flow_table contains mappings of flows to the last CPU
- * on which they were processed by the application (set in recvmsg).
- * Each entry is a 32bit value. Upper part is the high-order bits
- * of flow hash, lower part is CPU number.
- * rps_cpu_mask is used to partition the space, depending on number of
- * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
- * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
- * meaning we use 32-6=26 bits for the hash.
- */
-struct rps_sock_flow_table {
- u32 mask;
-
- u32 ents[] ____cacheline_aligned_in_smp;
-};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
-
-#define RPS_NO_CPU 0xffff
-
-extern u32 rps_cpu_mask;
-extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
-
-static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
-{
- if (table && hash) {
- unsigned int index = hash & table->mask;
- u32 val = hash & ~rps_cpu_mask;
-
- /* We only give a hint, preemption can change CPU under us */
- val |= raw_smp_processor_id();
-
- /* The following WRITE_ONCE() is paired with the READ_ONCE()
- * here, and another one in get_rps_cpu().
- */
- if (READ_ONCE(table->ents[index]) != val)
- WRITE_ONCE(table->ents[index], val);
- }
-}
-
#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
u16 filter_id);
#endif
-#endif /* CONFIG_RPS */
-
-/* This structure contains an instance of an RX queue. */
-struct netdev_rx_queue {
- struct xdp_rxq_info xdp_rxq;
-#ifdef CONFIG_RPS
- struct rps_map __rcu *rps_map;
- struct rps_dev_flow_table __rcu *rps_flow_table;
-#endif
- struct kobject kobj;
- struct net_device *dev;
- netdevice_tracker dev_tracker;
-
-#ifdef CONFIG_XDP_SOCKETS
- struct xsk_buff_pool *pool;
-#endif
-} ____cacheline_aligned_in_smp;
-
-/*
- * RX queue sysfs structures and functions.
- */
-struct rx_queue_attribute {
- struct attribute attr;
- ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_rx_queue *queue,
- const char *buf, size_t len);
-};
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
enum xps_map_type {
@@ -939,6 +860,7 @@ struct net_device_path {
u8 queue;
u16 wcid;
u8 bss;
+ u8 amsdu;
} mtk_wdma;
};
};
@@ -1056,7 +978,7 @@ struct xfrmdev_ops {
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
- void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
+ void (*xdo_dev_state_update_stats) (struct xfrm_state *x);
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
@@ -1309,9 +1231,7 @@ struct netdev_net_notifier {
* struct net_device *dev,
* const unsigned char *addr, u16 vid)
* Deletes the FDB entry from dev coresponding to addr.
- * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[],
- * struct net_device *dev,
- * u16 vid,
+ * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev,
* struct netlink_ext_ack *extack);
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
* struct net_device *dev, struct net_device *filter_dev,
@@ -1325,6 +1245,9 @@ struct netdev_net_notifier {
* int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[],
* struct netlink_ext_ack *extack);
* Deletes the MDB entry from dev.
+ * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[],
+ * struct netlink_ext_ack *extack);
+ * Bulk deletes MDB entries from dev.
* int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb,
* struct netlink_callback *cb);
* Dumps MDB entries from dev. The first argument (marker) in the netlink
@@ -1418,6 +1341,16 @@ struct netdev_net_notifier {
* Get hardware timestamp based on normal/adjustable time or free running
* cycle counter. This function is required if physical clock supports a
* free running cycle counter.
+ *
+ * int (*ndo_hwtstamp_get)(struct net_device *dev,
+ * struct kernel_hwtstamp_config *kernel_config);
+ * Get the currently configured hardware timestamping parameters for the
+ * NIC device.
+ *
+ * int (*ndo_hwtstamp_set)(struct net_device *dev,
+ * struct kernel_hwtstamp_config *kernel_config,
+ * struct netlink_ext_ack *extack);
+ * Change the hardware timestamping parameters for NIC device.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1576,10 +1509,8 @@ struct net_device_ops {
struct net_device *dev,
const unsigned char *addr,
u16 vid, struct netlink_ext_ack *extack);
- int (*ndo_fdb_del_bulk)(struct ndmsg *ndm,
- struct nlattr *tb[],
+ int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh,
struct net_device *dev,
- u16 vid,
struct netlink_ext_ack *extack);
int (*ndo_fdb_dump)(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -1599,9 +1530,16 @@ struct net_device_ops {
int (*ndo_mdb_del)(struct net_device *dev,
struct nlattr *tb[],
struct netlink_ext_ack *extack);
+ int (*ndo_mdb_del_bulk)(struct net_device *dev,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
int (*ndo_mdb_dump)(struct net_device *dev,
struct sk_buff *skb,
struct netlink_callback *cb);
+ int (*ndo_mdb_get)(struct net_device *dev,
+ struct nlattr *tb[], u32 portid,
+ u32 seq,
+ struct netlink_ext_ack *extack);
int (*ndo_bridge_setlink)(struct net_device *dev,
struct nlmsghdr *nlh,
u16 flags,
@@ -1652,12 +1590,11 @@ struct net_device_ops {
ktime_t (*ndo_get_tstamp)(struct net_device *dev,
const struct skb_shared_hwtstamps *hwtstamps,
bool cycles);
-};
-
-struct xdp_metadata_ops {
- int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
- int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
- enum xdp_rss_hash_type *rss_type);
+ int (*ndo_hwtstamp_get)(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_config);
+ int (*ndo_hwtstamp_set)(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_config,
+ struct netlink_ext_ack *extack);
};
/**
@@ -1708,6 +1645,9 @@ struct xdp_metadata_ops {
* @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
* skb_headlen(skb) == 0 (data starts from frag0)
* @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
+ * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to
+ * ndo_hwtstamp_set() for all timestamp requests regardless of source,
+ * even if those aren't HWTSTAMP_SOURCE_NETDEV.
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1743,6 +1683,7 @@ enum netdev_priv_flags {
IFF_NO_ADDRCONF = BIT_ULL(30),
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
+ IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33),
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1783,6 +1724,22 @@ enum netdev_ml_priv_type {
ML_PRIV_CAN,
};
+enum netdev_stat_type {
+ NETDEV_PCPU_STAT_NONE,
+ NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
+ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
+ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
+};
+
+enum netdev_reg_state {
+ NETREG_UNINITIALIZED = 0,
+ NETREG_REGISTERED, /* completed register_netdevice */
+ NETREG_UNREGISTERING, /* called unregister_netdevice */
+ NETREG_UNREGISTERED, /* completed unregister todo */
+ NETREG_RELEASED, /* called free_netdev */
+ NETREG_DUMMY, /* dummy device for NAPI poll */
+};
+
/**
* struct net_device - The DEVICE structure.
*
@@ -1844,6 +1801,7 @@ enum netdev_ml_priv_type {
* @netdev_ops: Includes several pointers to callbacks,
* if one wants to override the ndo_*() functions
* @xdp_metadata_ops: Includes pointers to XDP metadata callbacks.
+ * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks.
* @ethtool_ops: Management operations
* @l3mdev_ops: Layer 3 master device operations
* @ndisc_ops: Includes callbacks for different IPv6 neighbour
@@ -1930,8 +1888,7 @@ enum netdev_ml_priv_type {
*
* @rx_handler: handler for received packets
* @rx_handler_data: XXX: need comments on this one
- * @miniq_ingress: ingress/clsact qdisc specific data for
- * ingress processing
+ * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing
* @ingress_queue: XXX: need comments on this one
* @nf_hooks_ingress: netfilter hooks executed for ingress packets
* @broadcast: hw bcast address
@@ -1952,8 +1909,7 @@ enum netdev_ml_priv_type {
* @xps_maps: all CPUs/RXQs maps for XPS device
*
* @xps_maps: XXX: need comments on this one
- * @miniq_egress: clsact qdisc specific data for
- * egress processing
+ * @tcx_egress: BPF & clsact qdisc specific data for egress processing
* @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
@@ -1979,10 +1935,14 @@ enum netdev_ml_priv_type {
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
- * @lstats: Loopback statistics
- * @tstats: Tunnel statistics
- * @dstats: Dummy statistics
- * @vstats: Virtual ethernet statistics
+ *
+ * @pcpu_stat_type: Type of device statistics which the core should
+ * allocate/free: none, lstats, tstats, dstats. none
+ * means the driver is handling statistics allocation/
+ * freeing internally.
+ * @lstats: Loopback statistics: packets, bytes
+ * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes
+ * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes
*
* @garp_port: GARP
* @mrp_port: MRP
@@ -1995,6 +1955,7 @@ enum netdev_ml_priv_type {
*
* @sysfs_rx_queue_group: Space for optional per-rx queue attributes
* @rtnl_link_ops: Rtnl_link_ops
+ * @stat_ops: Optional ops for queue-aware statistics
*
* @gso_max_size: Maximum size of generic segmentation offload
* @tso_max_size: Device (as in HW) limit on the max TSO request size
@@ -2045,6 +2006,8 @@ enum netdev_ml_priv_type {
* receive offload (GRO)
* @gro_ipv4_max_size: Maximum size of aggregated packet in generic
* receive offload (GRO), for IPv4.
+ * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP
+ * zero copy driver
*
* @dev_addr_shadow: Copy of @dev_addr to catch direct writes.
* @linkwatch_dev_tracker: refcount tracker used by linkwatch.
@@ -2058,11 +2021,86 @@ enum netdev_ml_priv_type {
* SET_NETDEV_DEVLINK_PORT macro. This pointer is static
* during the time netdevice is registered.
*
+ * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
+ * where the clock is recovered.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
struct net_device {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/net_device.rst.
+ * Please update the document when adding new fields.
+ */
+
+ /* TX read-mostly hotpath */
+ __cacheline_group_begin(net_device_read_tx);
+ unsigned long long priv_flags;
+ const struct net_device_ops *netdev_ops;
+ const struct header_ops *header_ops;
+ struct netdev_queue *_tx;
+ netdev_features_t gso_partial_features;
+ unsigned int real_num_tx_queues;
+ unsigned int gso_max_size;
+ unsigned int gso_ipv4_max_size;
+ u16 gso_max_segs;
+ s16 num_tc;
+ /* Note : dev->mtu is often read without holding a lock.
+ * Writers usually hold RTNL.
+ * It is recommended to use READ_ONCE() to annotate the reads,
+ * and to use WRITE_ONCE() to annotate the writes.
+ */
+ unsigned int mtu;
+ unsigned short needed_headroom;
+ struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+#ifdef CONFIG_XPS
+ struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
+#ifdef CONFIG_NET_XGRESS
+ struct bpf_mprog_entry __rcu *tcx_egress;
+#endif
+ __cacheline_group_end(net_device_read_tx);
+
+ /* TXRX read-mostly hotpath */
+ __cacheline_group_begin(net_device_read_txrx);
+ union {
+ struct pcpu_lstats __percpu *lstats;
+ struct pcpu_sw_netstats __percpu *tstats;
+ struct pcpu_dstats __percpu *dstats;
+ };
+ unsigned long state;
+ unsigned int flags;
+ unsigned short hard_header_len;
+ netdev_features_t features;
+ struct inet6_dev __rcu *ip6_ptr;
+ __cacheline_group_end(net_device_read_txrx);
+
+ /* RX read-mostly hotpath */
+ __cacheline_group_begin(net_device_read_rx);
+ struct bpf_prog __rcu *xdp_prog;
+ struct list_head ptype_specific;
+ int ifindex;
+ unsigned int real_num_rx_queues;
+ struct netdev_rx_queue *_rx;
+ unsigned long gro_flush_timeout;
+ int napi_defer_hard_irqs;
+ unsigned int gro_max_size;
+ unsigned int gro_ipv4_max_size;
+ rx_handler_func_t __rcu *rx_handler;
+ void __rcu *rx_handler_data;
+ possible_net_t nd_net;
+#ifdef CONFIG_NETPOLL
+ struct netpoll_info __rcu *npinfo;
+#endif
+#ifdef CONFIG_NET_XGRESS
+ struct bpf_mprog_entry __rcu *tcx_ingress;
+#endif
+ __cacheline_group_end(net_device_read_rx);
+
char name[IFNAMSIZ];
struct netdev_name_node *name_node;
struct dev_ifalias __rcu *ifalias;
@@ -2080,14 +2118,12 @@ struct net_device {
* part of the usual set specified in Space.c.
*/
- unsigned long state;
struct list_head dev_list;
struct list_head napi_list;
struct list_head unreg_list;
struct list_head close_list;
struct list_head ptype_all;
- struct list_head ptype_specific;
struct {
struct list_head upper;
@@ -2095,31 +2131,18 @@ struct net_device {
} adj_list;
/* Read-mostly cache-line for fast-path access */
- unsigned int flags;
xdp_features_t xdp_features;
- unsigned long long priv_flags;
- const struct net_device_ops *netdev_ops;
const struct xdp_metadata_ops *xdp_metadata_ops;
- int ifindex;
+ const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops;
unsigned short gflags;
- unsigned short hard_header_len;
- /* Note : dev->mtu is often read without holding a lock.
- * Writers usually hold RTNL.
- * It is recommended to use READ_ONCE() to annotate the reads,
- * and to use WRITE_ONCE() to annotate the writes.
- */
- unsigned int mtu;
- unsigned short needed_headroom;
unsigned short needed_tailroom;
- netdev_features_t features;
netdev_features_t hw_features;
netdev_features_t wanted_features;
netdev_features_t vlan_features;
netdev_features_t hw_enc_features;
netdev_features_t mpls_features;
- netdev_features_t gso_partial_features;
unsigned int min_mtu;
unsigned int max_mtu;
@@ -2157,9 +2180,7 @@ struct net_device {
const struct tlsdev_ops *tlsdev_ops;
#endif
- const struct header_ops *header_ops;
-
- unsigned char operstate;
+ unsigned int operstate;
unsigned char link_mode;
unsigned char if_port;
@@ -2199,9 +2220,7 @@ struct net_device {
/* Protocol-specific pointers */
-
struct in_device __rcu *ip_ptr;
- struct inet6_dev __rcu *ip6_ptr;
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info;
#endif
@@ -2236,26 +2255,13 @@ struct net_device {
/* Interface address info used in eth_type_trans() */
const unsigned char *dev_addr;
- struct netdev_rx_queue *_rx;
unsigned int num_rx_queues;
- unsigned int real_num_rx_queues;
-
- struct bpf_prog __rcu *xdp_prog;
- unsigned long gro_flush_timeout;
- int napi_defer_hard_irqs;
#define GRO_LEGACY_MAX_SIZE 65536u
/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
* and shinfo->gso_segs is a 16bit field.
*/
#define GRO_MAX_SIZE (8 * 65535u)
- unsigned int gro_max_size;
- unsigned int gro_ipv4_max_size;
- rx_handler_func_t __rcu *rx_handler;
- void __rcu *rx_handler_data;
-
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc __rcu *miniq_ingress;
-#endif
+ unsigned int xdp_zc_max_segs;
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
struct nf_hook_entries __rcu *nf_hooks_ingress;
@@ -2270,25 +2276,13 @@ struct net_device {
/*
* Cache lines mostly used on transmit path
*/
- struct netdev_queue *_tx ____cacheline_aligned_in_smp;
unsigned int num_tx_queues;
- unsigned int real_num_tx_queues;
struct Qdisc __rcu *qdisc;
unsigned int tx_queue_len;
spinlock_t tx_global_lock;
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
-#ifdef CONFIG_XPS
- struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
-#endif
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc __rcu *miniq_egress;
-#endif
-#ifdef CONFIG_NETFILTER_EGRESS
- struct nf_hook_entries __rcu *nf_hooks_egress;
-#endif
-
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
#endif
@@ -2309,13 +2303,7 @@ struct net_device {
struct list_head link_watch_list;
- enum { NETREG_UNINITIALIZED=0,
- NETREG_REGISTERED, /* completed register_netdevice */
- NETREG_UNREGISTERING, /* called unregister_netdevice */
- NETREG_UNREGISTERED, /* completed unregister todo */
- NETREG_RELEASED, /* called free_netdev */
- NETREG_DUMMY, /* dummy device for NAPI poll */
- } reg_state:8;
+ u8 reg_state;
bool dismantle;
@@ -2327,21 +2315,11 @@ struct net_device {
bool needs_free_netdev;
void (*priv_destructor)(struct net_device *dev);
-#ifdef CONFIG_NETPOLL
- struct netpoll_info __rcu *npinfo;
-#endif
-
- possible_net_t nd_net;
-
/* mid-layer private */
void *ml_priv;
enum netdev_ml_priv_type ml_priv_type;
- union {
- struct pcpu_lstats __percpu *lstats;
- struct pcpu_sw_netstats __percpu *tstats;
- struct pcpu_dstats __percpu *dstats;
- };
+ enum netdev_stat_type pcpu_stat_type:8;
#if IS_ENABLED(CONFIG_GARP)
struct garp_port __rcu *garp_port;
@@ -2358,6 +2336,8 @@ struct net_device {
const struct rtnl_link_ops *rtnl_link_ops;
+ const struct netdev_stat_ops *stat_ops;
+
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SEGS 65535u
#define GSO_LEGACY_MAX_SIZE 65536u
@@ -2366,20 +2346,15 @@ struct net_device {
*/
#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS)
- unsigned int gso_max_size;
#define TSO_LEGACY_MAX_SIZE 65536
#define TSO_MAX_SIZE UINT_MAX
unsigned int tso_max_size;
- u16 gso_max_segs;
#define TSO_MAX_SEGS U16_MAX
u16 tso_max_segs;
- unsigned int gso_ipv4_max_size;
#ifdef CONFIG_DCB
const struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
- s16 num_tc;
- struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
u8 prio_tc_map[TC_BITMASK + 1];
#if IS_ENABLED(CONFIG_FCOE)
@@ -2414,6 +2389,14 @@ struct net_device {
struct rtnl_hw_stats64 *offload_xstats_l3;
struct devlink_port *devlink_port;
+
+#if IS_ENABLED(CONFIG_DPLL)
+ struct dpll_pin __rcu *dpll_pin;
+#endif
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ /** @page_pools: page pools created for this netdevice */
+ struct hlist_head page_pools;
+#endif
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -2618,6 +2601,15 @@ static inline void *netdev_priv(const struct net_device *dev)
*/
#define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype))
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+ enum netdev_queue_type type,
+ struct napi_struct *napi);
+
+static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
+{
+ napi->irq = irq;
+}
+
/* Default NAPI poll() weight
* Device drivers are strongly advised to not use bigger value
*/
@@ -2734,6 +2726,16 @@ struct pcpu_sw_netstats {
struct u64_stats_sync syncp;
} __aligned(4 * sizeof(u64));
+struct pcpu_dstats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_drops;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_drops;
+ struct u64_stats_sync syncp;
+} __aligned(8 * sizeof(u64));
+
struct pcpu_lstats {
u64_stats_t packets;
u64_stats_t bytes;
@@ -2994,8 +2996,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
-extern rwlock_t dev_base_lock; /* Device list lock */
-
#define for_each_netdev(net, d) \
list_for_each_entry(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_reverse(net, d) \
@@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */
if (netdev_master_upper_dev_get_rcu(slave) == (bond))
#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
+#define for_each_netdev_dump(net, d, ifindex) \
+ xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex))
+
static inline struct net_device *next_net_device(struct net_device *dev)
{
struct list_head *lh;
@@ -3115,7 +3118,7 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
-int init_dummy_netdev(struct net_device *dev);
+void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
@@ -3130,8 +3133,6 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name,
netdevice_tracker *tracker, gfp_t gfp);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
-int dev_restart(struct net_device *dev);
-
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
@@ -3418,6 +3419,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue
#endif
}
+static inline int netdev_queue_dql_avail(const struct netdev_queue *txq)
+{
+#ifdef CONFIG_BQL
+ /* Non-BQL migrated drivers will return 0, too. */
+ return dql_avail(&txq->dql);
+#else
+ return 0;
+#endif
+}
+
/**
* netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
* @dev_queue: pointer to transmit queue
@@ -3830,24 +3841,6 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
int netif_set_real_num_queues(struct net_device *dev,
unsigned int txq, unsigned int rxq);
-static inline struct netdev_rx_queue *
-__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
-{
- return dev->_rx + rxq;
-}
-
-#ifdef CONFIG_SYSFS
-static inline unsigned int get_netdev_rx_queue_index(
- struct netdev_rx_queue *queue)
-{
- struct net_device *dev = queue->dev;
- int index = queue - dev->_rx;
-
- BUG_ON(index >= dev->num_rx_queues);
- return index;
-}
-#endif
-
int netif_get_num_default_rss_queues(void);
void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason);
@@ -3895,7 +3888,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
@@ -3908,8 +3901,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
-struct packet_offload *gro_find_receive_by_type(__be16 type);
-struct packet_offload *gro_find_complete_by_type(__be16 type);
static inline void napi_free_frags(struct napi_struct *napi)
{
@@ -3933,6 +3924,14 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg);
int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
void __user *data, bool *need_copyout);
int dev_ifconf(struct net *net, struct ifconf __user *ifc);
+int generic_hwtstamp_get_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg);
+int generic_hwtstamp_set_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg,
+ struct netlink_ext_ack *extack);
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack);
int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
@@ -3961,6 +3960,7 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
int dev_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid, bool recurse);
bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
+
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
@@ -4001,32 +4001,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
return false;
}
-struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
-
-static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
-{
- /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
- struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
-
- if (likely(p))
- return p;
-
- return netdev_core_stats_alloc(dev);
-}
+void netdev_core_stats_inc(struct net_device *dev, u32 offset);
#define DEV_CORE_STATS_INC(FIELD) \
static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \
{ \
- struct net_device_core_stats __percpu *p; \
- \
- p = dev_core_stats(dev); \
- if (p) \
- this_cpu_inc(p->FIELD); \
+ netdev_core_stats_inc(dev, \
+ offsetof(struct net_device_core_stats, FIELD)); \
}
DEV_CORE_STATS_INC(rx_dropped)
DEV_CORE_STATS_INC(tx_dropped)
DEV_CORE_STATS_INC(rx_nohandler)
DEV_CORE_STATS_INC(rx_otherhost_dropped)
+#undef DEV_CORE_STATS_INC
static __always_inline int ____dev_forward_skb(struct net_device *dev,
struct sk_buff *skb,
@@ -4166,6 +4153,15 @@ static inline void netdev_ref_replace(struct net_device *odev,
void linkwatch_fire_event(struct net_device *dev);
/**
+ * linkwatch_sync_dev - sync linkwatch for the given device
+ * @dev: network device to sync linkwatch for
+ *
+ * Sync linkwatch for the given device, removing it from the
+ * pending work list (if queued).
+ */
+void linkwatch_sync_dev(struct net_device *dev);
+
+/**
* netif_carrier_ok - test if carrier present
* @dev: network device
*
@@ -4274,8 +4270,10 @@ static inline bool netif_testing(const struct net_device *dev)
*/
static inline bool netif_oper_up(const struct net_device *dev)
{
- return (dev->operstate == IF_OPER_UP ||
- dev->operstate == IF_OPER_UNKNOWN /* backward compat */);
+ unsigned int operstate = READ_ONCE(dev->operstate);
+
+ return operstate == IF_OPER_UP ||
+ operstate == IF_OPER_UNKNOWN /* backward compat */;
}
/**
@@ -4714,11 +4712,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
-extern int netdev_max_backlog;
-extern int dev_rx_weight;
-extern int dev_tx_weight;
-extern int gro_normal_batch;
-
enum {
NESTED_SYNC_IMM_BIT,
NESTED_SYNC_TODO_BIT,
@@ -5102,6 +5095,11 @@ static inline bool netif_is_ovs_port(const struct net_device *dev)
return dev->priv_flags & IFF_OVS_DATAPATH;
}
+static inline bool netif_is_any_bridge_master(const struct net_device *dev)
+{
+ return netif_is_bridge_master(dev) || netif_is_ovs_master(dev);
+}
+
static inline bool netif_is_any_bridge_port(const struct net_device *dev)
{
return netif_is_bridge_port(dev) || netif_is_ovs_port(dev);
@@ -5170,7 +5168,9 @@ static inline const char *netdev_name(const struct net_device *dev)
static inline const char *netdev_reg_state(const struct net_device *dev)
{
- switch (dev->reg_state) {
+ u8 reg_state = READ_ONCE(dev->reg_state);
+
+ switch (reg_state) {
case NETREG_UNINITIALIZED: return " (uninitialized)";
case NETREG_REGISTERED: return "";
case NETREG_UNREGISTERING: return " (unregistering)";
@@ -5179,7 +5179,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
case NETREG_DUMMY: return " (dummy)";
}
- WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state);
+ WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state);
return " (unknown)";
}
@@ -5221,7 +5221,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
-extern struct list_head ptype_all __read_mostly;
extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
extern struct net_device *blackhole_netdev;
@@ -5230,5 +5229,6 @@ extern struct net_device *blackhole_netdev;
#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD)
#define DEV_STATS_ADD(DEV, FIELD, VAL) \
atomic_long_add((VAL), &(DEV)->stats.__##FIELD)
+#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD)
#endif /* _LINUX_NETDEVICE_H */
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index d4fed4c508ca..2683b2b77612 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -11,6 +11,7 @@
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/static_key.h>
+#include <linux/module.h>
#include <linux/netfilter_defs.h>
#include <linux/netdevice.h>
#include <linux/sockptr.h>
@@ -21,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict)
return -(verdict >> NF_VERDICT_QBITS);
}
+static __always_inline int
+NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err)
+{
+ BUILD_BUG_ON(err > 0xffff);
+
+ kfree_skb_reason(skb, reason);
+
+ return ((err << 16) | NF_STOLEN);
+}
+
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
{
@@ -359,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
u_int8_t protocol, unsigned short family);
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict, unsigned short family);
-int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
#include <net/flow.h>
@@ -463,6 +473,7 @@ struct nf_ct_hook {
const struct sk_buff *);
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
void (*set_closing)(struct nf_conntrack *nfct);
+ int (*confirm)(struct sk_buff *skb);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;
@@ -481,6 +492,15 @@ struct nfnl_ct_hook {
};
extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
+struct nf_defrag_hook {
+ struct module *owner;
+ int (*enable)(struct net *net);
+ void (*disable)(struct net *net);
+};
+
+extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
+extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
+
/*
* nf_skb_duplicated - TEE target has sent a packet
*
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index e8c350a3ade1..e9f4f845d760 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -186,6 +186,8 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Cancel ongoing garbage collectors before destroying the set*/
+ void (*cancel_gc)(struct ip_set *set);
/* Region-locking is used */
bool region_lock;
};
@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
/* A generic IP set */
struct ip_set {
+ /* For call_cru in destroy */
+ struct rcu_head rcu;
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index 9e937f64a1ad..81286c499325 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -34,10 +34,6 @@ struct nf_ct_h323_master {
int get_h225_addr(struct nf_conn *ct, unsigned char *data,
TransportAddress *taddr, union nf_inet_addr *addr,
__be16 *port);
-void nf_conntrack_h245_expect(struct nf_conn *new,
- struct nf_conntrack_expect *this);
-void nf_conntrack_q931_expect(struct nf_conn *new,
- struct nf_conntrack_expect *this);
struct nfct_h323_nat_hooks {
int (*set_h245_addr)(struct sk_buff *skb, unsigned int protoff,
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index f33aa6021364..34ce5d2f37a2 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -25,7 +25,6 @@ struct nf_ct_gre_keymap {
int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
struct nf_conntrack_tuple *t);
-void nf_ct_gre_keymap_flush(struct net *net);
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h
index 625f491b95de..fb31312825ae 100644
--- a/include/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/linux/netfilter/nf_conntrack_sctp.h
@@ -9,6 +9,7 @@ struct ip_ct_sctp {
enum sctp_conntrack state;
__be32 vtag[IP_CT_DIR_MAX];
+ u8 init[IP_CT_DIR_MAX];
u8 last_dir;
u8 flags;
};
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f980edfdd278..743475ca7e9d 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -42,7 +42,7 @@ static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
if (!nf_bridge)
return 0;
- return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0;
+ return nf_bridge->physinif;
}
static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
@@ -56,11 +56,11 @@ static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
}
static inline struct net_device *
-nf_bridge_get_physindev(const struct sk_buff *skb)
+nf_bridge_get_physindev(const struct sk_buff *skb, struct net *net)
{
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
- return nf_bridge ? nf_bridge->physindev : NULL;
+ return nf_bridge ? dev_get_by_index_rcu(net, nf_bridge->physinif) : NULL;
}
static inline struct net_device *
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 7834c0be2831..61aa48f46dd7 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -51,7 +51,7 @@ struct nf_ipv6_ops {
u32 (*cookie_init_sequence)(const struct ipv6hdr *iph,
const struct tcphdr *th, u16 *mssp);
int (*cookie_v6_check)(const struct ipv6hdr *iph,
- const struct tcphdr *th, __u32 cookie);
+ const struct tcphdr *th);
#endif
void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
@@ -179,16 +179,16 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph,
}
static inline int nf_cookie_v6_check(const struct ipv6hdr *iph,
- const struct tcphdr *th, __u32 cookie)
+ const struct tcphdr *th)
{
#if IS_ENABLED(CONFIG_SYN_COOKIES)
#if IS_MODULE(CONFIG_IPV6)
const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
if (v6_ops)
- return v6_ops->cookie_v6_check(iph, th, cookie);
+ return v6_ops->cookie_v6_check(iph, th);
#elif IS_BUILTIN(CONFIG_IPV6)
- return __cookie_v6_check(iph, th, cookie);
+ return __cookie_v6_check(iph, th);
#endif
#endif
return 0;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index b11a84f6c32b..100cbb261269 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page)
return folio_wait_private_2_killable(page_folio(page));
}
+/* Marks used on xarray-based buffers */
+#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */
+#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */
+
enum netfs_io_source {
NETFS_FILL_WITH_ZEROES,
NETFS_DOWNLOAD_FROM_SERVER,
NETFS_READ_FROM_CACHE,
NETFS_INVALID_READ,
+ NETFS_UPLOAD_TO_SERVER,
+ NETFS_WRITE_TO_CACHE,
+ NETFS_INVALID_WRITE,
} __mode(byte);
typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
@@ -129,9 +136,57 @@ struct netfs_inode {
struct fscache_cookie *cache;
#endif
loff_t remote_i_size; /* Size of the remote file */
+ loff_t zero_point; /* Size after which we assume there's no data
+ * on the server */
+ unsigned long flags;
+#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
+#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
+#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
+#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */
+};
+
+/*
+ * A netfs group - for instance a ceph snap. This is marked on dirty pages and
+ * pages marked with a group must be flushed before they can be written under
+ * the domain of another group.
+ */
+struct netfs_group {
+ refcount_t ref;
+ void (*free)(struct netfs_group *netfs_group);
};
/*
+ * Information about a dirty page (attached only if necessary).
+ * folio->private
+ */
+struct netfs_folio {
+ struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */
+ unsigned int dirty_offset; /* Write-streaming dirty data offset */
+ unsigned int dirty_len; /* Write-streaming dirty data length */
+};
+#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */
+
+static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+{
+ void *priv = folio_get_private(folio);
+
+ if ((unsigned long)priv & NETFS_FOLIO_INFO)
+ return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
+ return NULL;
+}
+
+static inline struct netfs_group *netfs_folio_group(struct folio *folio)
+{
+ struct netfs_folio *finfo;
+ void *priv = folio_get_private(folio);
+
+ finfo = netfs_folio_info(folio);
+ if (finfo)
+ return finfo->netfs_group;
+ return priv;
+}
+
+/*
* Resources required to do operations on a cache.
*/
struct netfs_cache_resources {
@@ -143,17 +198,24 @@ struct netfs_cache_resources {
};
/*
- * Descriptor for a single component subrequest.
+ * Descriptor for a single component subrequest. Each operation represents an
+ * individual read/write from/to a server, a cache, a journal, etc..
+ *
+ * The buffer iterator is persistent for the life of the subrequest struct and
+ * the pages it points to can be relied on to exist for the duration.
*/
struct netfs_io_subrequest {
struct netfs_io_request *rreq; /* Supervising I/O request */
+ struct work_struct work;
struct list_head rreq_link; /* Link in rreq->subrequests */
+ struct iov_iter io_iter; /* Iterator for this subrequest */
loff_t start; /* Where to start the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
+ unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */
enum netfs_io_source source; /* Where to read from/write to */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
@@ -168,6 +230,13 @@ enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
+ NETFS_WRITEBACK, /* This write was triggered by writepages */
+ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
+ NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */
+ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
+ NETFS_DIO_READ, /* This is a direct I/O read */
+ NETFS_DIO_WRITE, /* This is a direct I/O write */
+ nr__netfs_io_origin
} __mode(byte);
/*
@@ -175,19 +244,34 @@ enum netfs_io_origin {
* operations to a variety of data stores and then stitch the result together.
*/
struct netfs_io_request {
- struct work_struct work;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
struct inode *inode; /* The file being accessed */
struct address_space *mapping; /* The mapping being accessed */
+ struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources;
+ struct list_head proc_link; /* Link in netfs_iorequests */
struct list_head subrequests; /* Contributory I/O operations */
+ struct iov_iter iter; /* Unencrypted-side iterator */
+ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
void *netfs_priv; /* Private data for the netfs */
+ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
+ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
+ unsigned int rsize; /* Maximum read size (0 for none) */
+ unsigned int wsize; /* Maximum write size (0 for none) */
+ unsigned int subreq_counter; /* Next subreq->debug_index */
atomic_t nr_outstanding; /* Number of ops in progress */
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
size_t submitted; /* Amount submitted for I/O so far */
size_t len; /* Length of the request */
+ size_t upper_len; /* Length can be extended to here */
+ size_t transferred; /* Amount to be indicated as transferred */
short error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
+ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
loff_t i_size; /* Size of the file */
loff_t start; /* Start position */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
@@ -199,17 +283,25 @@ struct netfs_io_request {
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
+#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */
+#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
+#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
+#define NETFS_RREQ_BLOCKED 10 /* We blocked */
const struct netfs_request_ops *netfs_ops;
+ void (*cleanup)(struct netfs_io_request *req);
};
/*
* Operations the network filesystem can/must provide to the helpers.
*/
struct netfs_request_ops {
+ unsigned int io_request_size; /* Alloc size for netfs_io_request struct */
+ unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */
int (*init_request)(struct netfs_io_request *rreq, struct file *file);
void (*free_request)(struct netfs_io_request *rreq);
- int (*begin_cache_operation)(struct netfs_io_request *rreq);
+ void (*free_subrequest)(struct netfs_io_subrequest *rreq);
+ /* Read request handling */
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -217,6 +309,14 @@ struct netfs_request_ops {
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
+
+ /* Modification handling */
+ void (*update_i_size)(struct inode *inode, loff_t i_size);
+
+ /* Write request handling */
+ void (*create_write_requests)(struct netfs_io_request *wreq,
+ loff_t start, size_t len);
+ void (*invalidate_cache)(struct netfs_io_request *wreq);
};
/*
@@ -229,8 +329,7 @@ enum netfs_read_from_hole {
};
/*
- * Table of operations for access to a cache. This is obtained by
- * rreq->ops->begin_cache_operation().
+ * Table of operations for access to a cache.
*/
struct netfs_cache_ops {
/* End an operation */
@@ -265,8 +364,8 @@ struct netfs_cache_ops {
* actually do.
*/
int (*prepare_write)(struct netfs_cache_resources *cres,
- loff_t *_start, size_t *_len, loff_t i_size,
- bool no_space_allocated_yet);
+ loff_t *_start, size_t *_len, size_t upper_len,
+ loff_t i_size, bool no_space_allocated_yet);
/* Prepare an on-demand read operation, shortening it to a cached/uncached
* boundary as appropriate.
@@ -284,22 +383,62 @@ struct netfs_cache_ops {
loff_t *_data_start, size_t *_data_len);
};
+/* High-level read API. */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+
+/* High-level write API */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct netfs_group *netfs_group);
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+ struct netfs_group *netfs_group);
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+/* Address operations API */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
- struct address_space *, loff_t pos, unsigned int len,
- struct folio **, void **fsdata);
-
+ struct address_space *, loff_t pos, unsigned int len,
+ struct folio **, void **fsdata);
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
+bool netfs_release_folio(struct folio *folio, gfp_t gfp);
+int netfs_launder_folio(struct folio *folio);
+
+/* VMA operations API. */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
+
+/* (Sub)request management API. */
void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
bool was_async, enum netfs_sreq_ref_trace what);
-void netfs_stats_show(struct seq_file *);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
struct iov_iter *new,
iov_iter_extraction_t extraction_flags);
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs);
+struct netfs_io_subrequest *netfs_create_write_request(
+ struct netfs_io_request *wreq, enum netfs_io_source dest,
+ loff_t start, size_t len, work_func_t worker);
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+ bool was_async);
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
+
+int netfs_start_io_read(struct inode *inode);
+void netfs_end_io_read(struct inode *inode);
+int netfs_start_io_write(struct inode *inode);
+void netfs_end_io_write(struct inode *inode);
+int netfs_start_io_direct(struct inode *inode);
+void netfs_end_io_direct(struct inode *inode);
/**
* netfs_inode - Get the netfs inode context from the inode
@@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
* netfs_inode_init - Initialise a netfslib inode context
* @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
+ * @use_zero_point: True to use the zero_point read optimisation
*
* Initialise the netfs library context struct. This is expected to follow on
* directly from the VFS inode struct.
*/
static inline void netfs_inode_init(struct netfs_inode *ctx,
- const struct netfs_request_ops *ops)
+ const struct netfs_request_ops *ops,
+ bool use_zero_point)
{
ctx->ops = ops;
ctx->remote_i_size = i_size_read(&ctx->inode);
+ ctx->zero_point = LLONG_MAX;
+ ctx->flags = 0;
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
+ /* ->releasepage() drives zero_point */
+ if (use_zero_point) {
+ ctx->zero_point = ctx->remote_i_size;
+ mapping_set_release_always(ctx->inode.i_mapping);
+ }
}
/**
* netfs_resize_file - Note that a file got resized
* @ctx: The netfs inode being resized
* @new_i_size: The new file size
+ * @changed_on_server: The change was applied to the server
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+ bool changed_on_server)
{
- ctx->remote_i_size = new_i_size;
+ if (changed_on_server)
+ ctx->remote_i_size = new_i_size;
+ if (new_i_size < ctx->zero_point)
+ ctx->zero_point = new_i_size;
}
/**
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9eec3f4f5351..5df7340d4dab 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -50,6 +50,7 @@ struct netlink_kernel_cfg {
struct mutex *cb_mutex;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release) (struct sock *sk, unsigned long *groups);
};
struct sock *__netlink_kernel_create(struct net *net, int unit,
@@ -227,6 +228,13 @@ bool netlink_strict_get_check(struct sk_buff *skb);
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
+
+typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data);
+
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ __u32 portid, __u32 group, gfp_t allocation,
+ netlink_filter_fn filter,
+ void *filter_data);
int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
int netlink_register_notifier(struct notifier_block *nb);
int netlink_unregister_notifier(struct notifier_block *nb);
@@ -283,6 +291,7 @@ struct netlink_callback {
u16 answer_flags;
u32 min_dump_alloc;
unsigned int prev_seq, seq;
+ int flags;
bool strict_check;
union {
u8 ctx[48];
@@ -315,6 +324,7 @@ struct netlink_dump_control {
void *data;
struct module *module;
u32 min_dump_alloc;
+ int flags;
};
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -345,5 +355,6 @@ bool netlink_ns_capable(const struct sk_buff *skb,
struct user_namespace *ns, int cap);
bool netlink_capable(const struct sk_buff *skb, int cap);
bool netlink_net_capable(const struct sk_buff *skb, int cap);
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast);
#endif /* __LINUX_NETLINK_H */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 730003c4f4af..ef8d2d618d5b 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -150,7 +150,7 @@ enum nfs_opnum4 {
OP_WRITE_SAME = 70,
OP_CLONE = 71,
- /* xattr support (RFC8726) */
+ /* xattr support (RFC8276) */
OP_GETXATTR = 72,
OP_SETXATTR = 73,
OP_LISTXATTRS = 74,
@@ -389,79 +389,203 @@ enum lock_type4 {
NFS4_WRITEW_LT = 4
};
+/*
+ * Symbol names and values are from RFC 7531 Section 2.
+ * "XDR Description of NFSv4.0"
+ */
+enum {
+ FATTR4_SUPPORTED_ATTRS = 0,
+ FATTR4_TYPE = 1,
+ FATTR4_FH_EXPIRE_TYPE = 2,
+ FATTR4_CHANGE = 3,
+ FATTR4_SIZE = 4,
+ FATTR4_LINK_SUPPORT = 5,
+ FATTR4_SYMLINK_SUPPORT = 6,
+ FATTR4_NAMED_ATTR = 7,
+ FATTR4_FSID = 8,
+ FATTR4_UNIQUE_HANDLES = 9,
+ FATTR4_LEASE_TIME = 10,
+ FATTR4_RDATTR_ERROR = 11,
+ FATTR4_ACL = 12,
+ FATTR4_ACLSUPPORT = 13,
+ FATTR4_ARCHIVE = 14,
+ FATTR4_CANSETTIME = 15,
+ FATTR4_CASE_INSENSITIVE = 16,
+ FATTR4_CASE_PRESERVING = 17,
+ FATTR4_CHOWN_RESTRICTED = 18,
+ FATTR4_FILEHANDLE = 19,
+ FATTR4_FILEID = 20,
+ FATTR4_FILES_AVAIL = 21,
+ FATTR4_FILES_FREE = 22,
+ FATTR4_FILES_TOTAL = 23,
+ FATTR4_FS_LOCATIONS = 24,
+ FATTR4_HIDDEN = 25,
+ FATTR4_HOMOGENEOUS = 26,
+ FATTR4_MAXFILESIZE = 27,
+ FATTR4_MAXLINK = 28,
+ FATTR4_MAXNAME = 29,
+ FATTR4_MAXREAD = 30,
+ FATTR4_MAXWRITE = 31,
+ FATTR4_MIMETYPE = 32,
+ FATTR4_MODE = 33,
+ FATTR4_NO_TRUNC = 34,
+ FATTR4_NUMLINKS = 35,
+ FATTR4_OWNER = 36,
+ FATTR4_OWNER_GROUP = 37,
+ FATTR4_QUOTA_AVAIL_HARD = 38,
+ FATTR4_QUOTA_AVAIL_SOFT = 39,
+ FATTR4_QUOTA_USED = 40,
+ FATTR4_RAWDEV = 41,
+ FATTR4_SPACE_AVAIL = 42,
+ FATTR4_SPACE_FREE = 43,
+ FATTR4_SPACE_TOTAL = 44,
+ FATTR4_SPACE_USED = 45,
+ FATTR4_SYSTEM = 46,
+ FATTR4_TIME_ACCESS = 47,
+ FATTR4_TIME_ACCESS_SET = 48,
+ FATTR4_TIME_BACKUP = 49,
+ FATTR4_TIME_CREATE = 50,
+ FATTR4_TIME_DELTA = 51,
+ FATTR4_TIME_METADATA = 52,
+ FATTR4_TIME_MODIFY = 53,
+ FATTR4_TIME_MODIFY_SET = 54,
+ FATTR4_MOUNTED_ON_FILEID = 55,
+};
+
+/*
+ * Symbol names and values are from RFC 5662 Section 2.
+ * "XDR Description of NFSv4.1"
+ */
+enum {
+ FATTR4_DIR_NOTIF_DELAY = 56,
+ FATTR4_DIRENT_NOTIF_DELAY = 57,
+ FATTR4_DACL = 58,
+ FATTR4_SACL = 59,
+ FATTR4_CHANGE_POLICY = 60,
+ FATTR4_FS_STATUS = 61,
+ FATTR4_FS_LAYOUT_TYPES = 62,
+ FATTR4_LAYOUT_HINT = 63,
+ FATTR4_LAYOUT_TYPES = 64,
+ FATTR4_LAYOUT_BLKSIZE = 65,
+ FATTR4_LAYOUT_ALIGNMENT = 66,
+ FATTR4_FS_LOCATIONS_INFO = 67,
+ FATTR4_MDSTHRESHOLD = 68,
+ FATTR4_RETENTION_GET = 69,
+ FATTR4_RETENTION_SET = 70,
+ FATTR4_RETENTEVT_GET = 71,
+ FATTR4_RETENTEVT_SET = 72,
+ FATTR4_RETENTION_HOLD = 73,
+ FATTR4_MODE_SET_MASKED = 74,
+ FATTR4_SUPPATTR_EXCLCREAT = 75,
+ FATTR4_FS_CHARSET_CAP = 76,
+};
+
+/*
+ * Symbol names and values are from RFC 7863 Section 2.
+ * "XDR Description of NFSv4.2"
+ */
+enum {
+ FATTR4_CLONE_BLKSIZE = 77,
+ FATTR4_SPACE_FREED = 78,
+ FATTR4_CHANGE_ATTR_TYPE = 79,
+ FATTR4_SEC_LABEL = 80,
+};
+
+/*
+ * Symbol names and values are from RFC 8275 Section 5.
+ * "The mode_umask Attribute"
+ */
+enum {
+ FATTR4_MODE_UMASK = 81,
+};
+
+/*
+ * Symbol names and values are from RFC 8276 Section 8.6.
+ * "Numeric Values Assigned to Protocol Extensions"
+ */
+enum {
+ FATTR4_XATTR_SUPPORT = 82,
+};
+
+/*
+ * The following internal definitions enable processing the above
+ * attribute bits within 32-bit word boundaries.
+ */
/* Mandatory Attributes */
-#define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0)
-#define FATTR4_WORD0_TYPE (1UL << 1)
-#define FATTR4_WORD0_FH_EXPIRE_TYPE (1UL << 2)
-#define FATTR4_WORD0_CHANGE (1UL << 3)
-#define FATTR4_WORD0_SIZE (1UL << 4)
-#define FATTR4_WORD0_LINK_SUPPORT (1UL << 5)
-#define FATTR4_WORD0_SYMLINK_SUPPORT (1UL << 6)
-#define FATTR4_WORD0_NAMED_ATTR (1UL << 7)
-#define FATTR4_WORD0_FSID (1UL << 8)
-#define FATTR4_WORD0_UNIQUE_HANDLES (1UL << 9)
-#define FATTR4_WORD0_LEASE_TIME (1UL << 10)
-#define FATTR4_WORD0_RDATTR_ERROR (1UL << 11)
+#define FATTR4_WORD0_SUPPORTED_ATTRS BIT(FATTR4_SUPPORTED_ATTRS)
+#define FATTR4_WORD0_TYPE BIT(FATTR4_TYPE)
+#define FATTR4_WORD0_FH_EXPIRE_TYPE BIT(FATTR4_FH_EXPIRE_TYPE)
+#define FATTR4_WORD0_CHANGE BIT(FATTR4_CHANGE)
+#define FATTR4_WORD0_SIZE BIT(FATTR4_SIZE)
+#define FATTR4_WORD0_LINK_SUPPORT BIT(FATTR4_LINK_SUPPORT)
+#define FATTR4_WORD0_SYMLINK_SUPPORT BIT(FATTR4_SYMLINK_SUPPORT)
+#define FATTR4_WORD0_NAMED_ATTR BIT(FATTR4_NAMED_ATTR)
+#define FATTR4_WORD0_FSID BIT(FATTR4_FSID)
+#define FATTR4_WORD0_UNIQUE_HANDLES BIT(FATTR4_UNIQUE_HANDLES)
+#define FATTR4_WORD0_LEASE_TIME BIT(FATTR4_LEASE_TIME)
+#define FATTR4_WORD0_RDATTR_ERROR BIT(FATTR4_RDATTR_ERROR)
/* Mandatory in NFSv4.1 */
-#define FATTR4_WORD2_SUPPATTR_EXCLCREAT (1UL << 11)
+#define FATTR4_WORD2_SUPPATTR_EXCLCREAT BIT(FATTR4_SUPPATTR_EXCLCREAT - 64)
/* Recommended Attributes */
-#define FATTR4_WORD0_ACL (1UL << 12)
-#define FATTR4_WORD0_ACLSUPPORT (1UL << 13)
-#define FATTR4_WORD0_ARCHIVE (1UL << 14)
-#define FATTR4_WORD0_CANSETTIME (1UL << 15)
-#define FATTR4_WORD0_CASE_INSENSITIVE (1UL << 16)
-#define FATTR4_WORD0_CASE_PRESERVING (1UL << 17)
-#define FATTR4_WORD0_CHOWN_RESTRICTED (1UL << 18)
-#define FATTR4_WORD0_FILEHANDLE (1UL << 19)
-#define FATTR4_WORD0_FILEID (1UL << 20)
-#define FATTR4_WORD0_FILES_AVAIL (1UL << 21)
-#define FATTR4_WORD0_FILES_FREE (1UL << 22)
-#define FATTR4_WORD0_FILES_TOTAL (1UL << 23)
-#define FATTR4_WORD0_FS_LOCATIONS (1UL << 24)
-#define FATTR4_WORD0_HIDDEN (1UL << 25)
-#define FATTR4_WORD0_HOMOGENEOUS (1UL << 26)
-#define FATTR4_WORD0_MAXFILESIZE (1UL << 27)
-#define FATTR4_WORD0_MAXLINK (1UL << 28)
-#define FATTR4_WORD0_MAXNAME (1UL << 29)
-#define FATTR4_WORD0_MAXREAD (1UL << 30)
-#define FATTR4_WORD0_MAXWRITE (1UL << 31)
-#define FATTR4_WORD1_MIMETYPE (1UL << 0)
-#define FATTR4_WORD1_MODE (1UL << 1)
-#define FATTR4_WORD1_NO_TRUNC (1UL << 2)
-#define FATTR4_WORD1_NUMLINKS (1UL << 3)
-#define FATTR4_WORD1_OWNER (1UL << 4)
-#define FATTR4_WORD1_OWNER_GROUP (1UL << 5)
-#define FATTR4_WORD1_QUOTA_HARD (1UL << 6)
-#define FATTR4_WORD1_QUOTA_SOFT (1UL << 7)
-#define FATTR4_WORD1_QUOTA_USED (1UL << 8)
-#define FATTR4_WORD1_RAWDEV (1UL << 9)
-#define FATTR4_WORD1_SPACE_AVAIL (1UL << 10)
-#define FATTR4_WORD1_SPACE_FREE (1UL << 11)
-#define FATTR4_WORD1_SPACE_TOTAL (1UL << 12)
-#define FATTR4_WORD1_SPACE_USED (1UL << 13)
-#define FATTR4_WORD1_SYSTEM (1UL << 14)
-#define FATTR4_WORD1_TIME_ACCESS (1UL << 15)
-#define FATTR4_WORD1_TIME_ACCESS_SET (1UL << 16)
-#define FATTR4_WORD1_TIME_BACKUP (1UL << 17)
-#define FATTR4_WORD1_TIME_CREATE (1UL << 18)
-#define FATTR4_WORD1_TIME_DELTA (1UL << 19)
-#define FATTR4_WORD1_TIME_METADATA (1UL << 20)
-#define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
-#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22)
-#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
-#define FATTR4_WORD1_DACL (1UL << 26)
-#define FATTR4_WORD1_SACL (1UL << 27)
-#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
-#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0)
-#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
-#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
-#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
-#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
-#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
-#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
-#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
+#define FATTR4_WORD0_ACL BIT(FATTR4_ACL)
+#define FATTR4_WORD0_ACLSUPPORT BIT(FATTR4_ACLSUPPORT)
+#define FATTR4_WORD0_ARCHIVE BIT(FATTR4_ARCHIVE)
+#define FATTR4_WORD0_CANSETTIME BIT(FATTR4_CANSETTIME)
+#define FATTR4_WORD0_CASE_INSENSITIVE BIT(FATTR4_CASE_INSENSITIVE)
+#define FATTR4_WORD0_CASE_PRESERVING BIT(FATTR4_CASE_PRESERVING)
+#define FATTR4_WORD0_CHOWN_RESTRICTED BIT(FATTR4_CHOWN_RESTRICTED)
+#define FATTR4_WORD0_FILEHANDLE BIT(FATTR4_FILEHANDLE)
+#define FATTR4_WORD0_FILEID BIT(FATTR4_FILEID)
+#define FATTR4_WORD0_FILES_AVAIL BIT(FATTR4_FILES_AVAIL)
+#define FATTR4_WORD0_FILES_FREE BIT(FATTR4_FILES_FREE)
+#define FATTR4_WORD0_FILES_TOTAL BIT(FATTR4_FILES_TOTAL)
+#define FATTR4_WORD0_FS_LOCATIONS BIT(FATTR4_FS_LOCATIONS)
+#define FATTR4_WORD0_HIDDEN BIT(FATTR4_HIDDEN)
+#define FATTR4_WORD0_HOMOGENEOUS BIT(FATTR4_HOMOGENEOUS)
+#define FATTR4_WORD0_MAXFILESIZE BIT(FATTR4_MAXFILESIZE)
+#define FATTR4_WORD0_MAXLINK BIT(FATTR4_MAXLINK)
+#define FATTR4_WORD0_MAXNAME BIT(FATTR4_MAXNAME)
+#define FATTR4_WORD0_MAXREAD BIT(FATTR4_MAXREAD)
+#define FATTR4_WORD0_MAXWRITE BIT(FATTR4_MAXWRITE)
+
+#define FATTR4_WORD1_MIMETYPE BIT(FATTR4_MIMETYPE - 32)
+#define FATTR4_WORD1_MODE BIT(FATTR4_MODE - 32)
+#define FATTR4_WORD1_NO_TRUNC BIT(FATTR4_NO_TRUNC - 32)
+#define FATTR4_WORD1_NUMLINKS BIT(FATTR4_NUMLINKS - 32)
+#define FATTR4_WORD1_OWNER BIT(FATTR4_OWNER - 32)
+#define FATTR4_WORD1_OWNER_GROUP BIT(FATTR4_OWNER_GROUP - 32)
+#define FATTR4_WORD1_QUOTA_HARD BIT(FATTR4_QUOTA_AVAIL_HARD - 32)
+#define FATTR4_WORD1_QUOTA_SOFT BIT(FATTR4_QUOTA_AVAIL_SOFT - 32)
+#define FATTR4_WORD1_QUOTA_USED BIT(FATTR4_QUOTA_USED - 32)
+#define FATTR4_WORD1_RAWDEV BIT(FATTR4_RAWDEV - 32)
+#define FATTR4_WORD1_SPACE_AVAIL BIT(FATTR4_SPACE_AVAIL - 32)
+#define FATTR4_WORD1_SPACE_FREE BIT(FATTR4_SPACE_FREE - 32)
+#define FATTR4_WORD1_SPACE_TOTAL BIT(FATTR4_SPACE_TOTAL - 32)
+#define FATTR4_WORD1_SPACE_USED BIT(FATTR4_SPACE_USED - 32)
+#define FATTR4_WORD1_SYSTEM BIT(FATTR4_SYSTEM - 32)
+#define FATTR4_WORD1_TIME_ACCESS BIT(FATTR4_TIME_ACCESS - 32)
+#define FATTR4_WORD1_TIME_ACCESS_SET BIT(FATTR4_TIME_ACCESS_SET - 32)
+#define FATTR4_WORD1_TIME_BACKUP BIT(FATTR4_TIME_BACKUP - 32)
+#define FATTR4_WORD1_TIME_CREATE BIT(FATTR4_TIME_CREATE - 32)
+#define FATTR4_WORD1_TIME_DELTA BIT(FATTR4_TIME_DELTA - 32)
+#define FATTR4_WORD1_TIME_METADATA BIT(FATTR4_TIME_METADATA - 32)
+#define FATTR4_WORD1_TIME_MODIFY BIT(FATTR4_TIME_MODIFY - 32)
+#define FATTR4_WORD1_TIME_MODIFY_SET BIT(FATTR4_TIME_MODIFY_SET - 32)
+#define FATTR4_WORD1_MOUNTED_ON_FILEID BIT(FATTR4_MOUNTED_ON_FILEID - 32)
+#define FATTR4_WORD1_DACL BIT(FATTR4_DACL - 32)
+#define FATTR4_WORD1_SACL BIT(FATTR4_SACL - 32)
+#define FATTR4_WORD1_FS_LAYOUT_TYPES BIT(FATTR4_FS_LAYOUT_TYPES - 32)
+
+#define FATTR4_WORD2_LAYOUT_TYPES BIT(FATTR4_LAYOUT_TYPES - 64)
+#define FATTR4_WORD2_LAYOUT_BLKSIZE BIT(FATTR4_LAYOUT_BLKSIZE - 64)
+#define FATTR4_WORD2_MDSTHRESHOLD BIT(FATTR4_MDSTHRESHOLD - 64)
+#define FATTR4_WORD2_CLONE_BLKSIZE BIT(FATTR4_CLONE_BLKSIZE - 64)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE BIT(FATTR4_CHANGE_ATTR_TYPE - 64)
+#define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64)
+#define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64)
+#define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64)
/* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0)
@@ -745,4 +869,26 @@ enum {
RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15,
};
+enum nfs_cb_opnum4 {
+ OP_CB_GETATTR = 3,
+ OP_CB_RECALL = 4,
+
+ /* Callback operations new to NFSv4.1 */
+ OP_CB_LAYOUTRECALL = 5,
+ OP_CB_NOTIFY = 6,
+ OP_CB_PUSH_DELEG = 7,
+ OP_CB_RECALL_ANY = 8,
+ OP_CB_RECALLABLE_OBJ_AVAIL = 9,
+ OP_CB_RECALL_SLOT = 10,
+ OP_CB_SEQUENCE = 11,
+ OP_CB_WANTS_CANCELLED = 12,
+ OP_CB_NOTIFY_LOCK = 13,
+ OP_CB_NOTIFY_DEVICEID = 14,
+
+ /* Callback operations new to NFSv4.2 */
+ OP_CB_OFFLOAD = 15,
+
+ OP_CB_ILLEGAL = 10044,
+};
+
#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 279262057a92..d59116ac8209 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -595,7 +595,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
* linux/fs/nfs/write.c
*/
extern int nfs_congestion_kb;
-extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct folio *folio);
extern int nfs_update_folio(struct file *file, struct folio *folio,
@@ -612,6 +611,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_commit_data *data);
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo);
bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
static inline bool nfs_have_writebacks(const struct inode *inode)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 20eeba8b009d..92de074e63b9 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -48,6 +48,7 @@ struct nfs_client {
#define NFS_CS_NOPING 6 /* - don't ping on connect */
#define NFS_CS_DS 7 /* - Server is a DS */
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
+#define NFS_CS_PNFS 9 /* - Server used for pnfs */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -123,6 +124,7 @@ struct nfs_client {
char cl_ipaddr[48];
struct net *cl_net;
struct list_head pending_cb_stateids;
+ struct rcu_head rcu;
};
/*
@@ -238,6 +240,7 @@ struct nfs_server {
struct list_head delegations;
struct list_head ss_copies;
+ unsigned long delegation_gen;
unsigned long mig_gen;
unsigned long mig_status;
#define NFS_MIG_IN_TRANSITION (1)
@@ -263,6 +266,7 @@ struct nfs_server {
const struct cred *cred;
bool has_sec_mnt_opts;
struct kobject kobj;
+ struct rcu_head rcu;
};
/* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index aa9f4c6ebe26..1c315f854ea8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *);
extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
extern int nfs_page_group_lock_subrequests(struct nfs_page *head);
-extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
+extern void nfs_join_page_group(struct nfs_page *head,
+ struct nfs_commit_info *cinfo,
+ struct inode *inode);
extern int nfs_page_group_lock(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 12bbb5c63664..d09b9773b20c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1772,7 +1772,7 @@ struct nfs_rpc_ops {
void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *);
int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir);
int (*link) (struct inode *, struct inode *, const struct qstr *);
- int (*symlink) (struct inode *, struct dentry *, struct page *,
+ int (*symlink) (struct inode *, struct dentry *, struct folio *,
unsigned int, struct iattr *);
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
@@ -1821,13 +1821,6 @@ struct nfs_rpc_ops {
};
/*
- * NFS_CALL(getattr, inode, (fattr));
- * into
- * NFS_PROTO(inode)->getattr(fattr);
- */
-#define NFS_CALL(op, inode, args) NFS_PROTO(inode)->op args
-
-/*
* Function vectors etc. for the NFS client
*/
extern const struct nfs_rpc_ops nfs_v2_clientops;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index e3e6a64b98e0..f53438eae815 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void)
#ifdef arch_trigger_cpumask_backtrace
static inline bool trigger_all_cpu_backtrace(void)
{
- arch_trigger_cpumask_backtrace(cpu_online_mask, false);
+ arch_trigger_cpumask_backtrace(cpu_online_mask, -1);
return true;
}
-static inline bool trigger_allbutself_cpu_backtrace(void)
+static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
{
- arch_trigger_cpumask_backtrace(cpu_online_mask, true);
+ arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu);
return true;
}
static inline bool trigger_cpumask_backtrace(struct cpumask *mask)
{
- arch_trigger_cpumask_backtrace(mask, false);
+ arch_trigger_cpumask_backtrace(mask, -1);
return true;
}
static inline bool trigger_single_cpu_backtrace(int cpu)
{
- arch_trigger_cpumask_backtrace(cpumask_of(cpu), false);
+ arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1);
return true;
}
/* generic implementation */
void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
- bool exclude_self,
+ int exclude_cpu,
void (*raise)(cpumask_t *mask));
bool nmi_cpu_backtrace(struct pt_regs *regs);
@@ -190,7 +190,7 @@ static inline bool trigger_all_cpu_backtrace(void)
{
return false;
}
-static inline bool trigger_allbutself_cpu_backtrace(void)
+static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu)
{
return false;
}
@@ -216,13 +216,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
static inline void watchdog_update_hrtimer_threshold(u64 period) { }
#endif
-struct ctl_table;
-int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
-
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
#include <asm/nmi.h>
#endif
diff --git a/include/linux/node.h b/include/linux/node.h
index 427a5975cf40..dfc004e4bee7 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -20,20 +20,32 @@
#include <linux/list.h>
/**
- * struct node_hmem_attrs - heterogeneous memory performance attributes
+ * struct access_coordinate - generic performance coordinates container
*
* @read_bandwidth: Read bandwidth in MB/s
* @write_bandwidth: Write bandwidth in MB/s
* @read_latency: Read latency in nanoseconds
* @write_latency: Write latency in nanoseconds
*/
-struct node_hmem_attrs {
+struct access_coordinate {
unsigned int read_bandwidth;
unsigned int write_bandwidth;
unsigned int read_latency;
unsigned int write_latency;
};
+/*
+ * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0
+ * - access_coordinate between target node and nearest initiator node
+ * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1
+ * - access_coordinate between target node and nearest CPU node
+ */
+enum access_coordinate_class {
+ ACCESS_COORDINATE_LOCAL,
+ ACCESS_COORDINATE_CPU,
+ ACCESS_COORDINATE_MAX
+};
+
enum cache_indexing {
NODE_CACHE_DIRECT_MAP,
NODE_CACHE_INDEXED,
@@ -65,8 +77,8 @@ struct node_cache_attrs {
#ifdef CONFIG_HMEM_REPORTING
void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
-void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
- unsigned access);
+void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
+ enum access_coordinate_class access);
#else
static inline void node_add_cache(unsigned int nid,
struct node_cache_attrs *cache_attrs)
@@ -74,8 +86,8 @@ static inline void node_add_cache(unsigned int nid,
}
static inline void node_set_perf_attrs(unsigned int nid,
- struct node_hmem_attrs *hmem_attrs,
- unsigned access)
+ struct access_coordinate *coord,
+ enum access_coordinate_class access)
{
}
#endif
@@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
- unsigned access);
+ enum access_coordinate_class access);
#else
static inline void node_dev_init(void)
{
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 8d07116caaf1..b61438313a73 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -93,10 +93,10 @@
#include <linux/threads.h>
#include <linux/bitmap.h>
#include <linux/minmax.h>
+#include <linux/nodemask_types.h>
#include <linux/numa.h>
#include <linux/random.h>
-typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
extern nodemask_t _unused_nodemask_arg_;
/**
diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h
new file mode 100644
index 000000000000..6b28d97ea6ed
--- /dev/null
+++ b/include/linux/nodemask_types.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_NODEMASK_TYPES_H
+#define __LINUX_NODEMASK_TYPES_H
+
+#include <linux/bitops.h>
+#include <linux/numa.h>
+
+typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
+
+#endif /* __LINUX_NODEMASK_TYPES_H */
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 86544707236a..45702bdcbceb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -73,9 +73,7 @@ struct raw_notifier_head {
struct srcu_notifier_head {
struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
struct srcu_usage srcuu;
-#endif
struct srcu_struct srcu;
struct notifier_block __rcu *head;
};
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name) { \
.head = NULL }
-#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
.srcuu = __SRCU_USAGE_INIT(name.srcuu), \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu) \
- { \
- .mutex = __MUTEX_INITIALIZER(name.mutex), \
- .head = NULL, \
- .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
- }
-#endif
#define ATOMIC_NOTIFIER_HEAD(name) \
struct atomic_notifier_head name = \
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 0f1d024bd958..7d22ea50b098 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -7,7 +7,7 @@
struct proc_ns_operations;
struct ns_common {
- atomic_long_t stashed;
+ struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
refcount_t count;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index fee881cded01..5601d14e2886 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_NSPROXY_H
#define _LINUX_NSPROXY_H
+#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -29,7 +30,7 @@ struct fs_struct;
* nsproxy is copied.
*/
struct nsproxy {
- atomic_t count;
+ refcount_t count;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
@@ -102,14 +103,13 @@ int __init nsproxy_cache_init(void);
static inline void put_nsproxy(struct nsproxy *ns)
{
- if (atomic_dec_and_test(&ns->count)) {
+ if (refcount_dec_and_test(&ns->count))
free_nsproxy(ns);
- }
}
static inline void get_nsproxy(struct nsproxy *ns)
{
- atomic_inc(&ns->count);
+ refcount_inc(&ns->count);
}
#endif
diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index bdcd85e622d8..4d103ac8f5c7 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -89,8 +89,6 @@ struct nubus_driver {
void (*remove)(struct nubus_board *board);
};
-extern struct bus_type nubus_bus_type;
-
/* Generic NuBus interface functions, modelled after the PCI interface */
#ifdef CONFIG_PROC_FS
extern bool nubus_populate_procfs;
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 59df211d051f..915033a75731 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NUMA_H
#define _LINUX_NUMA_H
+#include <linux/init.h>
#include <linux/types.h>
#ifdef CONFIG_NODES_SHIFT
@@ -12,6 +13,7 @@
#define MAX_NUMNODES (1 << NODES_SHIFT)
#define NUMA_NO_NODE (-1)
+#define NUMA_NO_MEMBLK (-1)
/* optionally keep NUMA memory info available post init */
#ifdef CONFIG_NUMA_KEEP_MEMINFO
@@ -21,33 +23,32 @@
#endif
#ifdef CONFIG_NUMA
-#include <linux/printk.h>
#include <asm/sparsemem.h>
/* Generic implementation available */
-int numa_map_to_online_node(int node);
+int numa_nearest_node(int node, unsigned int state);
#ifndef memory_add_physaddr_to_nid
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
- pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
- start);
- return 0;
-}
+int memory_add_physaddr_to_nid(u64 start);
#endif
+
#ifndef phys_to_target_node
-static inline int phys_to_target_node(u64 start)
+int phys_to_target_node(u64 start);
+#endif
+
+#ifndef numa_fill_memblks
+static inline int __init numa_fill_memblks(u64 start, u64 end)
{
- pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
- start);
- return 0;
+ return NUMA_NO_MEMBLK;
}
#endif
+
#else /* !CONFIG_NUMA */
-static inline int numa_map_to_online_node(int node)
+static inline int numa_nearest_node(int node, unsigned int state)
{
return NUMA_NO_NODE;
}
+
static inline int memory_add_physaddr_to_nid(u64 start)
{
return 0;
@@ -58,6 +59,8 @@ static inline int phys_to_target_node(u64 start)
}
#endif
+#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE)
+
#ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP
extern const struct attribute_group arch_node_dev_group;
#endif
diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h
index dcb8030062dd..c1d0bc5d9624 100644
--- a/include/linux/nvme-auth.h
+++ b/include/linux/nvme-auth.h
@@ -9,9 +9,9 @@
#include <crypto/kpp.h>
struct nvme_dhchap_key {
- u8 *key;
size_t len;
u8 hash;
+ u8 key[];
};
u32 nvme_auth_get_seqnum(void);
@@ -24,10 +24,13 @@ const char *nvme_auth_digest_name(u8 hmac_id);
size_t nvme_auth_hmac_hash_len(u8 hmac_id);
u8 nvme_auth_hmac_id(const char *hmac_name);
+u32 nvme_auth_key_struct_size(u32 key_len);
struct nvme_dhchap_key *nvme_auth_extract_key(unsigned char *secret,
u8 key_hash);
void nvme_auth_free_key(struct nvme_dhchap_key *key);
-u8 *nvme_auth_transform_key(struct nvme_dhchap_key *key, char *nqn);
+struct nvme_dhchap_key *nvme_auth_alloc_key(u32 len, u8 hash);
+struct nvme_dhchap_key *nvme_auth_transform_key(
+ struct nvme_dhchap_key *key, char *nqn);
int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key);
int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len,
u8 *challenge, u8 *aug, size_t hlen);
diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h
new file mode 100644
index 000000000000..e10333d78dbb
--- /dev/null
+++ b/include/linux/nvme-keyring.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Hannes Reinecke, SUSE Labs
+ */
+
+#ifndef _NVME_KEYRING_H
+#define _NVME_KEYRING_H
+
+#if IS_ENABLED(CONFIG_NVME_KEYRING)
+
+key_serial_t nvme_tls_psk_default(struct key *keyring,
+ const char *hostnqn, const char *subnqn);
+
+key_serial_t nvme_keyring_id(void);
+
+#else
+
+static inline key_serial_t nvme_tls_psk_default(struct key *keyring,
+ const char *hostnqn, const char *subnqn)
+{
+ return 0;
+}
+static inline key_serial_t nvme_keyring_id(void)
+{
+ return 0;
+}
+#endif /* !CONFIG_NVME_KEYRING */
+#endif /* _NVME_KEYRING_H */
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 4dd7e6fe92fb..eb2f04d636c8 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -6,7 +6,11 @@
#ifndef _LINUX_NVME_RDMA_H
#define _LINUX_NVME_RDMA_H
-#define NVME_RDMA_MAX_QUEUE_SIZE 128
+#define NVME_RDMA_IP_PORT 4420
+
+#define NVME_RDMA_MAX_QUEUE_SIZE 256
+#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128
+#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128
enum nvme_rdma_cm_fmt {
NVME_RDMA_CM_FMT_1_0 = 0x0,
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 57ebe1267f7f..e07e8978d691 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -18,6 +18,12 @@ enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,
};
+enum nvme_tcp_tls_cipher {
+ NVME_TCP_TLS_CIPHER_INVALID = 0,
+ NVME_TCP_TLS_CIPHER_SHA256 = 1,
+ NVME_TCP_TLS_CIPHER_SHA384 = 2,
+};
+
enum nvme_tcp_fatal_error_status {
NVME_TCP_FES_INVALID_PDU_HDR = 0x01,
NVME_TCP_FES_PDU_SEQ_ERR = 0x02,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 26dd3f859d9d..425573202295 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -20,12 +20,9 @@
#define NVMF_TRSVCID_SIZE 32
#define NVMF_TRADDR_SIZE 256
#define NVMF_TSAS_SIZE 256
-#define NVMF_AUTH_HASH_LEN 64
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
-#define NVME_RDMA_IP_PORT 4420
-
#define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type {
@@ -108,6 +105,13 @@ enum {
NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
};
+/* TSAS SECTYPE for TCP transport */
+enum {
+ NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
+ NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+ NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+};
+
#define NVME_AQ_DEPTH 32
#define NVME_NR_AEN_COMMANDS 1
#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
@@ -640,6 +644,7 @@ enum {
NVME_CMD_EFFECTS_NCC = 1 << 2,
NVME_CMD_EFFECTS_NIC = 1 << 3,
NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14),
NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16),
NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20),
@@ -810,12 +815,6 @@ struct nvme_reservation_status_ext {
struct nvme_registered_ctrl_ext regctl_eds[];
};
-enum nvme_async_event_type {
- NVME_AER_TYPE_ERROR = 0,
- NVME_AER_TYPE_SMART = 1,
- NVME_AER_TYPE_NOTICE = 2,
-};
-
/* I/O commands */
enum nvme_opcode {
@@ -1493,6 +1492,9 @@ struct nvmf_disc_rsp_page_entry {
__u16 pkey;
__u8 resv10[246];
} rdma;
+ struct tcp {
+ __u8 sectype;
+ } tcp;
} tsas;
};
@@ -1722,7 +1724,7 @@ struct nvmf_auth_dhchap_success1_data {
__u8 rsvd2;
__u8 rvalid;
__u8 rsvd3[7];
- /* 'hl' bytes of response value if 'rvalid' is set */
+ /* 'hl' bytes of response value */
__u8 rval[];
};
@@ -1809,7 +1811,7 @@ struct nvme_command {
};
};
-static inline bool nvme_is_fabrics(struct nvme_command *cmd)
+static inline bool nvme_is_fabrics(const struct nvme_command *cmd)
{
return cmd->common.opcode == nvme_fabrics_command;
}
@@ -1828,7 +1830,7 @@ struct nvme_error_slot {
__u8 resv2[24];
};
-static inline bool nvme_is_write(struct nvme_command *cmd)
+static inline bool nvme_is_write(const struct nvme_command *cmd)
{
/*
* What a mess...
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index fa030d93b768..34c0e58dfa26 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -43,6 +43,8 @@ enum {
NVMEM_REMOVE,
NVMEM_CELL_ADD,
NVMEM_CELL_REMOVE,
+ NVMEM_LAYOUT_ADD,
+ NVMEM_LAYOUT_REMOVE,
};
#if IS_ENABLED(CONFIG_NVMEM)
@@ -79,6 +81,7 @@ int nvmem_device_cell_write(struct nvmem_device *nvmem,
struct nvmem_cell_info *info, void *buf);
const char *nvmem_dev_name(struct nvmem_device *nvmem);
+size_t nvmem_dev_size(struct nvmem_device *nvmem);
void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries,
size_t nentries);
@@ -125,6 +128,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell,
return -EOPNOTSUPP;
}
+static inline int nvmem_cell_read_u8(struct device *dev,
+ const char *cell_id, u8 *val)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int nvmem_cell_read_u16(struct device *dev,
const char *cell_id, u16 *val)
{
@@ -239,7 +248,6 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
const char *id);
struct nvmem_device *of_nvmem_device_get(struct device_node *np,
const char *name);
-struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem);
#else
static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
const char *id)
@@ -252,12 +260,6 @@ static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np,
{
return ERR_PTR(-EOPNOTSUPP);
}
-
-static inline struct device_node *
-of_nvmem_layout_get_container(struct nvmem_device *nvmem)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
#endif /* CONFIG_NVMEM && CONFIG_OF */
#endif /* ifndef _LINUX_NVMEM_CONSUMER_H */
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index dae26295e6be..f0ba0e03218f 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -9,6 +9,7 @@
#ifndef _LINUX_NVMEM_PROVIDER_H
#define _LINUX_NVMEM_PROVIDER_H
+#include <linux/device.h>
#include <linux/device/driver.h>
#include <linux/err.h>
#include <linux/errno.h>
@@ -82,13 +83,15 @@ struct nvmem_cell_info {
* @owner: Pointer to exporter module. Used for refcounting.
* @cells: Optional array of pre-defined NVMEM cells.
* @ncells: Number of elements in cells.
+ * @add_legacy_fixed_of_cells: Read fixed NVMEM cells from old OF syntax.
+ * @fixup_dt_cell_info: Will be called before a cell is added. Can be
+ * used to modify the nvmem_cell_info.
* @keepout: Optional array of keepout ranges (sorted ascending by start).
* @nkeepout: Number of elements in the keepout array.
* @type: Type of the nvmem storage
* @read_only: Device is read-only.
* @root_only: Device is accessibly to root only.
* @of_node: If given, this will be used instead of the parent's of_node.
- * @no_of_node: Device should not use the parent's of_node even if it's !NULL.
* @reg_read: Callback to read data.
* @reg_write: Callback to write data.
* @size: Device size.
@@ -112,6 +115,9 @@ struct nvmem_config {
struct module *owner;
const struct nvmem_cell_info *cells;
int ncells;
+ bool add_legacy_fixed_of_cells;
+ void (*fixup_dt_cell_info)(struct nvmem_device *nvmem,
+ struct nvmem_cell_info *cell);
const struct nvmem_keepout *keepout;
unsigned int nkeepout;
enum nvmem_type type;
@@ -120,7 +126,6 @@ struct nvmem_config {
bool ignore_wp;
struct nvmem_layout *layout;
struct device_node *of_node;
- bool no_of_node;
nvmem_reg_read_t reg_read;
nvmem_reg_write_t reg_write;
int size;
@@ -154,15 +159,11 @@ struct nvmem_cell_table {
/**
* struct nvmem_layout - NVMEM layout definitions
*
- * @name: Layout name.
- * @of_match_table: Open firmware match table.
+ * @dev: Device-model layout device.
+ * @nvmem: The underlying NVMEM device
* @add_cells: Will be called if a nvmem device is found which
* has this layout. The function will add layout
* specific cells with nvmem_add_one_cell().
- * @fixup_cell_info: Will be called before a cell is added. Can be
- * used to modify the nvmem_cell_info.
- * @owner: Pointer to struct module.
- * @node: List node.
*
* A nvmem device can hold a well defined structure which can just be
* evaluated during runtime. For example a TLV list, or a list of "name=val"
@@ -170,17 +171,15 @@ struct nvmem_cell_table {
* cells.
*/
struct nvmem_layout {
- const char *name;
- const struct of_device_id *of_match_table;
- int (*add_cells)(struct device *dev, struct nvmem_device *nvmem,
- struct nvmem_layout *layout);
- void (*fixup_cell_info)(struct nvmem_device *nvmem,
- struct nvmem_layout *layout,
- struct nvmem_cell_info *cell);
-
- /* private */
- struct module *owner;
- struct list_head node;
+ struct device dev;
+ struct nvmem_device *nvmem;
+ int (*add_cells)(struct nvmem_layout *layout);
+};
+
+struct nvmem_layout_driver {
+ struct device_driver driver;
+ int (*probe)(struct nvmem_layout *layout);
+ void (*remove)(struct nvmem_layout *layout);
};
#if IS_ENABLED(CONFIG_NVMEM)
@@ -197,13 +196,14 @@ void nvmem_del_cell_table(struct nvmem_cell_table *table);
int nvmem_add_one_cell(struct nvmem_device *nvmem,
const struct nvmem_cell_info *info);
-int __nvmem_layout_register(struct nvmem_layout *layout, struct module *owner);
-#define nvmem_layout_register(layout) \
- __nvmem_layout_register(layout, THIS_MODULE)
+int nvmem_layout_register(struct nvmem_layout *layout);
void nvmem_layout_unregister(struct nvmem_layout *layout);
-const void *nvmem_layout_get_match_data(struct nvmem_device *nvmem,
- struct nvmem_layout *layout);
+int nvmem_layout_driver_register(struct nvmem_layout_driver *drv);
+void nvmem_layout_driver_unregister(struct nvmem_layout_driver *drv);
+#define module_nvmem_layout_driver(__nvmem_layout_driver) \
+ module_driver(__nvmem_layout_driver, nvmem_layout_driver_register, \
+ nvmem_layout_driver_unregister)
#else
@@ -235,17 +235,27 @@ static inline int nvmem_layout_register(struct nvmem_layout *layout)
static inline void nvmem_layout_unregister(struct nvmem_layout *layout) {}
-static inline const void *
-nvmem_layout_get_match_data(struct nvmem_device *nvmem,
- struct nvmem_layout *layout)
+#endif /* CONFIG_NVMEM */
+
+#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
+
+/**
+ * of_nvmem_layout_get_container() - Get OF node of layout container
+ *
+ * @nvmem: nvmem device
+ *
+ * Return: a node pointer with refcount incremented or NULL if no
+ * container exists. Use of_node_put() on it when done.
+ */
+struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem);
+
+#else /* CONFIG_NVMEM && CONFIG_OF */
+
+static inline struct device_node *of_nvmem_layout_get_container(struct nvmem_device *nvmem)
{
return NULL;
}
-#endif /* CONFIG_NVMEM */
-
-#define module_nvmem_layout_driver(__layout_driver) \
- module_driver(__layout_driver, nvmem_layout_register, \
- nvmem_layout_unregister)
+#endif /* CONFIG_NVMEM && CONFIG_OF */
#endif /* ifndef _LINUX_NVMEM_PROVIDER_H */
diff --git a/include/linux/objpool.h b/include/linux/objpool.h
new file mode 100644
index 000000000000..15aff4a17f0c
--- /dev/null
+++ b/include/linux/objpool.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_OBJPOOL_H
+#define _LINUX_OBJPOOL_H
+
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+/*
+ * objpool: ring-array based lockless MPMC queue
+ *
+ * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org
+ *
+ * objpool is a scalable implementation of high performance queue for
+ * object allocation and reclamation, such as kretprobe instances.
+ *
+ * With leveraging percpu ring-array to mitigate hot spots of memory
+ * contention, it delivers near-linear scalability for high parallel
+ * scenarios. The objpool is best suited for the following cases:
+ * 1) Memory allocation or reclamation are prohibited or too expensive
+ * 2) Consumers are of different priorities, such as irqs and threads
+ *
+ * Limitations:
+ * 1) Maximum objects (capacity) is fixed after objpool creation
+ * 2) All pre-allocated objects are managed in percpu ring array,
+ * which consumes more memory than linked lists
+ */
+
+/**
+ * struct objpool_slot - percpu ring array of objpool
+ * @head: head sequence of the local ring array (to retrieve at)
+ * @tail: tail sequence of the local ring array (to append at)
+ * @last: the last sequence number marked as ready for retrieve
+ * @mask: bits mask for modulo capacity to compute array indexes
+ * @entries: object entries on this slot
+ *
+ * Represents a cpu-local array-based ring buffer, its size is specialized
+ * during initialization of object pool. The percpu objpool node is to be
+ * allocated from local memory for NUMA system, and to be kept compact in
+ * continuous memory: CPU assigned number of objects are stored just after
+ * the body of objpool_node.
+ *
+ * Real size of the ring array is far too smaller than the value range of
+ * head and tail, typed as uint32_t: [0, 2^32), so only lower bits (mask)
+ * of head and tail are used as the actual position in the ring array. In
+ * general the ring array is acting like a small sliding window, which is
+ * always moving forward in the loop of [0, 2^32).
+ */
+struct objpool_slot {
+ uint32_t head;
+ uint32_t tail;
+ uint32_t last;
+ uint32_t mask;
+ void *entries[];
+} __packed;
+
+struct objpool_head;
+
+/*
+ * caller-specified callback for object initial setup, it's only called
+ * once for each object (just after the memory allocation of the object)
+ */
+typedef int (*objpool_init_obj_cb)(void *obj, void *context);
+
+/* caller-specified cleanup callback for objpool destruction */
+typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
+
+/**
+ * struct objpool_head - object pooling metadata
+ * @obj_size: object size, aligned to sizeof(void *)
+ * @nr_objs: total objs (to be pre-allocated with objpool)
+ * @nr_cpus: local copy of nr_cpu_ids
+ * @capacity: max objs can be managed by one objpool_slot
+ * @gfp: gfp flags for kmalloc & vmalloc
+ * @ref: refcount of objpool
+ * @flags: flags for objpool management
+ * @cpu_slots: pointer to the array of objpool_slot
+ * @release: resource cleanup callback
+ * @context: caller-provided context
+ */
+struct objpool_head {
+ int obj_size;
+ int nr_objs;
+ int nr_cpus;
+ int capacity;
+ gfp_t gfp;
+ refcount_t ref;
+ unsigned long flags;
+ struct objpool_slot **cpu_slots;
+ objpool_fini_cb release;
+ void *context;
+};
+
+#define OBJPOOL_NR_OBJECT_MAX (1UL << 24) /* maximum numbers of total objects */
+#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */
+
+/**
+ * objpool_init() - initialize objpool and pre-allocated objects
+ * @pool: the object pool to be initialized, declared by caller
+ * @nr_objs: total objects to be pre-allocated by this object pool
+ * @object_size: size of an object (should be > 0)
+ * @gfp: flags for memory allocation (via kmalloc or vmalloc)
+ * @context: user context for object initialization callback
+ * @objinit: object initialization callback for extra setup
+ * @release: cleanup callback for extra cleanup task
+ *
+ * return value: 0 for success, otherwise error code
+ *
+ * All pre-allocated objects are to be zeroed after memory allocation.
+ * Caller could do extra initialization in objinit callback. objinit()
+ * will be called just after slot allocation and called only once for
+ * each object. After that the objpool won't touch any content of the
+ * objects. It's caller's duty to perform reinitialization after each
+ * pop (object allocation) or do clearance before each push (object
+ * reclamation).
+ */
+int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
+ gfp_t gfp, void *context, objpool_init_obj_cb objinit,
+ objpool_fini_cb release);
+
+/**
+ * objpool_pop() - allocate an object from objpool
+ * @pool: object pool
+ *
+ * return value: object ptr or NULL if failed
+ */
+void *objpool_pop(struct objpool_head *pool);
+
+/**
+ * objpool_push() - reclaim the object and return back to objpool
+ * @obj: object ptr to be pushed to objpool
+ * @pool: object pool
+ *
+ * return: 0 or error code (it fails only when user tries to push
+ * the same object multiple times or wrong "objects" into objpool)
+ */
+int objpool_push(void *obj, struct objpool_head *pool);
+
+/**
+ * objpool_drop() - discard the object and deref objpool
+ * @obj: object ptr to be discarded
+ * @pool: object pool
+ *
+ * return: 0 if objpool was released; -EAGAIN if there are still
+ * outstanding objects
+ *
+ * objpool_drop is normally for the release of outstanding objects
+ * after objpool cleanup (objpool_fini). Thinking of this example:
+ * kretprobe is unregistered and objpool_fini() is called to release
+ * all remained objects, but there are still objects being used by
+ * unfinished kretprobes (like blockable function: sys_accept). So
+ * only when the last outstanding object is dropped could the whole
+ * objpool be released along with the call of objpool_drop()
+ */
+int objpool_drop(void *obj, struct objpool_head *pool);
+
+/**
+ * objpool_free() - release objpool forcely (all objects to be freed)
+ * @pool: object pool to be released
+ */
+void objpool_free(struct objpool_head *pool);
+
+/**
+ * objpool_fini() - deref object pool (also releasing unused objects)
+ * @pool: object pool to be dereferenced
+ *
+ * objpool_fini() will try to release all remained free objects and
+ * then drop an extra reference of the objpool. If all objects are
+ * already returned to objpool (so called synchronous use cases),
+ * the objpool itself will be freed together. But if there are still
+ * outstanding objects (so called asynchronous use cases, such like
+ * blockable kretprobe), the objpool won't be released until all
+ * the outstanding objects are dropped, but the caller must assure
+ * there are no concurrent objpool_push() on the fly. Normally RCU
+ * is being required to make sure all ongoing objpool_push() must
+ * be finished before calling objpool_fini(), so does test_objpool,
+ * kretprobe or rethook
+ */
+void objpool_fini(struct objpool_head *pool);
+
+#endif /* _LINUX_OBJPOOL_H */
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 03f82c2c2ebf..b3b8d3dab52d 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -48,13 +48,13 @@
#define ANNOTATE_NOENDBR \
"986: \n\t" \
".pushsection .discard.noendbr\n\t" \
- ".long 986b - .\n\t" \
+ ".long 986b\n\t" \
".popsection\n\t"
#define ASM_REACHABLE \
"998:\n\t" \
".pushsection .discard.reachable\n\t" \
- ".long 998b - .\n\t" \
+ ".long 998b\n\t" \
".popsection\n\t"
#else /* __ASSEMBLY__ */
@@ -66,7 +66,7 @@
#define ANNOTATE_INTRA_FUNCTION_CALL \
999: \
.pushsection .discard.intra_function_calls; \
- .long 999b - .; \
+ .long 999b; \
.popsection;
/*
@@ -118,7 +118,7 @@
.macro ANNOTATE_NOENDBR
.Lhere_\@:
.pushsection .discard.noendbr
- .long .Lhere_\@ - .
+ .long .Lhere_\@
.popsection
.endm
@@ -130,7 +130,8 @@
* it will be ignored.
*/
.macro VALIDATE_UNRET_BEGIN
-#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY)
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO))
.Lhere_\@:
.pushsection .discard.validate_unret
.long .Lhere_\@ - .
@@ -141,7 +142,7 @@
.macro REACHABLE
.Lhere_\@:
.pushsection .discard.reachable
- .long .Lhere_\@ - .
+ .long .Lhere_\@
.popsection
.endm
diff --git a/include/linux/of.h b/include/linux/of.h
index 6ecde0515677..a0bedd038a05 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -13,6 +13,7 @@
*/
#include <linux/types.h>
#include <linux/bitops.h>
+#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/mod_devicetable.h>
@@ -96,10 +97,12 @@ struct of_reconfig_data {
struct property *old_prop;
};
+extern const struct kobj_type of_node_ktype;
+extern const struct fwnode_operations of_fwnode_ops;
+
/**
* of_node_init - initialize a devicetree node
* @node: Pointer to device node that has been created by kzalloc()
- * @phandle_name: Name of property holding a phandle value
*
* On return the device_node refcount is set to one. Use of_node_put()
* on @node when done to free the memory allocated for it. If the node
@@ -107,9 +110,6 @@ struct of_reconfig_data {
* whether to free the memory will be done by node->release(), which is
* of_node_release().
*/
-/* initialize a node */
-extern const struct kobj_type of_node_ktype;
-extern const struct fwnode_operations of_fwnode_ops;
static inline void of_node_init(struct device_node *node)
{
#if defined(CONFIG_OF_KOBJ)
@@ -135,6 +135,7 @@ static inline struct device_node *of_node_get(struct device_node *node)
}
static inline void of_node_put(struct device_node *node) { }
#endif /* !CONFIG_OF_DYNAMIC */
+DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T))
/* Pointer for first entry in chain of all nodes. */
extern struct device_node *of_root;
@@ -181,11 +182,6 @@ static inline bool is_of_node(const struct fwnode_handle *fwnode)
&__of_fwnode_handle_node->fwnode : NULL; \
})
-static inline bool of_have_populated_dt(void)
-{
- return of_root != NULL;
-}
-
static inline bool of_node_is_root(const struct device_node *node)
{
return node && (node->parent == NULL);
@@ -295,6 +291,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
struct device_node *prev);
extern struct device_node *of_get_next_available_child(
const struct device_node *node, struct device_node *prev);
+extern struct device_node *of_get_next_reserved_child(
+ const struct device_node *node, struct device_node *prev);
extern struct device_node *of_get_compatible_child(const struct device_node *parent,
const char *compatible);
@@ -363,9 +361,6 @@ extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node,
int index);
extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread);
-#define for_each_property_of_node(dn, pp) \
- for (pp = dn->properties; pp != NULL; pp = pp->next)
-
extern int of_n_addr_cells(struct device_node *np);
extern int of_n_size_cells(struct device_node *np);
extern const struct of_device_id *of_match_node(
@@ -403,7 +398,20 @@ extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
extern int of_alias_get_id(struct device_node *np, const char *stem);
extern int of_alias_get_highest_id(const char *stem);
-extern int of_machine_is_compatible(const char *compat);
+bool of_machine_compatible_match(const char *const *compats);
+
+/**
+ * of_machine_is_compatible - Test root of device tree for a given compatible value
+ * @compat: compatible string to look for in root node's compatible property.
+ *
+ * Return: true if the root node has the given value in its compatible property.
+ */
+static inline bool of_machine_is_compatible(const char *compat)
+{
+ const char *compats[] = { compat, NULL };
+
+ return of_machine_compatible_match(compats);
+}
extern int of_add_property(struct device_node *np, struct property *prop);
extern int of_remove_property(struct device_node *np, struct property *prop);
@@ -542,6 +550,12 @@ static inline struct device_node *of_get_next_available_child(
return NULL;
}
+static inline struct device_node *of_get_next_reserved_child(
+ const struct device_node *node, struct device_node *prev)
+{
+ return NULL;
+}
+
static inline struct device_node *of_find_node_with_property(
struct device_node *from, const char *prop_name)
{
@@ -550,11 +564,6 @@ static inline struct device_node *of_find_node_with_property(
#define of_fwnode_handle(node) NULL
-static inline bool of_have_populated_dt(void)
-{
- return false;
-}
-
static inline struct device_node *of_get_compatible_child(const struct device_node *parent,
const char *compatible)
{
@@ -809,6 +818,11 @@ static inline int of_remove_property(struct device_node *np, struct property *pr
return 0;
}
+static inline bool of_machine_compatible_match(const char *const *compats)
+{
+ return false;
+}
+
static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
{
return false;
@@ -893,6 +907,9 @@ static inline int of_prop_val_eq(struct property *p1, struct property *p2)
!memcmp(p1->value, p2->value, (size_t)p1->length);
}
+#define for_each_property_of_node(dn, pp) \
+ for (pp = dn->properties; pp != NULL; pp = pp->next)
+
#if defined(CONFIG_OF) && defined(CONFIG_NUMA)
extern int of_node_to_nid(struct device_node *np);
#else
@@ -1067,6 +1084,22 @@ static inline int of_parse_phandle_with_optional_args(const struct device_node *
}
/**
+ * of_phandle_args_equal() - Compare two of_phandle_args
+ * @a1: First of_phandle_args to compare
+ * @a2: Second of_phandle_args to compare
+ *
+ * Return: True if a1 and a2 are the same (same node pointer, same phandle
+ * args), false otherwise.
+ */
+static inline bool of_phandle_args_equal(const struct of_phandle_args *a1,
+ const struct of_phandle_args *a2)
+{
+ return a1->np == a2->np &&
+ a1->args_count == a2->args_count &&
+ !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count);
+}
+
+/**
* of_property_count_u8_elems - Count the number of u8 elements in a property
*
* @np: device node from which the property value is to be read.
@@ -1429,9 +1462,25 @@ static inline int of_property_read_s32(const struct device_node *np,
#define for_each_child_of_node(parent, child) \
for (child = of_get_next_child(parent, NULL); child != NULL; \
child = of_get_next_child(parent, child))
+
+#define for_each_child_of_node_scoped(parent, child) \
+ for (struct device_node *child __free(device_node) = \
+ of_get_next_child(parent, NULL); \
+ child != NULL; \
+ child = of_get_next_child(parent, child))
+
#define for_each_available_child_of_node(parent, child) \
for (child = of_get_next_available_child(parent, NULL); child != NULL; \
child = of_get_next_available_child(parent, child))
+#define for_each_reserved_child_of_node(parent, child) \
+ for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \
+ child = of_get_next_reserved_child(parent, child))
+
+#define for_each_available_child_of_node_scoped(parent, child) \
+ for (struct device_node *child __free(device_node) = \
+ of_get_next_available_child(parent, NULL); \
+ child != NULL; \
+ child = of_get_next_available_child(parent, child))
#define for_each_of_cpu_node(cpu) \
for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
@@ -1580,6 +1629,29 @@ static inline int of_changeset_update_property(struct of_changeset *ocs,
{
return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
}
+
+struct device_node *of_changeset_create_node(struct of_changeset *ocs,
+ struct device_node *parent,
+ const char *full_name);
+int of_changeset_add_prop_string(struct of_changeset *ocs,
+ struct device_node *np,
+ const char *prop_name, const char *str);
+int of_changeset_add_prop_string_array(struct of_changeset *ocs,
+ struct device_node *np,
+ const char *prop_name,
+ const char **str_array, size_t sz);
+int of_changeset_add_prop_u32_array(struct of_changeset *ocs,
+ struct device_node *np,
+ const char *prop_name,
+ const u32 *array, size_t sz);
+static inline int of_changeset_add_prop_u32(struct of_changeset *ocs,
+ struct device_node *np,
+ const char *prop_name,
+ const u32 val)
+{
+ return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1);
+}
+
#else /* CONFIG_OF_DYNAMIC */
static inline int of_reconfig_notifier_register(struct notifier_block *nb)
{
@@ -1612,6 +1684,21 @@ static inline bool of_device_is_system_power_controller(const struct device_node
return of_property_read_bool(np, "system-power-controller");
}
+/**
+ * of_have_populated_dt() - Has DT been populated by bootloader
+ *
+ * Return: True if a DTB has been populated by the bootloader and it isn't the
+ * empty builtin one. False otherwise.
+ */
+static inline bool of_have_populated_dt(void)
+{
+#ifdef CONFIG_OF
+ return of_property_present(of_root, "compatible");
+#else
+ return false;
+#endif
+}
+
/*
* Overlay support
*/
@@ -1645,7 +1732,7 @@ struct of_overlay_notify_data {
#ifdef CONFIG_OF_OVERLAY
int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
- int *ovcs_id);
+ int *ovcs_id, struct device_node *target_base);
int of_overlay_remove(int *ovcs_id);
int of_overlay_remove_all(void);
@@ -1654,8 +1741,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb);
#else
-static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size,
- int *ovcs_id)
+static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+ int *ovcs_id, struct device_node *target_base)
{
return -ENOTSUPP;
}
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 2c7a3d4bc775..9042bca5bb84 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -2,10 +2,7 @@
#ifndef _LINUX_OF_DEVICE_H
#define _LINUX_OF_DEVICE_H
-#include <linux/platform_device.h>
-#include <linux/of_platform.h> /* temporary until merge */
-
-#include <linux/of.h>
+#include <linux/device/driver.h>
struct device;
struct of_device_id;
@@ -40,6 +37,9 @@ static inline int of_dma_configure(struct device *dev,
{
return of_dma_configure_id(dev, np, force_dma, NULL);
}
+
+void of_device_make_bus_id(struct device *dev);
+
#else /* CONFIG_OF */
static inline int of_driver_match_device(struct device *dev,
@@ -82,6 +82,9 @@ static inline int of_dma_configure(struct device *dev,
{
return 0;
}
+
+static inline void of_device_make_bus_id(struct device *dev) {}
+
#endif /* CONFIG_OF */
#endif /* _LINUX_OF_DEVICE_H */
diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index 4d7756087b6b..a4bea62bfa29 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -41,7 +41,7 @@ struct of_endpoint {
bool of_graph_is_present(const struct device_node *node);
int of_graph_parse_endpoint(const struct device_node *node,
struct of_endpoint *endpoint);
-int of_graph_get_endpoint_count(const struct device_node *np);
+unsigned int of_graph_get_endpoint_count(const struct device_node *np);
struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id);
struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
struct device_node *previous);
@@ -68,7 +68,7 @@ static inline int of_graph_parse_endpoint(const struct device_node *node,
return -ENOSYS;
}
-static inline int of_graph_get_endpoint_count(const struct device_node *np)
+static inline unsigned int of_graph_get_endpoint_count(const struct device_node *np)
{
return 0;
}
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 9a5e6b410dd2..e61cbbe12dac 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -8,20 +8,19 @@ struct iommu_ops;
#ifdef CONFIG_OF_IOMMU
-extern const struct iommu_ops *of_iommu_configure(struct device *dev,
- struct device_node *master_np,
- const u32 *id);
+extern int of_iommu_configure(struct device *dev, struct device_node *master_np,
+ const u32 *id);
extern void of_iommu_get_resv_regions(struct device *dev,
struct list_head *list);
#else
-static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
- struct device_node *master_np,
- const u32 *id)
+static inline int of_iommu_configure(struct device *dev,
+ struct device_node *master_np,
+ const u32 *id)
{
- return NULL;
+ return -ENODEV;
}
static inline void of_iommu_get_resv_regions(struct device *dev,
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index d8045bcfc35e..a2ff1ad48f7f 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -7,11 +7,11 @@
*/
#include <linux/mod_devicetable.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
struct device;
+struct device_node;
struct of_device_id;
+struct platform_device;
/**
* struct of_dev_auxdata - lookup table entry for device names & platform_data
@@ -127,10 +127,4 @@ static inline int devm_of_platform_populate(struct device *dev)
static inline void devm_of_platform_depopulate(struct device *dev) { }
#endif
-#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS)
-extern void of_platform_register_reconfig_notifier(void);
-#else
-static inline void of_platform_register_reconfig_notifier(void) { }
-#endif
-
#endif /* _LINUX_OF_PLATFORM_H */
diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 0f4a8903922a..51421fdbb0ba 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -30,9 +30,6 @@ enum OID {
/* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */
OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */
- OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */
- OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */
- OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */
OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */
OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */
OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */
@@ -49,11 +46,6 @@ enum OID {
OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */
OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */
- /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */
- OID_md2, /* 1.2.840.113549.2.2 */
- OID_md4, /* 1.2.840.113549.2.4 */
- OID_md5, /* 1.2.840.113549.2.5 */
-
OID_mskrb5, /* 1.2.840.48018.1.2.2 */
OID_krb5, /* 1.2.840.113554.1.2.2 */
OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */
@@ -67,6 +59,7 @@ enum OID {
OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */
OID_ntlmssp, /* 1.3.6.1.4.1.311.2.2.10 */
+ OID_negoex, /* 1.3.6.1.4.1.311.2.2.30 */
OID_spnego, /* 1.3.6.1.5.5.2 */
@@ -140,6 +133,17 @@ enum OID {
OID_TPMImportableKey, /* 2.23.133.10.1.4 */
OID_TPMSealedData, /* 2.23.133.10.1.5 */
+ /* CSOR FIPS-202 SHA-3 */
+ OID_sha3_256, /* 2.16.840.1.101.3.4.2.8 */
+ OID_sha3_384, /* 2.16.840.1.101.3.4.2.9 */
+ OID_sha3_512, /* 2.16.840.1.101.3.4.2.10 */
+ OID_id_ecdsa_with_sha3_256, /* 2.16.840.1.101.3.4.3.10 */
+ OID_id_ecdsa_with_sha3_384, /* 2.16.840.1.101.3.4.3.11 */
+ OID_id_ecdsa_with_sha3_512, /* 2.16.840.1.101.3.4.3.12 */
+ OID_id_rsassa_pkcs1_v1_5_with_sha3_256, /* 2.16.840.1.101.3.4.3.14 */
+ OID_id_rsassa_pkcs1_v1_5_with_sha3_384, /* 2.16.840.1.101.3.4.3.15 */
+ OID_id_rsassa_pkcs1_v1_5_with_sha3_512, /* 2.16.840.1.101.3.4.3.16 */
+
OID__NR
};
diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h
index 5581dbd3bd34..ea8fb31379e3 100644
--- a/include/linux/osq_lock.h
+++ b/include/linux/osq_lock.h
@@ -6,11 +6,6 @@
* An MCS like lock especially tailored for optimistic spinning for sleeping
* lock implementations (mutex, rwsem, etc).
*/
-struct optimistic_spin_node {
- struct optimistic_spin_node *next, *prev;
- int locked; /* 1 if lock acquired */
- int cpu; /* encoded CPU # + 1 value */
-};
struct optimistic_spin_queue {
/*
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index f9b60313eaea..0c7e3dcfe867 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -31,8 +31,10 @@
* credit to Christian Biere.
*/
#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
-#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
-#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_max(t) __type_max(typeof(t))
+#define __type_min(T) ((T)((T)-type_max(T)-(T)1))
+#define type_min(t) __type_min(typeof(t))
/*
* Avoids triggering -Wtype-limits compilation warning,
@@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow)
* @b: second addend
* @d: pointer to store sum
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted addition, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * sum has overflowed or been truncated.
+ * *@d holds the results of the attempted addition, regardless of whether
+ * wrap-around occurred.
*/
#define check_add_overflow(a, b, d) \
__must_check_overflow(__builtin_add_overflow(a, b, d))
/**
+ * wrapping_add() - Intentionally perform a wrapping addition
+ * @type: type for result of calculation
+ * @a: first addend
+ * @b: second addend
+ *
+ * Return the potentially wrapped-around addition without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_add(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_add_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_add() - Intentionally perform a wrapping increment assignment
+ * @var: variable to be incremented
+ * @offset: amount to add
+ *
+ * Increments @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_add(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_add(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_sub_overflow() - Calculate subtraction with overflow checking
* @a: minuend; value to subtract from
* @b: subtrahend; value to subtract from @a
* @d: pointer to store difference
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted subtraction, but is not considered
- * "safe for use" on a non-zero return value, which indicates that the
- * difference has underflowed or been truncated.
+ * *@d holds the results of the attempted subtraction, regardless of whether
+ * wrap-around occurred.
*/
#define check_sub_overflow(a, b, d) \
__must_check_overflow(__builtin_sub_overflow(a, b, d))
/**
+ * wrapping_sub() - Intentionally perform a wrapping subtraction
+ * @type: type for result of calculation
+ * @a: minuend; value to subtract from
+ * @b: subtrahend; value to subtract from @a
+ *
+ * Return the potentially wrapped-around subtraction without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_sub(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_sub_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
+ * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign
+ * @var: variable to be decremented
+ * @offset: amount to subtract
+ *
+ * Decrements @var by @offset with wrap-around. Returns the resulting
+ * value of @var. Will not trip any wrap-around sanitizers.
+ *
+ * Returns the new value of @var.
+ */
+#define wrapping_assign_sub(var, offset) \
+ ({ \
+ typeof(var) *__ptr = &(var); \
+ *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \
+ })
+
+/**
* check_mul_overflow() - Calculate multiplication with overflow checking
* @a: first factor
* @b: second factor
* @d: pointer to store product
*
- * Returns 0 on success.
+ * Returns true on wrap-around, false otherwise.
*
- * *@d holds the results of the attempted multiplication, but is not
- * considered "safe for use" on a non-zero return value, which indicates
- * that the product has overflowed or been truncated.
+ * *@d holds the results of the attempted multiplication, regardless of whether
+ * wrap-around occurred.
*/
#define check_mul_overflow(a, b, d) \
__must_check_overflow(__builtin_mul_overflow(a, b, d))
/**
+ * wrapping_mul() - Intentionally perform a wrapping multiplication
+ * @type: type for result of calculation
+ * @a: first factor
+ * @b: second factor
+ *
+ * Return the potentially wrapped-around multiplication without
+ * tripping any wrap-around sanitizers that may be enabled.
+ */
+#define wrapping_mul(type, a, b) \
+ ({ \
+ type __val; \
+ __builtin_mul_overflow(a, b, &__val); \
+ __val; \
+ })
+
+/**
* check_shl_overflow() - Calculate a left-shifted value and check overflow
* @a: Value to be shifted
* @s: How many bits left to shift
@@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow)
typeof(a) _a = a; \
typeof(s) _s = s; \
typeof(d) _d = d; \
- u64 _a_full = _a; \
+ unsigned long long _a_full = _a; \
unsigned int _to_shift = \
is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \
*_d = (_a_full << _to_shift); \
@@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow)
#define __overflows_type_constexpr(x, T) ( \
is_unsigned_type(typeof(x)) ? \
- (x) > type_max(typeof(T)) : \
+ (x) > type_max(T) : \
is_unsigned_type(typeof(T)) ? \
- (x) < 0 || (x) > type_max(typeof(T)) : \
- (x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+ (x) < 0 || (x) > type_max(T) : \
+ (x) < type_min(T) || (x) > type_max(T))
#define __overflows_type(x, T) ({ \
typeof(T) v = 0; \
@@ -309,4 +388,56 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
#define struct_size_t(type, member, count) \
struct_size((type *)NULL, member, count)
+/**
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables caller macro to pass (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: initializer expression (could be empty for no init).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+ _Static_assert(__builtin_constant_p(count), \
+ "onstack flex array members require compile-time const count"); \
+ union { \
+ u8 bytes[struct_size_t(type, member, count)]; \
+ type obj; \
+ } name##_u initializer; \
+ type *name = (type *)&name##_u
+
+/**
+ * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member, when it does not have a __counted_by annotation.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @type structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@name) to get compile-time size of it afterwards.
+ */
+#define DEFINE_RAW_FLEX(type, name, member, count) \
+ _DEFINE_FLEX(type, name, member, count, = {})
+
+/**
+ * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
+ * flexible array member.
+ *
+ * @TYPE: structure type name, including "struct" keyword.
+ * @NAME: Name for a variable to define.
+ * @MEMBER: Name of the array member.
+ * @COUNTER: Name of the __counted_by member.
+ * @COUNT: Number of elements in the array; must be compile-time const.
+ *
+ * Define a zeroed, on-stack, instance of @TYPE structure with a trailing
+ * flexible array member.
+ * Use __struct_size(@NAME) to get compile-time size of it afterwards.
+ */
+#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \
+ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, })
+
#endif /* __LINUX_OVERFLOW_H */
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 495b16b6b4d7..0146daf34430 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -137,6 +137,7 @@ struct padata_shell {
* appropriate for one worker thread to do at once.
* @max_threads: Max threads to use for the job, actual number may be less
* depending on task size and minimum chunk size.
+ * @numa_aware: Distribute jobs to different nodes with CPU in a round robin fashion.
*/
struct padata_mt_job {
void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
@@ -146,6 +147,7 @@ struct padata_mt_job {
unsigned long align;
unsigned long min_chunk;
int max_threads;
+ bool numa_aware;
};
/**
@@ -178,10 +180,6 @@ struct padata_instance {
#ifdef CONFIG_PADATA
extern void __init padata_init(void);
-#else
-static inline void __init padata_init(void) {}
-#endif
-
extern struct padata_instance *padata_alloc(const char *name);
extern void padata_free(struct padata_instance *pinst);
extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
@@ -192,4 +190,12 @@ extern void padata_do_serial(struct padata_priv *padata);
extern void __init padata_do_multithreaded(struct padata_mt_job *job);
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
cpumask_var_t cpumask);
+#else
+static inline void __init padata_init(void) {}
+static inline void __init padata_do_multithreaded(struct padata_mt_job *job)
+{
+ job->thread_fn(job->start, job->start + job->size, job->fn_arg);
+}
+#endif
+
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 92a2063a0a23..4bf1c25fd1dc 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -99,13 +99,15 @@
*/
enum pageflags {
PG_locked, /* Page is locked. Don't touch. */
+ PG_writeback, /* Page is under writeback */
PG_referenced,
PG_uptodate,
PG_dirty,
PG_lru,
+ PG_head, /* Must be in bit 6 */
+ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
PG_active,
PG_workingset,
- PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
PG_error,
PG_slab,
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
@@ -113,8 +115,6 @@ enum pageflags {
PG_reserved,
PG_private, /* If pagecache, has fs-private data */
PG_private_2, /* If pagecache, has fs aux data */
- PG_writeback, /* Page is under writeback */
- PG_head, /* A head page */
PG_mappedtodisk, /* Has blocks allocated on-disk */
PG_reclaim, /* To be reclaimed asap */
PG_swapbacked, /* Page is backed by RAM/swap */
@@ -171,15 +171,6 @@ enum pageflags {
/* Remapped by swiotlb-xen. */
PG_xen_remapped = PG_owner_priv_1,
-#ifdef CONFIG_MEMORY_FAILURE
- /*
- * Compound pages. Stored in first tail page's flags.
- * Indicates that at least one subpage is hwpoisoned in the
- * THP.
- */
- PG_has_hwpoisoned = PG_error,
-#endif
-
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
@@ -190,6 +181,16 @@ enum pageflags {
/* For self-hosted memmap pages */
PG_vmemmap_self_hosted = PG_owner_priv_1,
#endif
+
+ /*
+ * Flags only valid for compound pages. Stored in first tail page's
+ * flags word. Cannot use the first 8 flags or any flag marked as
+ * PF_ANY.
+ */
+
+ /* At least one page in this folio has the hwpoison flag set */
+ PG_has_hwpoisoned = PG_error,
+ PG_large_rmappable = PG_workingset, /* anon or file-backed */
};
#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
@@ -235,7 +236,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page)
}
#endif
-static __always_inline int page_is_fake_head(struct page *page)
+static __always_inline int page_is_fake_head(const struct page *page)
{
return page_fixed_fake_head(page) != page;
}
@@ -279,12 +280,12 @@ static inline unsigned long _compound_head(const struct page *page)
*/
#define folio_page(folio, n) nth_page(&(folio)->page, n)
-static __always_inline int PageTail(struct page *page)
+static __always_inline int PageTail(const struct page *page)
{
return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
}
-static __always_inline int PageCompound(struct page *page)
+static __always_inline int PageCompound(const struct page *page)
{
return test_bit(PG_head, &page->flags) ||
READ_ONCE(page->compound_head) & 1;
@@ -304,6 +305,16 @@ static inline void page_init_poison(struct page *page, size_t size)
}
#endif
+static const unsigned long *const_folio_flags(const struct folio *folio,
+ unsigned n)
+{
+ const struct page *page = &folio->page;
+
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+ return &page[n].flags;
+}
+
static unsigned long *folio_flags(struct folio *folio, unsigned n)
{
struct page *page = &folio->page;
@@ -326,9 +337,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
* for compound page all operations related to the page flag applied to
* head page.
*
- * PF_ONLY_HEAD:
- * for compound page, callers only ever operate on the head page.
- *
* PF_NO_TAIL:
* modifications of the page flag must be done on small or head pages,
* checks can be done on tail pages too.
@@ -344,9 +352,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
page; })
#define PF_ANY(page, enforce) PF_POISONED_CHECK(page)
#define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page))
-#define PF_ONLY_HEAD(page, enforce) ({ \
- VM_BUG_ON_PGFLAGS(PageTail(page), page); \
- PF_POISONED_CHECK(page); })
#define PF_NO_TAIL(page, enforce) ({ \
VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
PF_POISONED_CHECK(compound_head(page)); })
@@ -360,59 +365,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
/* Which page is the flag stored in */
#define FOLIO_PF_ANY 0
#define FOLIO_PF_HEAD 0
-#define FOLIO_PF_ONLY_HEAD 0
#define FOLIO_PF_NO_TAIL 0
#define FOLIO_PF_NO_COMPOUND 0
#define FOLIO_PF_SECOND 1
+#define FOLIO_HEAD_PAGE 0
+#define FOLIO_SECOND_PAGE 1
+
/*
* Macros to create function definitions for page flags
*/
+#define FOLIO_TEST_FLAG(name, page) \
+static __always_inline bool folio_test_##name(const struct folio *folio) \
+{ return test_bit(PG_##name, const_folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page) \
+static __always_inline void folio_set_##name(struct folio *folio) \
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void folio_clear_##name(struct folio *folio) \
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page) \
+static __always_inline void __folio_set_##name(struct folio *folio) \
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page) \
+static __always_inline void __folio_clear_##name(struct folio *folio) \
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page) \
+static __always_inline bool folio_test_set_##name(struct folio *folio) \
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page) \
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page) \
+FOLIO_TEST_FLAG(name, page) \
+FOLIO_SET_FLAG(name, page) \
+FOLIO_CLEAR_FLAG(name, page)
+
#define TESTPAGEFLAG(uname, lname, policy) \
-static __always_inline bool folio_test_##lname(struct folio *folio) \
-{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
-static __always_inline int Page##uname(struct page *page) \
+FOLIO_TEST_FLAG(lname, FOLIO_##policy) \
+static __always_inline int Page##uname(const struct page *page) \
{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void folio_set_##lname(struct folio *folio) \
-{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void SetPage##uname(struct page *page) \
{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void folio_clear_##lname(struct folio *folio) \
-{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void ClearPage##uname(struct page *page) \
{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void __folio_set_##lname(struct folio *folio) \
-{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+__FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void __SetPage##uname(struct page *page) \
{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
-static __always_inline \
-void __folio_clear_##lname(struct folio *folio) \
-{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void __ClearPage##uname(struct page *page) \
{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
-static __always_inline \
-bool folio_test_set_##lname(struct folio *folio) \
-{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestSetPage##uname(struct page *page) \
{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
-static __always_inline \
-bool folio_test_clear_##lname(struct folio *folio) \
-{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestClearPage##uname(struct page *page) \
{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
@@ -430,30 +457,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
+#define FOLIO_TEST_FLAG_FALSE(name) \
+static inline bool folio_test_##name(const struct folio *folio) \
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name) \
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name) \
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name) \
+static inline bool folio_test_set_##name(struct folio *folio) \
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
+static inline bool folio_test_clear_##name(struct folio *folio) \
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name) \
+FOLIO_TEST_FLAG_FALSE(name) \
+FOLIO_SET_FLAG_NOOP(name) \
+FOLIO_CLEAR_FLAG_NOOP(name)
+
#define TESTPAGEFLAG_FALSE(uname, lname) \
-static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
+FOLIO_TEST_FLAG_FALSE(lname) \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_set_##lname(struct folio *folio) { } \
+FOLIO_SET_FLAG_NOOP(lname) \
static inline void SetPage##uname(struct page *page) { }
#define CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_clear_##lname(struct folio *folio) { } \
+FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void __folio_clear_##lname(struct folio *folio) { } \
+__FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void __ClearPage##uname(struct page *page) { }
#define TESTSETFLAG_FALSE(uname, lname) \
-static inline bool folio_test_set_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_SET_FLAG_FALSE(lname) \
static inline int TestSetPage##uname(struct page *page) { return 0; }
#define TESTCLEARFLAG_FALSE(uname, lname) \
-static inline bool folio_test_clear_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
@@ -463,7 +511,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
-PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
+FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -530,13 +578,13 @@ PAGEFLAG_FALSE(HighMem, highmem)
#endif
#ifdef CONFIG_SWAP
-static __always_inline bool folio_test_swapcache(struct folio *folio)
+static __always_inline bool folio_test_swapcache(const struct folio *folio)
{
return folio_test_swapbacked(folio) &&
- test_bit(PG_swapcache, folio_flags(folio, 0));
+ test_bit(PG_swapcache, const_folio_flags(folio, 0));
}
-static __always_inline bool PageSwapCache(struct page *page)
+static __always_inline bool PageSwapCache(const struct page *page)
{
return folio_test_swapcache(page_folio(page));
}
@@ -581,10 +629,10 @@ PAGEFLAG_FALSE(HWPoison, hwpoison)
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
-TESTPAGEFLAG(Young, young, PF_ANY)
-SETPAGEFLAG(Young, young, PF_ANY)
-TESTCLEARFLAG(Young, young, PF_ANY)
-PAGEFLAG(Idle, idle, PF_ANY)
+FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
#endif
/*
@@ -635,22 +683,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
*/
#define PAGE_MAPPING_DAX_SHARED ((void *)0x1)
-static __always_inline bool folio_mapping_flags(struct folio *folio)
+static __always_inline bool folio_mapping_flags(const struct folio *folio)
{
return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline int PageMappingFlags(struct page *page)
+static __always_inline int PageMappingFlags(const struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline bool folio_test_anon(struct folio *folio)
+static __always_inline bool folio_test_anon(const struct folio *folio)
{
return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
}
-static __always_inline bool PageAnon(struct page *page)
+static __always_inline bool PageAnon(const struct page *page)
{
return folio_test_anon(page_folio(page));
}
@@ -661,7 +709,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio)
PAGE_MAPPING_MOVABLE;
}
-static __always_inline int __PageMovable(struct page *page)
+static __always_inline int __PageMovable(const struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_MOVABLE;
@@ -674,13 +722,13 @@ static __always_inline int __PageMovable(struct page *page)
* is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
* anon_vma, but to that page's node of the stable tree.
*/
-static __always_inline bool folio_test_ksm(struct folio *folio)
+static __always_inline bool folio_test_ksm(const struct folio *folio)
{
return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_KSM;
}
-static __always_inline bool PageKsm(struct page *page)
+static __always_inline bool PageKsm(const struct page *page)
{
return folio_test_ksm(page_folio(page));
}
@@ -691,6 +739,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm)
u64 stable_page_flags(struct page *page);
/**
+ * folio_xor_flags_has_waiters - Change some folio flags.
+ * @folio: The folio.
+ * @mask: Bits set in this word will be changed.
+ *
+ * This must only be used for flags which are changed with the folio
+ * lock held. For example, it is unsafe to use for PG_dirty as that
+ * can be set without the folio lock held. It can also only be used
+ * on flags which are in the range 0-6 as some of the implementations
+ * only affect those bits.
+ *
+ * Return: Whether there are tasks waiting on the folio.
+ */
+static inline bool folio_xor_flags_has_waiters(struct folio *folio,
+ unsigned long mask)
+{
+ return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0));
+}
+
+/**
* folio_test_uptodate - Is this folio up to date?
* @folio: The folio.
*
@@ -700,9 +767,9 @@ u64 stable_page_flags(struct page *page);
* some of the bytes in it may be; see the is_partially_uptodate()
* address_space operation.
*/
-static inline bool folio_test_uptodate(struct folio *folio)
+static inline bool folio_test_uptodate(const struct folio *folio)
{
- bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
+ bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0));
/*
* Must ensure that the data we read out of the folio is loaded
* _after_ we've loaded folio->flags to check the uptodate bit.
@@ -717,7 +784,7 @@ static inline bool folio_test_uptodate(struct folio *folio)
return ret;
}
-static inline int PageUptodate(struct page *page)
+static inline int PageUptodate(const struct page *page)
{
return folio_test_uptodate(page_folio(page));
}
@@ -751,25 +818,20 @@ static __always_inline void SetPageUptodate(struct page *page)
CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
-bool __folio_start_writeback(struct folio *folio, bool keep_write);
-bool set_page_writeback(struct page *page);
+void __folio_start_writeback(struct folio *folio, bool keep_write);
+void set_page_writeback(struct page *page);
#define folio_start_writeback(folio) \
__folio_start_writeback(folio, false)
#define folio_start_writeback_keepwrite(folio) \
__folio_start_writeback(folio, true)
-static inline bool test_set_page_writeback(struct page *page)
-{
- return set_page_writeback(page);
-}
-
-static __always_inline bool folio_test_head(struct folio *folio)
+static __always_inline bool folio_test_head(const struct folio *folio)
{
- return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY));
+ return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY));
}
-static __always_inline int PageHead(struct page *page)
+static __always_inline int PageHead(const struct page *page)
{
PF_POISONED_CHECK(page);
return test_bit(PG_head, &page->flags) && !page_is_fake_head(page);
@@ -785,7 +847,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY)
*
* Return: True if the folio is larger than one page.
*/
-static inline bool folio_test_large(struct folio *folio)
+static inline bool folio_test_large(const struct folio *folio)
{
return folio_test_head(folio);
}
@@ -806,17 +868,13 @@ static inline void ClearPageCompound(struct page *page)
BUG_ON(!PageHead(page));
ClearPageHead(page);
}
+PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND)
+#else
+TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
#endif
#define PG_head_mask ((1UL << PG_head))
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(struct page *page);
-bool folio_test_hugetlb(struct folio *folio);
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -826,23 +884,18 @@ TESTPAGEFLAG_FALSE(Huge, hugetlb)
* hugetlbfs pages, but not normal pages. PageTransHuge() can only be
* called only in the core VM paths where hugetlbfs pages can't exist.
*/
-static inline int PageTransHuge(struct page *page)
+static inline int PageTransHuge(const struct page *page)
{
VM_BUG_ON_PAGE(PageTail(page), page);
return PageHead(page);
}
-static inline bool folio_test_transhuge(struct folio *folio)
-{
- return folio_test_head(folio);
-}
-
/*
* PageTransCompound returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
* that hugetlbfs pages aren't involved.
*/
-static inline int PageTransCompound(struct page *page)
+static inline int PageTransCompound(const struct page *page)
{
return PageCompound(page);
}
@@ -852,7 +905,7 @@ static inline int PageTransCompound(struct page *page)
* and hugetlbfs pages, so it should only be called when it's known
* that hugetlbfs pages aren't involved.
*/
-static inline int PageTransTail(struct page *page)
+static inline int PageTransTail(const struct page *page)
{
return PageTail(page);
}
@@ -878,49 +931,57 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif
/*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
- if (PageHWPoison(page))
- return true;
- return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
-/*
* For pages that are never mapped to userspace (and aren't PageSlab),
* page_type may be used. Because it is initialised to -1, we invert the
* sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
* __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
+ * low bits so that an underflow or overflow of _mapcount won't be
* mistaken for a page type value.
*/
#define PAGE_TYPE_BASE 0xf0000000
-/* Reserve 0x0000007f to catch underflows of page_mapcount */
+/* Reserve 0x0000007f to catch underflows of _mapcount */
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
#define PG_table 0x00000200
#define PG_guard 0x00000400
+#define PG_hugetlb 0x00000800
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+#define folio_test_type(folio, flag) \
+ ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
static inline int page_type_has_type(unsigned int page_type)
{
return (int)page_type < PAGE_MAPCOUNT_RESERVE;
}
-static inline int page_has_type(struct page *page)
+static inline int page_has_type(const struct page *page)
{
return page_type_has_type(page->page_type);
}
-#define PAGE_TYPE_OPS(uname, lname) \
-static __always_inline int Page##uname(struct page *page) \
+#define FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline bool folio_test_##fname(const struct folio *folio)\
+{ \
+ return folio_test_type(folio, PG_##lname); \
+} \
+static __always_inline void __folio_set_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
+ folio->page.page_type &= ~PG_##lname; \
+} \
+static __always_inline void __folio_clear_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
+ folio->page.page_type |= PG_##lname; \
+}
+
+#define PAGE_TYPE_OPS(uname, lname, fname) \
+FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline int Page##uname(const struct page *page) \
{ \
return PageType(page, PG_##lname); \
} \
@@ -939,7 +1000,7 @@ static __always_inline void __ClearPage##uname(struct page *page) \
* PageBuddy() indicates that the page is free and in the buddy system
* (see mm/page_alloc.c).
*/
-PAGE_TYPE_OPS(Buddy, buddy)
+PAGE_TYPE_OPS(Buddy, buddy, buddy)
/*
* PageOffline() indicates that the page is logically offline although the
@@ -963,7 +1024,7 @@ PAGE_TYPE_OPS(Buddy, buddy)
* pages should check PageOffline() and synchronize with such drivers using
* page_offline_freeze()/page_offline_thaw().
*/
-PAGE_TYPE_OPS(Offline, offline)
+PAGE_TYPE_OPS(Offline, offline, offline)
extern void page_offline_freeze(void);
extern void page_offline_thaw(void);
@@ -973,18 +1034,49 @@ extern void page_offline_end(void);
/*
* Marks pages in use as page tables.
*/
-PAGE_TYPE_OPS(Table, table)
+PAGE_TYPE_OPS(Table, table, pgtable)
/*
* Marks guardpages used with debug_pagealloc.
*/
-PAGE_TYPE_OPS(Guard, guard)
+PAGE_TYPE_OPS(Guard, guard, guard)
+
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
+#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+ return folio_test_hugetlb(page_folio(page));
+}
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(struct page *page)
+{
+ if (PageHWPoison(page))
+ return true;
+ return PageHuge(page) && PageHWPoison(compound_head(page));
+}
extern bool is_free_buddy_page(struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
-static __always_inline int PageAnonExclusive(struct page *page)
+static __always_inline int PageAnonExclusive(const struct page *page)
{
VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
@@ -1040,6 +1132,14 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
#define PAGE_FLAGS_CHECK_AT_PREP \
((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
+/*
+ * Flags stored in the second page of a compound page. They may overlap
+ * the CHECK_AT_FREE flags above, so need to be cleared.
+ */
+#define PAGE_FLAGS_SECOND \
+ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
+ 1UL << PG_large_rmappable)
+
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
/**
@@ -1049,19 +1149,18 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
* Determine if a page has private stuff, indicating that release routines
* should be invoked upon it.
*/
-static inline int page_has_private(struct page *page)
+static inline int page_has_private(const struct page *page)
{
return !!(page->flags & PAGE_FLAGS_PRIVATE);
}
-static inline bool folio_has_private(struct folio *folio)
+static inline bool folio_has_private(const struct folio *folio)
{
return page_has_private(&folio->page);
}
#undef PF_ANY
#undef PF_HEAD
-#undef PF_ONLY_HEAD
#undef PF_NO_TAIL
#undef PF_NO_COMPOUND
#undef PF_SECOND
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index c141ea9a95ef..8cd858d912c4 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -4,7 +4,7 @@
#include <linux/atomic.h>
#include <linux/cache.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
#include <asm/page.h>
struct page_counter {
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index 67314f648aeb..be98564191e6 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -8,6 +8,7 @@
struct pglist_data;
+#ifdef CONFIG_PAGE_EXTENSION
/**
* struct page_ext_operations - per page_ext client operations
* @offset: Offset to the client's data within page_ext. Offset is returned to
@@ -29,8 +30,6 @@ struct page_ext_operations {
bool need_shared_flags;
};
-#ifdef CONFIG_PAGE_EXTENSION
-
/*
* The page_ext_flags users must set need_shared_flags to true.
*/
@@ -82,6 +81,12 @@ static inline void page_ext_init(void)
extern struct page_ext *page_ext_get(struct page *page);
extern void page_ext_put(struct page_ext *page_ext);
+static inline void *page_ext_data(struct page_ext *page_ext,
+ struct page_ext_operations *ops)
+{
+ return (void *)(page_ext) + ops->offset;
+}
+
static inline struct page_ext *page_ext_next(struct page_ext *curr)
{
void *next = curr;
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 5cb7bd2078ec..d8f344840643 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -144,9 +144,4 @@ static inline void set_page_idle(struct page *page)
{
folio_set_idle(page_folio(page));
}
-
-static inline void clear_page_idle(struct page *page)
-{
- folio_clear_idle(page_folio(page));
-}
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 119a0c9d2a8b..debdc25f08b9 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -11,7 +11,8 @@ extern struct page_ext_operations page_owner_ops;
extern void __reset_page_owner(struct page *page, unsigned short order);
extern void __set_page_owner(struct page *page,
unsigned short order, gfp_t gfp_mask);
-extern void __split_page_owner(struct page *page, unsigned int nr);
+extern void __split_page_owner(struct page *page, int old_order,
+ int new_order);
extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(const struct page *page);
@@ -31,10 +32,11 @@ static inline void set_page_owner(struct page *page,
__set_page_owner(page, order, gfp_mask);
}
-static inline void split_page_owner(struct page *page, unsigned int nr)
+static inline void split_page_owner(struct page *page, int old_order,
+ int new_order)
{
if (static_branch_unlikely(&page_owner_inited))
- __split_page_owner(page, nr);
+ __split_page_owner(page, old_order, new_order);
}
static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
{
@@ -56,11 +58,11 @@ static inline void reset_page_owner(struct page *page, unsigned short order)
{
}
static inline void set_page_owner(struct page *page,
- unsigned int order, gfp_t gfp_mask)
+ unsigned short order, gfp_t gfp_mask)
{
}
-static inline void split_page_owner(struct page *page,
- unsigned short order)
+static inline void split_page_owner(struct page *page, int old_order,
+ int new_order)
{
}
static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 01e16c7696ec..6722941c7cb8 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -14,18 +14,13 @@ extern struct static_key_true page_table_check_disabled;
extern struct page_ext_operations page_table_check_ops;
void __page_table_check_zero(struct page *page, unsigned int order);
-void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t pte);
-void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
- pmd_t pmd);
-void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
- pud_t pud);
-void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
-void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd);
-void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud);
+void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte);
+void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd);
+void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud);
+void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
+ unsigned int nr);
+void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd);
+void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud);
void __page_table_check_pte_clear_range(struct mm_struct *mm,
unsigned long addr,
pmd_t pmd);
@@ -46,61 +41,55 @@ static inline void page_table_check_free(struct page *page, unsigned int order)
__page_table_check_zero(page, order);
}
-static inline void page_table_check_pte_clear(struct mm_struct *mm,
- unsigned long addr, pte_t pte)
+static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pte_clear(mm, addr, pte);
+ __page_table_check_pte_clear(mm, pte);
}
-static inline void page_table_check_pmd_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t pmd)
+static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pmd_clear(mm, addr, pmd);
+ __page_table_check_pmd_clear(mm, pmd);
}
-static inline void page_table_check_pud_clear(struct mm_struct *mm,
- unsigned long addr, pud_t pud)
+static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pud_clear(mm, addr, pud);
+ __page_table_check_pud_clear(mm, pud);
}
-static inline void page_table_check_pte_set(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep,
- pte_t pte)
+static inline void page_table_check_ptes_set(struct mm_struct *mm,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pte_set(mm, addr, ptep, pte);
+ __page_table_check_ptes_set(mm, ptep, pte, nr);
}
-static inline void page_table_check_pmd_set(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp,
+static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp,
pmd_t pmd)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ __page_table_check_pmd_set(mm, pmdp, pmd);
}
-static inline void page_table_check_pud_set(struct mm_struct *mm,
- unsigned long addr, pud_t *pudp,
+static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp,
pud_t pud)
{
if (static_branch_likely(&page_table_check_disabled))
return;
- __page_table_check_pud_set(mm, addr, pudp, pud);
+ __page_table_check_pud_set(mm, pudp, pud);
}
static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
@@ -123,35 +112,29 @@ static inline void page_table_check_free(struct page *page, unsigned int order)
{
}
-static inline void page_table_check_pte_clear(struct mm_struct *mm,
- unsigned long addr, pte_t pte)
+static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte)
{
}
-static inline void page_table_check_pmd_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t pmd)
+static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd)
{
}
-static inline void page_table_check_pud_clear(struct mm_struct *mm,
- unsigned long addr, pud_t pud)
+static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud)
{
}
-static inline void page_table_check_pte_set(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep,
- pte_t pte)
+static inline void page_table_check_ptes_set(struct mm_struct *mm,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
}
-static inline void page_table_check_pmd_set(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp,
+static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp,
pmd_t pmd)
{
}
-static inline void page_table_check_pud_set(struct mm_struct *mm,
- unsigned long addr, pud_t *pudp,
+static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp,
pud_t pud)
{
}
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index e83c4c095041..3f2409b968ec 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -41,14 +41,14 @@ extern unsigned int pageblock_order;
* Huge pages are a constant size, but don't exceed the maximum allocation
* granularity.
*/
-#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER)
+#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order MAX_ORDER
+#define pageblock_order MAX_PAGE_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 716953ee1ebd..2df35e65557d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -203,6 +203,10 @@ enum mapping_flags {
/* writeback related tags are not used */
AS_NO_WRITEBACK_TAGS = 5,
AS_LARGE_FOLIO_SUPPORT = 6,
+ AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */
+ AS_STABLE_WRITES, /* must wait for writeback before modifying
+ folio contents */
+ AS_UNMOVABLE, /* The mapping cannot be moved, ever */
};
/**
@@ -273,6 +277,52 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping)
return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}
+static inline bool mapping_release_always(const struct address_space *mapping)
+{
+ return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline void mapping_set_release_always(struct address_space *mapping)
+{
+ set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline void mapping_clear_release_always(struct address_space *mapping)
+{
+ clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
+}
+
+static inline bool mapping_stable_writes(const struct address_space *mapping)
+{
+ return test_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_stable_writes(struct address_space *mapping)
+{
+ set_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_clear_stable_writes(struct address_space *mapping)
+{
+ clear_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_unmovable(struct address_space *mapping)
+{
+ /*
+ * It's expected unmovable mappings are also unevictable. Compaction
+ * migrate scanner (isolate_migratepages_block()) relies on this to
+ * reduce page locking.
+ */
+ set_bit(AS_UNEVICTABLE, &mapping->flags);
+ set_bit(AS_UNMOVABLE, &mapping->flags);
+}
+
+static inline bool mapping_unmovable(struct address_space *mapping)
+{
+ return test_bit(AS_UNMOVABLE, &mapping->flags);
+}
+
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
return mapping->gfp_mask;
@@ -373,23 +423,31 @@ static inline struct address_space *folio_file_mapping(struct folio *folio)
return folio->mapping;
}
-static inline struct address_space *page_file_mapping(struct page *page)
-{
- return folio_file_mapping(page_folio(page));
-}
-
-/*
- * For file cache pages, return the address_space, otherwise return NULL
+/**
+ * folio_flush_mapping - Find the file mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to. Anonymous folios return NULL, even if they're in
+ * the swap cache. Other kinds of folio also return NULL.
+ *
+ * This is ONLY used by architecture cache flushing code. If you aren't
+ * writing cache flushing code, you want either folio_mapping() or
+ * folio_file_mapping().
*/
-static inline struct address_space *page_mapping_file(struct page *page)
+static inline struct address_space *folio_flush_mapping(struct folio *folio)
{
- struct folio *folio = page_folio(page);
-
if (unlikely(folio_test_swapcache(folio)))
return NULL;
+
return folio_mapping(folio);
}
+static inline struct address_space *page_file_mapping(struct page *page)
+{
+ return folio_file_mapping(page_folio(page));
+}
+
/**
* folio_inode - Get the host inode for this folio.
* @folio: The folio.
@@ -470,6 +528,19 @@ static inline void *detach_page_private(struct page *page)
return folio_detach_private(page_folio(page));
}
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
@@ -501,22 +572,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
-#define FGP_ACCESSED 0x00000001
-#define FGP_LOCK 0x00000002
-#define FGP_CREAT 0x00000004
-#define FGP_WRITE 0x00000008
-#define FGP_NOFS 0x00000010
-#define FGP_NOWAIT 0x00000020
-#define FGP_FOR_MMAP 0x00000040
-#define FGP_STABLE 0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio(). For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ * added to the page cache and the VM's LRU list. The folio is
+ * returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ * folio is already in cache. If the folio was allocated, unlock it
+ * before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ * implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED ((__force fgf_t)0x00000001)
+#define FGP_LOCK ((__force fgf_t)0x00000002)
+#define FGP_CREAT ((__force fgf_t)0x00000004)
+#define FGP_WRITE ((__force fgf_t)0x00000008)
+#define FGP_NOFS ((__force fgf_t)0x00000010)
+#define FGP_NOWAIT ((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP ((__force fgf_t)0x00000040)
+#define FGP_STABLE ((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */
#define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created. If there is already a folio at
+ * the index, it will be returned, no matter what its size. If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+ unsigned int shift = ilog2(size);
+
+ if (shift <= PAGE_SHIFT)
+ return 0;
+ return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
/**
* filemap_get_folio - Find and get a folio.
@@ -590,7 +708,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
}
static inline struct page *find_get_page_flags(struct address_space *mapping,
- pgoff_t offset, int fgp_flags)
+ pgoff_t offset, fgf_t fgp_flags)
{
return pagecache_get_page(mapping, offset, fgp_flags, 0);
}
@@ -705,9 +823,6 @@ static inline pgoff_t folio_next_index(struct folio *folio)
*/
static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return &folio->page;
return folio_page(folio, index & (folio_nr_pages(folio) - 1));
}
@@ -723,9 +838,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
*/
static inline bool folio_contains(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return folio->index == index;
return index - folio_index(folio) < folio_nr_pages(folio);
}
@@ -783,10 +895,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping,
}
/*
- * Get index of the page within radix-tree (but not for hugetlb pages).
- * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
*/
-static inline pgoff_t page_to_index(struct page *page)
+static inline pgoff_t page_to_pgoff(struct page *page)
{
struct page *head;
@@ -801,19 +912,6 @@ static inline pgoff_t page_to_index(struct page *page)
return head->index + page - head;
}
-extern pgoff_t hugetlb_basepage_index(struct page *page);
-
-/*
- * Get the offset in PAGE_SIZE (even for hugetlb pages).
- * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
- */
-static inline pgoff_t page_to_pgoff(struct page *page)
-{
- if (unlikely(PageHuge(page)))
- return hugetlb_basepage_index(page);
- return page_to_index(page);
-}
-
/*
* Return byte-offset into filesystem object for page.
*/
@@ -850,24 +948,16 @@ static inline loff_t folio_file_pos(struct folio *folio)
/*
* Get the offset in PAGE_SIZE (even for hugetlb folios).
- * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
*/
static inline pgoff_t folio_pgoff(struct folio *folio)
{
- if (unlikely(folio_test_hugetlb(folio)))
- return hugetlb_basepage_index(&folio->page);
return folio->index;
}
-extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
- unsigned long address);
-
static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
unsigned long address)
{
pgoff_t pgoff;
- if (unlikely(is_vm_hugetlb_page(vma)))
- return linear_hugepage_index(vma, address);
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
return pgoff;
@@ -900,8 +990,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
-bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
- unsigned int flags);
+vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
@@ -1005,11 +1094,13 @@ static inline int folio_lock_killable(struct folio *folio)
* Return value and mmap_lock implications depend on flags; see
* __folio_lock_or_retry().
*/
-static inline bool folio_lock_or_retry(struct folio *folio,
- struct mm_struct *mm, unsigned int flags)
+static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
+ struct vm_fault *vmf)
{
might_sleep();
- return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
+ if (!folio_trylock(folio))
+ return __folio_lock_or_retry(folio, vmf);
+ return 0;
}
/*
@@ -1044,11 +1135,7 @@ static inline void wait_on_page_locked(struct page *page)
folio_wait_locked(page_folio(page));
}
-static inline int wait_on_page_locked_killable(struct page *page)
-{
- return folio_wait_locked_killable(page_folio(page));
-}
-
+void folio_end_read(struct folio *folio, bool success);
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 87cc678adc85..5d3a0cccc6bf 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -11,8 +11,8 @@
#include <linux/types.h>
-/* 15 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE 15
+/* 31 pointers + header align the folio_batch structure to a power of two */
+#define PAGEVEC_SIZE 31
struct folio;
@@ -27,6 +27,7 @@ struct folio;
*/
struct folio_batch {
unsigned char nr;
+ unsigned char i;
bool percpu_pvec_drained;
struct folio *folios[PAGEVEC_SIZE];
};
@@ -40,12 +41,14 @@ struct folio_batch {
static inline void folio_batch_init(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->i = 0;
fbatch->percpu_pvec_drained = false;
}
static inline void folio_batch_reinit(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->i = 0;
}
static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch,
return folio_batch_space(fbatch);
}
+/**
+ * folio_batch_next - Return the next folio to process.
+ * @fbatch: The folio batch being processed.
+ *
+ * Use this function to implement a queue of folios.
+ *
+ * Return: The next folio in the queue, or NULL if the queue is empty.
+ */
+static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
+{
+ if (fbatch->i == fbatch->nr)
+ return NULL;
+ return fbatch->folios[fbatch->i++];
+}
+
void __folio_batch_release(struct folio_batch *pvec);
static inline void folio_batch_release(struct folio_batch *fbatch)
diff --git a/include/linux/parport.h b/include/linux/parport.h
index 999eddd619b7..fff39bc30629 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -180,8 +180,6 @@ struct ieee1284_info {
struct semaphore irq;
};
-#define PARPORT_NAME_MAX_LEN 15
-
/* A parallel port */
struct parport {
unsigned long base; /* base address */
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 6b1301e2498e..3a4860bd2758 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -93,6 +93,6 @@ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */
#if IS_ENABLED(CONFIG_PCI_HOST_COMMON)
/* for DT-based PCI controllers that support ECAM */
int pci_host_common_probe(struct platform_device *pdev);
-int pci_host_common_remove(struct platform_device *pdev);
+void pci_host_common_remove(struct platform_device *pdev);
#endif
#endif
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 5cb694031072..cc2f70d061c8 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -19,13 +19,6 @@ enum pci_epc_interface_type {
SECONDARY_INTERFACE,
};
-enum pci_epc_irq_type {
- PCI_EPC_IRQ_UNKNOWN,
- PCI_EPC_IRQ_LEGACY,
- PCI_EPC_IRQ_MSI,
- PCI_EPC_IRQ_MSIX,
-};
-
static inline const char *
pci_epc_interface_string(enum pci_epc_interface_type type)
{
@@ -79,7 +72,7 @@ struct pci_epc_ops {
u16 interrupts, enum pci_barno, u32 offset);
int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no);
int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
- enum pci_epc_irq_type type, u16 interrupt_num);
+ unsigned int type, u16 interrupt_num);
int (*map_msi_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr, u8 interrupt_num,
u32 entry_size, u32 *msi_data,
@@ -122,7 +115,7 @@ struct pci_epc_mem {
* struct pci_epc - represents the PCI EPC device
* @dev: PCI EPC device
* @pci_epf: list of endpoint functions present in this EPC device
- * list_lock: Mutex for protecting pci_epf list
+ * @list_lock: Mutex for protecting pci_epf list
* @ops: function pointers for performing endpoint operations
* @windows: array of address space of the endpoint controller
* @mem: first window of the endpoint controller, which corresponds to
@@ -153,15 +146,44 @@ struct pci_epc {
};
/**
+ * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC.
+ * @BAR_FIXED: The BAR mask is fixed by the hardware.
+ * @BAR_RESERVED: The BAR should not be touched by an EPF driver.
+ */
+enum pci_epc_bar_type {
+ BAR_PROGRAMMABLE = 0,
+ BAR_FIXED,
+ BAR_RESERVED,
+};
+
+/**
+ * struct pci_epc_bar_desc - hardware description for a BAR
+ * @type: the type of the BAR
+ * @fixed_size: the fixed size, only applicable if type is BAR_FIXED_MASK.
+ * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR
+ * should be configured as 32-bit or 64-bit, the EPF driver must
+ * configure this BAR as 64-bit. Additionally, the BAR succeeding
+ * this BAR must be set to type BAR_RESERVED.
+ *
+ * only_64bit should not be set on a BAR of type BAR_RESERVED.
+ * (If BARx is a 64-bit BAR that an EPF driver is not allowed to
+ * touch, then both BARx and BARx+1 must be set to type
+ * BAR_RESERVED.)
+ */
+struct pci_epc_bar_desc {
+ enum pci_epc_bar_type type;
+ u64 fixed_size;
+ bool only_64bit;
+};
+
+/**
* struct pci_epc_features - features supported by a EPC device per function
* @linkup_notifier: indicate if the EPC device can notify EPF driver on link up
* @core_init_notifier: indicate cores that can notify about their availability
* for initialization
* @msi_capable: indicate if the endpoint function has MSI capability
* @msix_capable: indicate if the endpoint function has MSI-X capability
- * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver
- * @bar_fixed_64bit: bitmap to indicate fixed 64bit BARs
- * @bar_fixed_size: Array specifying the size supported by each BAR
+ * @bar: array specifying the hardware description for each BAR
* @align: alignment size required for BAR buffer allocation
*/
struct pci_epc_features {
@@ -169,9 +191,7 @@ struct pci_epc_features {
unsigned int core_init_notifier : 1;
unsigned int msi_capable : 1;
unsigned int msix_capable : 1;
- u8 reserved_bar;
- u8 bar_fixed_64bit;
- u64 bar_fixed_size[PCI_STD_NUM_BARS];
+ struct pci_epc_bar_desc bar[PCI_STD_NUM_BARS];
size_t align;
};
@@ -229,7 +249,7 @@ int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
phys_addr_t phys_addr, u8 interrupt_num,
u32 entry_size, u32 *msi_data, u32 *msi_addr_offset);
int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
- enum pci_epc_irq_type type, u16 interrupt_num);
+ unsigned int type, u16 interrupt_num);
int pci_epc_start(struct pci_epc *epc);
void pci_epc_stop(struct pci_epc *epc);
const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 3f44b6aec477..adee6a1b35db 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -15,6 +15,7 @@
#include <linux/pci.h>
struct pci_epf;
+struct pci_epc_features;
enum pci_epc_interface_type;
enum pci_barno {
@@ -68,7 +69,7 @@ struct pci_epf_ops {
};
/**
- * struct pci_epf_event_ops - Callbacks for capturing the EPC events
+ * struct pci_epc_event_ops - Callbacks for capturing the EPC events
* @core_init: Callback for the EPC initialization complete event
* @link_up: Callback for the EPC link up event
* @link_down: Callback for the EPC link down event
@@ -98,7 +99,7 @@ struct pci_epf_driver {
void (*remove)(struct pci_epf *epf);
struct device_driver driver;
- struct pci_epf_ops *ops;
+ const struct pci_epf_ops *ops;
struct module *owner;
struct list_head epf_group;
const struct pci_epf_device_id *id_table;
@@ -216,7 +217,8 @@ int __pci_epf_register_driver(struct pci_epf_driver *driver,
struct module *owner);
void pci_epf_unregister_driver(struct pci_epf_driver *driver);
void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
- size_t align, enum pci_epc_interface_type type);
+ const struct pci_epc_features *epc_features,
+ enum pci_epc_interface_type type);
void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
enum pci_epc_interface_type type);
int pci_epf_bind(struct pci_epf *epf);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c69a2cc1f412..16493426a04f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -23,7 +23,7 @@
#ifndef LINUX_PCI_H
#define LINUX_PCI_H
-
+#include <linux/args.h>
#include <linux/mod_devicetable.h>
#include <linux/types.h>
@@ -366,8 +366,8 @@ struct pci_dev {
pci_power_t current_state; /* Current operating state. In ACPI,
this is D0-D3, D0 being fully
functional, and D3 being off. */
- unsigned int imm_ready:1; /* Supports Immediate Readiness */
u8 pm_cap; /* PM capability offset */
+ unsigned int imm_ready:1; /* Supports Immediate Readiness */
unsigned int pme_support:5; /* Bitmask of states from which PME#
can be generated */
unsigned int pme_poll:1; /* Poll device's PME status bit */
@@ -390,11 +390,11 @@ struct pci_dev {
unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */
unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */
+ u16 l1ss; /* L1SS Capability pointer */
#ifdef CONFIG_PCIEASPM
struct pcie_link_state *link_state; /* ASPM link state */
unsigned int ltr_path:1; /* Latency Tolerance Reporting
supported from root to here */
- u16 l1ss; /* L1SS Capability pointer */
#endif
unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */
unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */
@@ -464,12 +464,13 @@ struct pci_dev {
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */
+ unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
+ spinlock_t pcie_cap_lock; /* Protects RMW ops in capability accessors */
u32 saved_config_space[16]; /* Config space saved at suspend time */
struct hlist_head saved_cap_space;
- int rom_attr_enabled; /* Display of ROM attribute enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
@@ -712,6 +713,31 @@ static inline bool pci_is_bridge(struct pci_dev *dev)
dev->hdr_type == PCI_HEADER_TYPE_CARDBUS;
}
+/**
+ * pci_is_vga - check if the PCI device is a VGA device
+ * @pdev: PCI device
+ *
+ * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define
+ * VGA Base Class and Sub-Classes:
+ *
+ * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible
+ * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code)
+ *
+ * Return true if the PCI device is a VGA device and uses the legacy VGA
+ * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and
+ * aliases).
+ */
+static inline bool pci_is_vga(struct pci_dev *pdev)
+{
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ return true;
+
+ if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA)
+ return true;
+
+ return false;
+}
+
#define for_each_pci_bridge(dev, bus) \
list_for_each_entry(dev, &bus->devices, bus_list) \
if (!pci_is_bridge(dev)) {} else
@@ -860,7 +886,6 @@ struct module;
/**
* struct pci_driver - PCI driver structure
- * @node: List of driver structures.
* @name: Driver name.
* @id_table: Pointer to table of device IDs the driver is
* interested in. Most drivers should export this
@@ -915,7 +940,6 @@ struct module;
* own I/O address space.
*/
struct pci_driver {
- struct list_head node;
const char *name;
const struct pci_device_id *id_table; /* Must be non-NULL for probe to be called */
int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
@@ -1048,11 +1072,13 @@ enum {
PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* Scan all, not just dev 0 */
};
-#define PCI_IRQ_LEGACY (1 << 0) /* Allow legacy interrupts */
+#define PCI_IRQ_INTX (1 << 0) /* Allow INTx interrupts */
#define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */
#define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */
#define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */
+#define PCI_IRQ_LEGACY PCI_IRQ_INTX /* Deprecated! Use PCI_IRQ_INTX */
+
/* These external functions are only available when PCI support is enabled */
#ifdef CONFIG_PCI
@@ -1145,6 +1171,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
struct pci_dev *pci_dev_get(struct pci_dev *dev);
void pci_dev_put(struct pci_dev *dev);
+DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
void pci_remove_bus(struct pci_bus *b);
void pci_stop_and_remove_bus_device(struct pci_dev *dev);
void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
@@ -1180,6 +1207,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn);
struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
unsigned int devfn);
struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from);
+struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from);
+
int pci_dev_present(const struct pci_device_id *ids);
int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn,
@@ -1212,16 +1241,47 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val);
int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val);
int pci_write_config_word(const struct pci_dev *dev, int where, u16 val);
int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val);
+void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos,
+ u32 clear, u32 set);
int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val);
int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val);
int pcie_capability_write_dword(struct pci_dev *dev, int pos, u32 val);
-int pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
- u16 clear, u16 set);
+int pcie_capability_clear_and_set_word_unlocked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set);
+int pcie_capability_clear_and_set_word_locked(struct pci_dev *dev, int pos,
+ u16 clear, u16 set);
int pcie_capability_clear_and_set_dword(struct pci_dev *dev, int pos,
u32 clear, u32 set);
+/**
+ * pcie_capability_clear_and_set_word - RMW accessor for PCI Express Capability Registers
+ * @dev: PCI device structure of the PCI Express device
+ * @pos: PCI Express Capability Register
+ * @clear: Clear bitmask
+ * @set: Set bitmask
+ *
+ * Perform a Read-Modify-Write (RMW) operation using @clear and @set
+ * bitmasks on PCI Express Capability Register at @pos. Certain PCI Express
+ * Capability Registers are accessed concurrently in RMW fashion, hence
+ * require locking which is handled transparently to the caller.
+ */
+static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev,
+ int pos,
+ u16 clear, u16 set)
+{
+ switch (pos) {
+ case PCI_EXP_LNKCTL:
+ case PCI_EXP_RTCTL:
+ return pcie_capability_clear_and_set_word_locked(dev, pos,
+ clear, set);
+ default:
+ return pcie_capability_clear_and_set_word_unlocked(dev, pos,
+ clear, set);
+ }
+}
+
static inline int pcie_capability_set_word(struct pci_dev *dev, int pos,
u16 set)
{
@@ -1308,6 +1368,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps);
u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
enum pci_bus_speed *speed,
enum pcie_link_width *width);
+int pcie_link_speed_mbps(struct pci_dev *pdev);
void pcie_print_link_status(struct pci_dev *dev);
int pcie_reset_flr(struct pci_dev *dev, bool probe);
int pcie_flr(struct pci_dev *dev);
@@ -1361,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state);
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1403,7 +1465,6 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
void pci_assign_unassigned_bus_resources(struct pci_bus *bus);
void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus);
int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type);
-void pdev_enable_device(struct pci_dev *);
int pci_enable_resources(struct pci_dev *, int mask);
void pci_assign_irq(struct pci_dev *dev);
struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res);
@@ -1565,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+ void *userdata);
int pci_cfg_space_size(struct pci_dev *dev);
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
void pci_setup_bridge(struct pci_bus *bus);
@@ -1595,6 +1658,8 @@ struct msix_entry {
u16 entry; /* Driver uses to specify entry, OS writes */
};
+struct msi_domain_template;
+
#ifdef CONFIG_PCI_MSI
int pci_msi_vec_count(struct pci_dev *dev);
void pci_disable_msi(struct pci_dev *dev);
@@ -1627,6 +1692,11 @@ void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map);
void pci_free_irq_vectors(struct pci_dev *dev);
int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
+bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template,
+ unsigned int hwsize, void *data);
+struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie,
+ const struct irq_affinity_desc *affdesc);
+void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
#else
static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1690,6 +1760,25 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
{
return cpu_possible_mask;
}
+
+static inline bool pci_create_ims_domain(struct pci_dev *pdev,
+ const struct msi_domain_template *template,
+ unsigned int hwsize, void *data)
+{ return false; }
+
+static inline struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev,
+ union msi_instance_cookie *icookie,
+ const struct irq_affinity_desc *affdesc)
+{
+ struct msi_map map = { .index = -ENOSYS, };
+
+ return map;
+}
+
+static inline void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map)
+{
+}
+
#endif
/**
@@ -1748,6 +1837,7 @@ extern bool pcie_ports_native;
int pci_disable_link_state(struct pci_dev *pdev, int state);
int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
int pci_enable_link_state(struct pci_dev *pdev, int state);
+int pci_enable_link_state_locked(struct pci_dev *pdev, int state);
void pcie_no_aspm(void);
bool pcie_aspm_support_enabled(void);
bool pcie_aspm_enabled(struct pci_dev *pdev);
@@ -1758,6 +1848,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
{ return 0; }
static inline int pci_enable_link_state(struct pci_dev *pdev, int state)
{ return 0; }
+static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state)
+{ return 0; }
static inline void pcie_no_aspm(void) { }
static inline bool pcie_aspm_support_enabled(void) { return false; }
static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
@@ -1790,6 +1882,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev);
void pci_dev_lock(struct pci_dev *dev);
int pci_dev_trylock(struct pci_dev *dev);
void pci_dev_unlock(struct pci_dev *dev);
+DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
/*
* PCI domain support. Sometimes called PCI segment (eg by ACPI),
@@ -1895,6 +1988,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class,
struct pci_dev *from)
{ return NULL; }
+static inline struct pci_dev *pci_get_base_class(unsigned int class,
+ struct pci_dev *from)
+{ return NULL; }
static inline int pci_dev_present(const struct pci_device_id *ids)
{ return 0; }
@@ -1932,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
static inline void pci_restore_state(struct pci_dev *dev) { }
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{ return 0; }
+static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{ return 0; }
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
{ return 0; }
static inline pci_power_t pci_choose_state(struct pci_dev *dev,
@@ -2043,14 +2141,14 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma);
(pci_resource_end((dev), (bar)) ? \
resource_size(pci_resource_n((dev), (bar))) : 0)
-#define __pci_dev_for_each_res0(dev, res, ...) \
- for (unsigned int __b = 0; \
- res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \
+#define __pci_dev_for_each_res0(dev, res, ...) \
+ for (unsigned int __b = 0; \
+ __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \
__b++)
-#define __pci_dev_for_each_res1(dev, res, __b) \
- for (__b = 0; \
- res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES; \
+#define __pci_dev_for_each_res1(dev, res, __b) \
+ for (__b = 0; \
+ __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b)); \
__b++)
#define pci_dev_for_each_resource(dev, res, ...) \
@@ -2260,6 +2358,11 @@ int pcibios_alloc_irq(struct pci_dev *dev);
void pcibios_free_irq(struct pci_dev *dev);
resource_size_t pcibios_default_alignment(void);
+#if !defined(HAVE_PCI_MMAP) && !defined(ARCH_GENERIC_PCI_MMAP_RESOURCE)
+extern int pci_create_resource_files(struct pci_dev *dev);
+extern void pci_remove_resource_files(struct pci_dev *dev);
+#endif
+
#if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG)
void __init pci_mmcfg_early_init(void);
void __init pci_mmcfg_late_init(void);
@@ -2414,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev)
return NULL;
}
+static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
+{
+ return dev->error_state == pci_channel_io_perm_failure;
+}
+
void pci_request_acs(void);
bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
bool pci_acs_path_enabled(struct pci_dev *start,
@@ -2582,14 +2690,6 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev)
void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type);
#endif
-struct msi_domain_template;
-
-bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template,
- unsigned int hwsize, void *data);
-struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie,
- const struct irq_affinity_desc *affdesc);
-void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
-
#include <linux/dma-mapping.h>
#define pci_printk(level, pdev, fmt, arg...) \
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2dc75df1437f..c547d1d4feb1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -180,6 +180,8 @@
#define PCI_DEVICE_ID_BERKOM_A4T 0xffa4
#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8
+#define PCI_VENDOR_ID_ITTIM 0x0b48
+
#define PCI_VENDOR_ID_COMPAQ 0x0e11
#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508
#define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc
@@ -576,7 +578,12 @@
#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
+#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
+#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
+#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb
#define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3
+#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b
+#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
@@ -1760,6 +1767,10 @@
#define PCI_SUBDEVICE_ID_AT_2700FX 0x2701
#define PCI_SUBDEVICE_ID_AT_2701FX 0x2703
+#define PCI_VENDOR_ID_ASIX 0x125b
+#define PCI_DEVICE_ID_ASIX_AX99100 0x9100
+#define PCI_DEVICE_ID_ASIX_AX99100_LB 0x9110
+
#define PCI_VENDOR_ID_ESS 0x125d
#define PCI_DEVICE_ID_ESS_ESS1968 0x1968
#define PCI_DEVICE_ID_ESS_ESS1978 0x1978
@@ -2595,6 +2606,8 @@
#define PCI_VENDOR_ID_TEKRAM 0x1de1
#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
+#define PCI_VENDOR_ID_ALIBABA 0x1ded
+
#define PCI_VENDOR_ID_TEHUTI 0x1fc9
#define PCI_DEVICE_ID_TEHUTI_3009 0x3009
#define PCI_DEVICE_ID_TEHUTI_3010 0x3010
@@ -2644,6 +2657,7 @@
#define PCI_VENDOR_ID_INTEL 0x8086
#define PCI_DEVICE_ID_INTEL_EESSC 0x0008
+#define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8
#define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320
#define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321
#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329
@@ -2659,8 +2673,10 @@
#define PCI_DEVICE_ID_INTEL_82424 0x0483
#define PCI_DEVICE_ID_INTEL_82378 0x0484
#define PCI_DEVICE_ID_INTEL_82425 0x0486
+#define PCI_DEVICE_ID_INTEL_HDA_CML_H 0x06c8
#define PCI_DEVICE_ID_INTEL_MRST_SD0 0x0807
#define PCI_DEVICE_ID_INTEL_MRST_SD1 0x0808
+#define PCI_DEVICE_ID_INTEL_HDA_OAKTRAIL 0x080a
#define PCI_DEVICE_ID_INTEL_MFD_SD 0x0820
#define PCI_DEVICE_ID_INTEL_MFD_SDIO1 0x0821
#define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822
@@ -2670,15 +2686,19 @@
#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095e
#define PCI_DEVICE_ID_INTEL_I960 0x0960
#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
+#define PCI_DEVICE_ID_INTEL_HDA_HSW_0 0x0a0c
+#define PCI_DEVICE_ID_INTEL_HDA_HSW_2 0x0c0c
#define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60
+#define PCI_DEVICE_ID_INTEL_HDA_HSW_3 0x0d0c
+#define PCI_DEVICE_ID_INTEL_HDA_BYT 0x0f04
+#define PCI_DEVICE_ID_INTEL_SST_BYT 0x0f28
#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062
#define PCI_DEVICE_ID_INTEL_82573E_SOL 0x1085
#define PCI_DEVICE_ID_INTEL_82573L_SOL 0x108f
#define PCI_DEVICE_ID_INTEL_82815_MC 0x1130
#define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132
+#define PCI_DEVICE_ID_INTEL_SST_TNG 0x119a
#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221
-#define PCI_DEVICE_ID_INTEL_7505_0 0x2550
-#define PCI_DEVICE_ID_INTEL_7205_0 0x255d
#define PCI_DEVICE_ID_INTEL_82437 0x122d
#define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e
#define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230
@@ -2704,20 +2724,26 @@
#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE 0x1576
#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577
#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578
+#define PCI_DEVICE_ID_INTEL_HDA_BDW 0x160c
#define PCI_DEVICE_ID_INTEL_80960_RP 0x1960
#define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2
#define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3
#define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21
#define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30
#define PCI_DEVICE_ID_INTEL_IOAT 0x1a38
+#define PCI_DEVICE_ID_INTEL_HDA_CPT 0x1c20
#define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41
#define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f
+#define PCI_DEVICE_ID_INTEL_HDA_PBG 0x1d20
#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40
#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41
+#define PCI_DEVICE_ID_INTEL_HDA_PPT 0x1e20
#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31
#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40
#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f
#define PCI_DEVICE_ID_INTEL_VMD_201D 0x201d
+#define PCI_DEVICE_ID_INTEL_HDA_BSW 0x2284
+#define PCI_DEVICE_ID_INTEL_SST_BSW 0x22a8
#define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310
#define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f
#define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410
@@ -2772,6 +2798,8 @@
#define PCI_DEVICE_ID_INTEL_82850_HB 0x2530
#define PCI_DEVICE_ID_INTEL_82860_HB 0x2531
#define PCI_DEVICE_ID_INTEL_E7501_MCH 0x254c
+#define PCI_DEVICE_ID_INTEL_7505_0 0x2550
+#define PCI_DEVICE_ID_INTEL_7205_0 0x255d
#define PCI_DEVICE_ID_INTEL_82845G_HB 0x2560
#define PCI_DEVICE_ID_INTEL_82845G_IG 0x2562
#define PCI_DEVICE_ID_INTEL_82865_HB 0x2570
@@ -2793,12 +2821,14 @@
#define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640
#define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641
#define PCI_DEVICE_ID_INTEL_ICH6_2 0x2642
+#define PCI_DEVICE_ID_INTEL_HDA_ICH6 0x2668
#define PCI_DEVICE_ID_INTEL_ICH6_16 0x266a
#define PCI_DEVICE_ID_INTEL_ICH6_17 0x266d
#define PCI_DEVICE_ID_INTEL_ICH6_18 0x266e
#define PCI_DEVICE_ID_INTEL_ICH6_19 0x266f
#define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670
#define PCI_DEVICE_ID_INTEL_ESB2_14 0x2698
+#define PCI_DEVICE_ID_INTEL_HDA_ESB2 0x269a
#define PCI_DEVICE_ID_INTEL_ESB2_17 0x269b
#define PCI_DEVICE_ID_INTEL_ESB2_18 0x269e
#define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770
@@ -2806,11 +2836,12 @@
#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778
#define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27a0
#define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27a2
+#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0
#define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8
#define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9
-#define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0
#define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc
#define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd
+#define PCI_DEVICE_ID_INTEL_HDA_ICH7 0x27d8
#define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da
#define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd
#define PCI_DEVICE_ID_INTEL_ICH7_20 0x27de
@@ -2821,17 +2852,20 @@
#define PCI_DEVICE_ID_INTEL_ICH8_3 0x2814
#define PCI_DEVICE_ID_INTEL_ICH8_4 0x2815
#define PCI_DEVICE_ID_INTEL_ICH8_5 0x283e
+#define PCI_DEVICE_ID_INTEL_HDA_ICH8 0x284b
#define PCI_DEVICE_ID_INTEL_ICH8_6 0x2850
#define PCI_DEVICE_ID_INTEL_VMD_28C0 0x28c0
#define PCI_DEVICE_ID_INTEL_ICH9_0 0x2910
-#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917
#define PCI_DEVICE_ID_INTEL_ICH9_2 0x2912
#define PCI_DEVICE_ID_INTEL_ICH9_3 0x2913
#define PCI_DEVICE_ID_INTEL_ICH9_4 0x2914
-#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919
-#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930
#define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916
+#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917
#define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918
+#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919
+#define PCI_DEVICE_ID_INTEL_ICH9_6 0x2930
+#define PCI_DEVICE_ID_INTEL_HDA_ICH9_0 0x293e
+#define PCI_DEVICE_ID_INTEL_HDA_ICH9_1 0x293f
#define PCI_DEVICE_ID_INTEL_I7_MCR 0x2c18
#define PCI_DEVICE_ID_INTEL_I7_MC_TAD 0x2c19
#define PCI_DEVICE_ID_INTEL_I7_MC_RAS 0x2c1a
@@ -2848,8 +2882,8 @@
#define PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR 0x2c31
#define PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK 0x2c32
#define PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC 0x2c33
-#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41
#define PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT 0x2c40
+#define PCI_DEVICE_ID_INTEL_I7_NONCORE 0x2c41
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE 0x2c50
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT 0x2c51
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2 0x2c70
@@ -2883,6 +2917,7 @@
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2 0x2db1
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2 0x2db2
#define PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2 0x2db3
+#define PCI_DEVICE_ID_INTEL_HDA_GML 0x3198
#define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429
#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a
@@ -2893,12 +2928,13 @@
#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431
#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432
#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433
+#define PCI_DEVICE_ID_INTEL_HDA_ICL_LP 0x34c8
#define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
#define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577
-#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c
-#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e
#define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580
#define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582
+#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c
+#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e
#define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590
#define PCI_DEVICE_ID_INTEL_E7320_MCH 0x3592
#define PCI_DEVICE_ID_INTEL_MCH_PA 0x3595
@@ -2908,11 +2944,11 @@
#define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599
#define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a
#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
+#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
+#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c
#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f
#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610
-#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
-#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
@@ -2925,14 +2961,19 @@
#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
#define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8
#define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9
+#define PCI_DEVICE_ID_INTEL_HDA_ICL_N 0x38c8
#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18
#define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a
#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30
+#define PCI_DEVICE_ID_INTEL_HDA_ICH10_0 0x3a3e
#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
+#define PCI_DEVICE_ID_INTEL_HDA_ICH10_1 0x3a6e
#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00
#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f
+#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_0 0x3b56
+#define PCI_DEVICE_ID_INTEL_HDA_5_3400_SERIES_1 0x3b57
#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20
#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21
#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22
@@ -2943,16 +2984,12 @@
#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27
#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e
#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f
-#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46
-#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0
-#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1
-#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4
-#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5
#define PCI_DEVICE_ID_INTEL_UNC_QPI0 0x3c41
#define PCI_DEVICE_ID_INTEL_UNC_QPI1 0x3c42
#define PCI_DEVICE_ID_INTEL_UNC_R2PCIE 0x3c43
#define PCI_DEVICE_ID_INTEL_UNC_R3QPI0 0x3c44
#define PCI_DEVICE_ID_INTEL_UNC_R3QPI1 0x3c45
+#define PCI_DEVICE_ID_INTEL_UNC_HA 0x3c46
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */
@@ -2964,17 +3001,40 @@
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */
+#define PCI_DEVICE_ID_INTEL_UNC_IMC0 0x3cb0
+#define PCI_DEVICE_ID_INTEL_UNC_IMC1 0x3cb1
+#define PCI_DEVICE_ID_INTEL_UNC_IMC2 0x3cb4
+#define PCI_DEVICE_ID_INTEL_UNC_IMC3 0x3cb5
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */
#define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
+#define PCI_DEVICE_ID_INTEL_HDA_ICL_H 0x3dc8
#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030
#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035
#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036
+#define PCI_DEVICE_ID_INTEL_HDA_TGL_H 0x43c8
+#define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d
+#define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55
+#define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58
+#define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8
+#define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90
+#define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91
+#define PCI_DEVICE_ID_INTEL_HDA_DG2_2 0x4f92
#define PCI_DEVICE_ID_INTEL_EP80579_0 0x5031
#define PCI_DEVICE_ID_INTEL_EP80579_1 0x5032
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_P 0x51c8
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_PS 0x51c9
+#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_0 0x51ca
+#define PCI_DEVICE_ID_INTEL_HDA_RPL_P_1 0x51cb
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_M 0x51cc
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_PX 0x51cd
+#define PCI_DEVICE_ID_INTEL_HDA_RPL_M 0x51ce
+#define PCI_DEVICE_ID_INTEL_HDA_RPL_PX 0x51cf
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_N 0x54c8
+#define PCI_DEVICE_ID_INTEL_HDA_APL 0x5a98
#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3
#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
@@ -3008,8 +3068,14 @@
#define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0
#define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2
#define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601
+#define PCI_DEVICE_ID_INTEL_HDA_ARL 0x7728
+#define PCI_DEVICE_ID_INTEL_HDA_RPL_S 0x7a50
+#define PCI_DEVICE_ID_INTEL_HDA_ADL_S 0x7ad0
+#define PCI_DEVICE_ID_INTEL_HDA_MTL 0x7e28
+#define PCI_DEVICE_ID_INTEL_HDA_ARL_S 0x7f50
#define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119
#define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a
+#define PCI_DEVICE_ID_INTEL_HDA_POULSBO 0x811b
#define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183
#define PCI_DEVICE_ID_INTEL_ITC_LPC 0x8186
#define PCI_DEVICE_ID_INTEL_82454GX 0x84c4
@@ -3018,9 +3084,31 @@
#define PCI_DEVICE_ID_INTEL_82454NX 0x84cb
#define PCI_DEVICE_ID_INTEL_84460GX 0x84ea
#define PCI_DEVICE_ID_INTEL_IXP4XX 0x8500
+#define PCI_DEVICE_ID_INTEL_HDA_LPT 0x8c20
+#define PCI_DEVICE_ID_INTEL_HDA_9_SERIES 0x8ca0
+#define PCI_DEVICE_ID_INTEL_HDA_WBG_0 0x8d20
+#define PCI_DEVICE_ID_INTEL_HDA_WBG_1 0x8d21
#define PCI_DEVICE_ID_INTEL_IXP2800 0x9004
+#define PCI_DEVICE_ID_INTEL_HDA_LKF 0x98c8
#define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b
+#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_0 0x9c20
+#define PCI_DEVICE_ID_INTEL_HDA_LPT_LP_1 0x9c21
+#define PCI_DEVICE_ID_INTEL_HDA_WPT_LP 0x9ca0
+#define PCI_DEVICE_ID_INTEL_HDA_SKL_LP 0x9d70
+#define PCI_DEVICE_ID_INTEL_HDA_KBL_LP 0x9d71
+#define PCI_DEVICE_ID_INTEL_HDA_CNL_LP 0x9dc8
+#define PCI_DEVICE_ID_INTEL_HDA_TGL_LP 0xa0c8
+#define PCI_DEVICE_ID_INTEL_HDA_SKL 0xa170
+#define PCI_DEVICE_ID_INTEL_HDA_KBL 0xa171
+#define PCI_DEVICE_ID_INTEL_HDA_LBG_0 0xa1f0
+#define PCI_DEVICE_ID_INTEL_HDA_LBG_1 0xa270
+#define PCI_DEVICE_ID_INTEL_HDA_KBL_H 0xa2f0
+#define PCI_DEVICE_ID_INTEL_HDA_CNL_H 0xa348
+#define PCI_DEVICE_ID_INTEL_HDA_CML_S 0xa3f0
+#define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828
#define PCI_DEVICE_ID_INTEL_S21152BB 0xb152
+#define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8
+#define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8
#define PCI_VENDOR_ID_WANGXUN 0x8088
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index ff99cf7a5d0d..da3a6c30f6d2 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -20,12 +20,20 @@
#define DW_AN_C37_1000BASEX 4
#define DW_10GBASER 5
+/* device vendor OUI */
+#define DW_OUI_WX 0x0018fc80
+
+/* dev_flag */
+#define DW_DEV_TXGBE BIT(0)
+
struct xpcs_id;
struct dw_xpcs {
struct mdio_device *mdiodev;
const struct xpcs_id *id;
struct phylink_pcs pcs;
+ phy_interface_t interface;
+ int dev_flag;
};
int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index bcba7fda3cc9..4b4e9a98b37b 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -818,6 +818,367 @@ struct pds_vdpa_set_features_cmd {
__le64 features;
};
+#define PDS_LM_DEVICE_STATE_LENGTH 65536
+#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \
+ PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X)
+
+/*
+ * enum pds_lm_cmd_opcode - Live Migration Device commands
+ */
+enum pds_lm_cmd_opcode {
+ PDS_LM_CMD_HOST_VF_STATUS = 1,
+
+ /* Device state commands */
+ PDS_LM_CMD_STATE_SIZE = 16,
+ PDS_LM_CMD_SUSPEND = 18,
+ PDS_LM_CMD_SUSPEND_STATUS = 19,
+ PDS_LM_CMD_RESUME = 20,
+ PDS_LM_CMD_SAVE = 21,
+ PDS_LM_CMD_RESTORE = 22,
+
+ /* Dirty page tracking commands */
+ PDS_LM_CMD_DIRTY_STATUS = 32,
+ PDS_LM_CMD_DIRTY_ENABLE = 33,
+ PDS_LM_CMD_DIRTY_DISABLE = 34,
+ PDS_LM_CMD_DIRTY_READ_SEQ = 35,
+ PDS_LM_CMD_DIRTY_WRITE_ACK = 36,
+};
+
+/**
+ * struct pds_lm_cmd - generic command
+ * @opcode: Opcode
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @rsvd2: Structure padding to 60 Bytes
+ */
+struct pds_lm_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 rsvd2[56];
+};
+
+/**
+ * struct pds_lm_state_size_cmd - STATE_SIZE command
+ * @opcode: Opcode
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ */
+struct pds_lm_state_size_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+};
+
+/**
+ * struct pds_lm_state_size_comp - STATE_SIZE command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the desc ring for which this is the completion
+ * @size: Size of the device state
+ * @rsvd2: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_lm_state_size_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ union {
+ __le64 size;
+ u8 rsvd2[11];
+ } __packed;
+ u8 color;
+};
+
+enum pds_lm_suspend_resume_type {
+ PDS_LM_SUSPEND_RESUME_TYPE_FULL = 0,
+ PDS_LM_SUSPEND_RESUME_TYPE_P2P = 1,
+};
+
+/**
+ * struct pds_lm_suspend_cmd - SUSPEND command
+ * @opcode: Opcode PDS_LM_CMD_SUSPEND
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @type: Type of suspend (enum pds_lm_suspend_resume_type)
+ */
+struct pds_lm_suspend_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 type;
+};
+
+/**
+ * struct pds_lm_suspend_status_cmd - SUSPEND status command
+ * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @type: Type of suspend (enum pds_lm_suspend_resume_type)
+ */
+struct pds_lm_suspend_status_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 type;
+};
+
+/**
+ * struct pds_lm_resume_cmd - RESUME command
+ * @opcode: Opcode PDS_LM_CMD_RESUME
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @type: Type of resume (enum pds_lm_suspend_resume_type)
+ */
+struct pds_lm_resume_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 type;
+};
+
+/**
+ * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element
+ * @addr: DMA address of SG element data buffer
+ * @len: Length of SG element data buffer, in bytes
+ * @rsvd: Word boundary padding
+ */
+struct pds_lm_sg_elem {
+ __le64 addr;
+ __le32 len;
+ __le16 rsvd[2];
+};
+
+/**
+ * struct pds_lm_save_cmd - SAVE command
+ * @opcode: Opcode PDS_LM_CMD_SAVE
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @rsvd2: Word boundary padding
+ * @sgl_addr: IOVA address of the SGL to dma the device state
+ * @num_sge: Total number of SG elements
+ */
+struct pds_lm_save_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 rsvd2[4];
+ __le64 sgl_addr;
+ __le32 num_sge;
+} __packed;
+
+/**
+ * struct pds_lm_restore_cmd - RESTORE command
+ * @opcode: Opcode PDS_LM_CMD_RESTORE
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @rsvd2: Word boundary padding
+ * @sgl_addr: IOVA address of the SGL to dma the device state
+ * @num_sge: Total number of SG elements
+ */
+struct pds_lm_restore_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 rsvd2[4];
+ __le64 sgl_addr;
+ __le32 num_sge;
+} __packed;
+
+/**
+ * union pds_lm_dev_state - device state information
+ * @words: Device state words
+ */
+union pds_lm_dev_state {
+ __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)];
+};
+
+enum pds_lm_host_vf_status {
+ PDS_LM_STA_NONE = 0,
+ PDS_LM_STA_IN_PROGRESS,
+ PDS_LM_STA_MAX,
+};
+
+/**
+ * struct pds_lm_dirty_region_info - Memory region info for STATUS and ENABLE
+ * @dma_base: Base address of the DMA-contiguous memory region
+ * @page_count: Number of pages in the memory region
+ * @page_size_log2: Log2 page size in the memory region
+ * @rsvd: Word boundary padding
+ */
+struct pds_lm_dirty_region_info {
+ __le64 dma_base;
+ __le32 page_count;
+ u8 page_size_log2;
+ u8 rsvd[3];
+};
+
+/**
+ * struct pds_lm_dirty_status_cmd - DIRTY_STATUS command
+ * @opcode: Opcode PDS_LM_CMD_DIRTY_STATUS
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @max_regions: Capacity of the region info buffer
+ * @rsvd2: Word boundary padding
+ * @regions_dma: DMA address of the region info buffer
+ *
+ * The minimum of max_regions (from the command) and num_regions (from the
+ * completion) of struct pds_lm_dirty_region_info will be written to
+ * regions_dma.
+ *
+ * The max_regions may be zero, in which case regions_dma is ignored. In that
+ * case, the completion will only report the maximum number of regions
+ * supported by the device, and the number of regions currently enabled.
+ */
+struct pds_lm_dirty_status_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 max_regions;
+ u8 rsvd2[3];
+ __le64 regions_dma;
+} __packed;
+
+/**
+ * enum pds_lm_dirty_bmp_type - Type of dirty page bitmap
+ * @PDS_LM_DIRTY_BMP_TYPE_NONE: No bitmap / disabled
+ * @PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK: Seq/Ack bitmap representation
+ */
+enum pds_lm_dirty_bmp_type {
+ PDS_LM_DIRTY_BMP_TYPE_NONE = 0,
+ PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK = 1,
+};
+
+/**
+ * struct pds_lm_dirty_status_comp - STATUS command completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd: Word boundary padding
+ * @comp_index: Index in the desc ring for which this is the completion
+ * @max_regions: Maximum number of regions supported by the device
+ * @num_regions: Number of regions currently enabled
+ * @bmp_type: Type of dirty bitmap representation
+ * @rsvd2: Word boundary padding
+ * @bmp_type_mask: Mask of supported bitmap types, bit index per type
+ * @rsvd3: Word boundary padding
+ * @color: Color bit
+ *
+ * This completion descriptor is used for STATUS, ENABLE, and DISABLE.
+ */
+struct pds_lm_dirty_status_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 max_regions;
+ u8 num_regions;
+ u8 bmp_type;
+ u8 rsvd2;
+ __le32 bmp_type_mask;
+ u8 rsvd3[3];
+ u8 color;
+};
+
+/**
+ * struct pds_lm_dirty_enable_cmd - DIRTY_ENABLE command
+ * @opcode: Opcode PDS_LM_CMD_DIRTY_ENABLE
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @bmp_type: Type of dirty bitmap representation
+ * @num_regions: Number of entries in the region info buffer
+ * @rsvd2: Word boundary padding
+ * @regions_dma: DMA address of the region info buffer
+ *
+ * The num_regions must be nonzero, and less than or equal to the maximum
+ * number of regions supported by the device.
+ *
+ * The memory regions should not overlap.
+ *
+ * The information should be initialized by the driver. The device may modify
+ * the information on successful completion, such as by size-aligning the
+ * number of pages in a region.
+ *
+ * The modified number of pages will be greater than or equal to the page count
+ * given in the enable command, and at least as coarsly aligned as the given
+ * value. For example, the count might be aligned to a multiple of 64, but
+ * if the value is already a multiple of 128 or higher, it will not change.
+ * If the driver requires its own minimum alignment of the number of pages, the
+ * driver should account for that already in the region info of this command.
+ *
+ * This command uses struct pds_lm_dirty_status_comp for its completion.
+ */
+struct pds_lm_dirty_enable_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 bmp_type;
+ u8 num_regions;
+ u8 rsvd2[2];
+ __le64 regions_dma;
+} __packed;
+
+/**
+ * struct pds_lm_dirty_disable_cmd - DIRTY_DISABLE command
+ * @opcode: Opcode PDS_LM_CMD_DIRTY_DISABLE
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ *
+ * Dirty page tracking will be disabled. This may be called in any state, as
+ * long as dirty page tracking is supported by the device, to ensure that dirty
+ * page tracking is disabled.
+ *
+ * This command uses struct pds_lm_dirty_status_comp for its completion. On
+ * success, num_regions will be zero.
+ */
+struct pds_lm_dirty_disable_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+};
+
+/**
+ * struct pds_lm_dirty_seq_ack_cmd - DIRTY_READ_SEQ or _WRITE_ACK command
+ * @opcode: Opcode PDS_LM_CMD_DIRTY_[READ_SEQ|WRITE_ACK]
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @off_bytes: Byte offset in the bitmap
+ * @len_bytes: Number of bytes to transfer
+ * @num_sge: Number of DMA scatter gather elements
+ * @rsvd2: Word boundary padding
+ * @sgl_addr: DMA address of scatter gather list
+ *
+ * Read bytes from the SEQ bitmap, or write bytes into the ACK bitmap.
+ *
+ * This command treats the entire bitmap as a byte buffer. It does not
+ * distinguish between guest memory regions. The driver should refer to the
+ * number of pages in each region, according to PDS_LM_CMD_DIRTY_STATUS, to
+ * determine the region boundaries in the bitmap. Each region will be
+ * represented by exactly the number of bits as the page count for that region,
+ * immediately following the last bit of the previous region.
+ */
+struct pds_lm_dirty_seq_ack_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ __le32 off_bytes;
+ __le32 len_bytes;
+ __le16 num_sge;
+ u8 rsvd2[2];
+ __le64 sgl_addr;
+} __packed;
+
+/**
+ * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command
+ * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @status: Current LM status of host VF driver (enum pds_lm_host_status)
+ */
+struct pds_lm_host_vf_status_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ u8 status;
+};
+
union pds_core_adminq_cmd {
u8 opcode;
u8 bytes[64];
@@ -844,6 +1205,17 @@ union pds_core_adminq_cmd {
struct pds_vdpa_vq_init_cmd vdpa_vq_init;
struct pds_vdpa_vq_reset_cmd vdpa_vq_reset;
+ struct pds_lm_suspend_cmd lm_suspend;
+ struct pds_lm_suspend_status_cmd lm_suspend_status;
+ struct pds_lm_resume_cmd lm_resume;
+ struct pds_lm_state_size_cmd lm_state_size;
+ struct pds_lm_save_cmd lm_save;
+ struct pds_lm_restore_cmd lm_restore;
+ struct pds_lm_host_vf_status_cmd lm_host_vf_status;
+ struct pds_lm_dirty_status_cmd lm_dirty_status;
+ struct pds_lm_dirty_enable_cmd lm_dirty_enable;
+ struct pds_lm_dirty_disable_cmd lm_dirty_disable;
+ struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack;
};
union pds_core_adminq_comp {
@@ -868,6 +1240,9 @@ union pds_core_adminq_comp {
struct pds_vdpa_vq_init_comp vdpa_vq_init;
struct pds_vdpa_vq_reset_comp vdpa_vq_reset;
+
+ struct pds_lm_state_size_comp lm_state_size;
+ struct pds_lm_dirty_status_comp lm_dirty_status;
};
#ifndef __CHECKER__
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 435c8e8161c2..30581e2e04cc 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -34,16 +34,19 @@ enum pds_core_vif_types {
#define PDS_DEV_TYPE_CORE_STR "Core"
#define PDS_DEV_TYPE_VDPA_STR "vDPA"
-#define PDS_DEV_TYPE_VFIO_STR "VFio"
+#define PDS_DEV_TYPE_VFIO_STR "vfio"
#define PDS_DEV_TYPE_ETH_STR "Eth"
#define PDS_DEV_TYPE_RDMA_STR "RDMA"
#define PDS_DEV_TYPE_LM_STR "LM"
#define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
+#define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR
+
+struct pdsc;
int pdsc_register_notify(struct notifier_block *nb);
void pdsc_unregister_notify(struct notifier_block *nb);
void *pdsc_get_pf_struct(struct pci_dev *vf_pdev);
-int pds_client_register(struct pci_dev *pf_pdev, char *devname);
-int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id);
+int pds_client_register(struct pdsc *pf, char *devname);
+int pds_client_unregister(struct pdsc *pf, u16 client_id);
#endif /* _PDS_COMMON_H_ */
diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
index e838a2b90440..17a87c1a55d7 100644
--- a/include/linux/pds/pds_core_if.h
+++ b/include/linux/pds/pds_core_if.h
@@ -79,6 +79,7 @@ enum pds_core_status_code {
PDS_RC_EVFID = 31, /* VF ID does not exist */
PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */
PDS_RC_ECLIENT = 33, /* No such client id */
+ PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */
};
/**
diff --git a/include/linux/peci.h b/include/linux/peci.h
index 06e6ef935297..90e241458ef6 100644
--- a/include/linux/peci.h
+++ b/include/linux/peci.h
@@ -42,13 +42,13 @@ struct peci_controller_ops {
*/
struct peci_controller {
struct device dev;
- struct peci_controller_ops *ops;
+ const struct peci_controller_ops *ops;
struct mutex bus_lock; /* held for the duration of xfer */
u8 id;
};
struct peci_controller *devm_peci_controller_add(struct device *parent,
- struct peci_controller_ops *ops);
+ const struct peci_controller_ops *ops);
static inline struct peci_controller *to_peci_controller(void *d)
{
@@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d)
/**
* struct peci_device - PECI device
* @dev: device object to register PECI device to the device model
- * @controller: manages the bus segment hosting this PECI device
* @info: PECI device characteristics
* @info.family: device family
* @info.model: device model
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index b3b458442330..8c677f185901 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -35,6 +35,12 @@
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
PCPU_MIN_ALLOC_SHIFT)
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define PERCPU_DYNAMIC_SIZE_SHIFT 12
+#else
+#define PERCPU_DYNAMIC_SIZE_SHIFT 10
+#endif
+
/*
* Percpu allocator can serve percpu allocations before slab is
* initialized which allows slab to depend on the percpu allocator.
@@ -42,7 +48,7 @@
* for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than
* PERCPU_DYNAMIC_EARLY_SIZE.
*/
-#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
@@ -56,9 +62,9 @@
* intelligent way to determine this would be nice.
*/
#if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE (28 << 10)
+#define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT)
#else
-#define PERCPU_DYNAMIC_RESERVE (20 << 10)
+#define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
#endif
extern void *pcpu_base_addr;
@@ -126,6 +132,7 @@ extern void __init setup_per_cpu_areas(void);
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
extern void free_percpu(void __percpu *__pdata);
+extern size_t pcpu_alloc_size(void __percpu *__pdata);
DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 75b73c83bc9d..3a44dd1e33d2 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -30,22 +30,35 @@ struct percpu_counter {
extern int percpu_counter_batch;
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
- struct lock_class_key *key);
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+ gfp_t gfp, u32 nr_counters,
+ struct lock_class_key *key);
-#define percpu_counter_init(fbc, value, gfp) \
+#define percpu_counter_init_many(fbc, value, gfp, nr_counters) \
({ \
static struct lock_class_key __key; \
\
- __percpu_counter_init(fbc, value, gfp, &__key); \
+ __percpu_counter_init_many(fbc, value, gfp, nr_counters,\
+ &__key); \
})
-void percpu_counter_destroy(struct percpu_counter *fbc);
+
+#define percpu_counter_init(fbc, value, gfp) \
+ percpu_counter_init_many(fbc, value, gfp, 1)
+
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters);
+static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+{
+ percpu_counter_destroy_many(fbc, 1);
+}
+
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit,
+ s64 amount, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);
static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
@@ -58,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
}
+static inline bool
+percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
+{
+ return __percpu_counter_limited_add(fbc, limit, amount,
+ percpu_counter_batch);
+}
+
/*
* With percpu_counter_add_local() and percpu_counter_sub_local(), counts
* are accumulated in local per cpu counter and not in fbc->count until
@@ -116,11 +136,27 @@ struct percpu_counter {
s64 count;
};
+static inline int percpu_counter_init_many(struct percpu_counter *fbc,
+ s64 amount, gfp_t gfp,
+ u32 nr_counters)
+{
+ u32 i;
+
+ for (i = 0; i < nr_counters; i++)
+ fbc[i].count = amount;
+
+ return 0;
+}
+
static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
gfp_t gfp)
{
- fbc->count = amount;
- return 0;
+ return percpu_counter_init_many(fbc, amount, gfp, 1);
+}
+
+static inline void percpu_counter_destroy_many(struct percpu_counter *fbc,
+ u32 nr_counters)
+{
}
static inline void percpu_counter_destroy(struct percpu_counter *fbc)
@@ -158,6 +194,27 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
local_irq_restore(flags);
}
+static inline bool
+percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
+{
+ unsigned long flags;
+ bool good = false;
+ s64 count;
+
+ if (amount == 0)
+ return true;
+
+ local_irq_save(flags);
+ count = fbc->count + amount;
+ if ((amount > 0 && count <= limit) ||
+ (amount < 0 && count >= limit)) {
+ fbc->count = count;
+ good = true;
+ }
+ local_irq_restore(flags);
+ return good;
+}
+
/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
static inline void
percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index a0801f68762b..b3b34f6670cf 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -60,12 +60,6 @@ struct pmu_hw_events {
DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
- raw_spinlock_t pmu_lock;
-
- /*
* When using percpu IRQs, we need a percpu dev_id. Place it here as we
* already have to allocate this struct per cpu.
*/
@@ -187,5 +181,28 @@ void armpmu_free_irq(int irq, int cpu);
#endif /* CONFIG_ARM_PMU */
#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+#define ARMV8_TRBE_PDEV_NAME "arm,trbe"
+
+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name) \
+ PMU_FORMAT_ATTR(name, \
+ _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
+ ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name) \
+ _ATTR_CFG_GET_FLD(attr, \
+ ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI)
#endif /* __ARM_PMU_H__ */
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index e3899bd77f5c..46377e134d67 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -215,45 +215,54 @@
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
+#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+ ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
/*
* PMOVSR: counters overflow flag status reg
*/
-#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
+#define ARMV8_PMU_OVSR_P GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK_ULL(43, 32) /* arm64 only */
+#define ARMV8_PMU_EVTYPE_TC GENMASK_ULL(63, 61) /* arm64 only */
/*
* Event filters for PMUv3
*/
-#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
+#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29)
+#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28)
+#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26)
/*
* PMUSERENR: user enable reg
*/
-#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
/* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK 0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_SLOTS GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
/*
* This code is really good
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 43fc892aa7d9..43282e22ebe1 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -6,8 +6,8 @@
*
*/
-#ifndef _ASM_RISCV_PERF_EVENT_H
-#define _ASM_RISCV_PERF_EVENT_H
+#ifndef _RISCV_PMU_H
+#define _RISCV_PMU_H
#include <linux/perf_event.h>
#include <linux/ptrace.h>
@@ -21,7 +21,7 @@
#define RISCV_MAX_COUNTERS 64
#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
-#define RISCV_PMU_PDEV_NAME "riscv-pmu"
+#define RISCV_PMU_SBI_PDEV_NAME "riscv-pmu-sbi"
#define RISCV_PMU_LEGACY_PDEV_NAME "riscv-pmu-legacy"
#define RISCV_PMU_STOP_FLAG_RESET 1
@@ -55,6 +55,10 @@ struct riscv_pmu {
void (*ctr_start)(struct perf_event *event, u64 init_val);
void (*ctr_stop)(struct perf_event *event, unsigned long flag);
int (*event_map)(struct perf_event *event, u64 *config);
+ void (*event_init)(struct perf_event *event);
+ void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
+ void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
+ uint8_t (*csr_index)(struct perf_event *event);
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
@@ -81,4 +85,4 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
#endif /* CONFIG_RISCV_PMU */
-#endif /* _ASM_RISCV_PERF_EVENT_H */
+#endif /* _RISCV_PMU_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2166a69e3bf2..d2a15c0c6f8a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -288,10 +288,9 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_EXTENDED_REGS 0x0008
#define PERF_PMU_CAP_EXCLUSIVE 0x0010
#define PERF_PMU_CAP_ITRACE 0x0020
-#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x0040
-#define PERF_PMU_CAP_NO_EXCLUDE 0x0080
-#define PERF_PMU_CAP_AUX_OUTPUT 0x0100
-#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0200
+#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
+#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
struct perf_output_handle;
@@ -445,7 +444,8 @@ struct pmu {
/*
* Will return the value for perf_event_mmap_page::index for this event,
- * if no implementation is provided it will default to: event->hw.idx + 1.
+ * if no implementation is provided it will default to 0 (see
+ * perf_event_idx_default).
*/
int (*event_idx) (struct perf_event *event); /*optional */
@@ -704,6 +704,7 @@ struct perf_event {
/* The cumulative AND of all event_caps for events in this group. */
int group_caps;
+ unsigned int group_generation;
struct perf_event *group_leader;
/*
* event->pmu will always point to pmu in which this event belongs.
@@ -842,11 +843,11 @@ struct perf_event {
};
/*
- * ,-----------------------[1:n]----------------------.
- * V V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- * ^ ^ | |
- * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
+ * ,-----------------------[1:n]------------------------.
+ * V V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ * | |
+ * `--[n:1]-> pmu <-[1:n]--'
*
*
* struct perf_event_pmu_context lifetime is refcount based and RCU freed
@@ -864,6 +865,9 @@ struct perf_event {
* ctx->mutex pinning the configuration. Since we hold a reference on
* group_leader (through the filedesc) it can't go away, therefore it's
* associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
*/
struct perf_event_pmu_context {
struct pmu *pmu;
@@ -878,6 +882,7 @@ struct perf_event_pmu_context {
unsigned int embedded : 1;
unsigned int nr_events;
+ unsigned int nr_cgroups;
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
@@ -1138,6 +1143,15 @@ static inline bool branch_sample_priv(const struct perf_event *event)
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
}
+static inline bool branch_sample_counters(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
+}
+
+static inline bool branch_sample_call_stack(const struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
struct perf_sample_data {
/*
@@ -1172,6 +1186,7 @@ struct perf_sample_data {
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
+ u64 *br_stack_cntr;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
@@ -1194,7 +1209,8 @@ struct perf_sample_data {
PERF_MEM_S(LVL, NA) |\
PERF_MEM_S(SNOOP, NA) |\
PERF_MEM_S(LOCK, NA) |\
- PERF_MEM_S(TLB, NA))
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
static inline void perf_sample_data_init(struct perf_sample_data *data,
u64 addr, u64 period)
@@ -1248,7 +1264,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
struct perf_event *event,
- struct perf_branch_stack *brs)
+ struct perf_branch_stack *brs,
+ u64 *brs_cntr)
{
int size = sizeof(u64); /* nr */
@@ -1256,7 +1273,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
size += sizeof(u64);
size += brs->nr * sizeof(struct perf_branch_entry);
+ /*
+ * The extension space for counters is appended after the
+ * struct perf_branch_stack. It is used to store the occurrences
+ * of events of each branch.
+ */
+ if (brs_cntr)
+ size += brs->nr * sizeof(u64);
+
data->br_stack = brs;
+ data->br_stack_cntr = brs_cntr;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
@@ -1316,15 +1342,31 @@ extern int perf_event_output(struct perf_event *event,
struct pt_regs *regs);
static inline bool
-is_default_overflow_handler(struct perf_event *event)
+__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
{
- if (likely(event->overflow_handler == perf_event_output_forward))
+ if (likely(overflow_handler == perf_event_output_forward))
return true;
- if (unlikely(event->overflow_handler == perf_event_output_backward))
+ if (unlikely(overflow_handler == perf_event_output_backward))
return true;
return false;
}
+#define is_default_overflow_handler(event) \
+ __is_default_overflow_handler((event)->overflow_handler)
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline bool uses_default_overflow_handler(struct perf_event *event)
+{
+ if (likely(is_default_overflow_handler(event)))
+ return true;
+
+ return __is_default_overflow_handler(event->orig_overflow_handler);
+}
+#else
+#define uses_default_overflow_handler(event) \
+ is_default_overflow_handler(event)
+#endif
+
extern void
perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
@@ -1556,7 +1598,7 @@ extern int sysctl_perf_cpu_time_max_percent;
extern void perf_sample_event_took(u64 sample_len_ns);
-int perf_proc_update_handler(struct ctl_table *table, int write,
+int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -1860,10 +1902,6 @@ extern void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg,
u64 now);
-#ifdef CONFIG_MMU
-extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
-#endif
-
/*
* Snapshot branch stack on software events.
*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5063b482e34f..85fc7554cd52 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -5,6 +5,9 @@
#include <linux/pfn.h>
#include <asm/pgtable.h>
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT)
+
#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU
@@ -63,7 +66,6 @@ static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
-#define pte_index pte_index
#ifndef pmd_index
static inline unsigned long pmd_index(unsigned long address)
@@ -99,7 +101,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address)))
#define pte_unmap(pte) do { \
kunmap_local((pte)); \
- /* rcu_read_unlock() to be added later */ \
+ rcu_read_unlock(); \
} while (0)
#else
static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
@@ -108,10 +110,12 @@ static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address)
}
static inline void pte_unmap(pte_t *pte)
{
- /* rcu_read_unlock() to be added later */
+ rcu_read_unlock();
}
#endif
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
/* Find an entry in the second-level page table.. */
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -180,6 +184,101 @@ static inline int pmd_young(pmd_t pmd)
}
#endif
+#ifndef pmd_dirty
+static inline int pmd_dirty(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
+/*
+ * A facility to provide lazy MMU batching. This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued. Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window. Note that using this
+ * interface requires that read hazards be removed from the code. A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads though
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date. This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified. In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode() do {} while (0)
+#define arch_leave_lazy_mmu_mode() do {} while (0)
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+#endif
+
+#ifndef pte_batch_hint
+/**
+ * pte_batch_hint - Number of pages that can be added to batch without scanning.
+ * @ptep: Page table pointer for the entry.
+ * @pte: Page table entry.
+ *
+ * Some architectures know that a set of contiguous ptes all map the same
+ * contiguous memory with the same permissions. In this case, it can provide a
+ * hint to aid pte batching without the core code needing to scan every pte.
+ *
+ * An architecture implementation may ignore the PTE accessed state. Further,
+ * the dirty state must apply atomically to all the PTEs described by the hint.
+ *
+ * May be overridden by the architecture, else pte_batch_hint is always 1.
+ */
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+ return 1;
+}
+#endif
+
+#ifndef pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+ return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
+}
+#endif
+
+#define pte_next_pfn(pte) pte_advance_pfn(pte, 1)
+
+#ifndef set_ptes
+/**
+ * set_ptes - Map consecutive pages to a contiguous range of addresses.
+ * @mm: Address space to map the pages into.
+ * @addr: Address to map the first page at.
+ * @ptep: Page table pointer for the first entry.
+ * @pte: Page table entry for the first page.
+ * @nr: Number of pages to map.
+ *
+ * When nr==1, initial state of pte may be present or not present, and new state
+ * may be present or not present. When nr>1, initial state of all ptes must be
+ * not present, and new state must be present.
+ *
+ * May be overridden by the architecture, or the architecture can define
+ * set_pte() and PFN_PTE_SHIFT.
+ *
+ * Context: The caller holds the page table lock. The pages all belong
+ * to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+
+ arch_enter_lazy_mmu_mode();
+ for (;;) {
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_next_pfn(pte);
+ }
+ arch_leave_lazy_mmu_mode();
+}
+#endif
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -226,6 +325,27 @@ static inline pmd_t pmdp_get(pmd_t *pmdp)
}
#endif
+#ifndef pudp_get
+static inline pud_t pudp_get(pud_t *pudp)
+{
+ return READ_ONCE(*pudp);
+}
+#endif
+
+#ifndef p4dp_get
+static inline p4d_t p4dp_get(p4d_t *p4dp)
+{
+ return READ_ONCE(*p4dp);
+}
+#endif
+
+#ifndef pgdp_get
+static inline pgd_t pgdp_get(pgd_t *pgdp)
+{
+ return READ_ONCE(*pgdp);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
@@ -309,7 +429,21 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void)
*/
static inline bool arch_has_hw_pte_young(void)
{
- return false;
+ return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG);
+}
+#endif
+
+#ifndef arch_check_zapped_pte
+static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
+ pte_t pte)
+{
+}
+#endif
+
+#ifndef arch_check_zapped_pmd
+static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
+ pmd_t pmd)
+{
}
#endif
@@ -320,7 +454,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t pte = ptep_get(ptep);
pte_clear(mm, address, ptep);
- page_table_check_pte_clear(mm, address, pte);
+ page_table_check_pte_clear(mm, pte);
return pte;
}
#endif
@@ -390,6 +524,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
return pmd;
}
#define pmdp_get_lockless pmdp_get_lockless
+#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
@@ -408,6 +543,9 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
return pmdp_get(pmdp);
}
+static inline void pmdp_get_lockless_sync(void)
+{
+}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -419,7 +557,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
pmd_t pmd = *pmdp;
pmd_clear(pmdp);
- page_table_check_pmd_clear(mm, address, pmd);
+ page_table_check_pmd_clear(mm, pmd);
return pmd;
}
@@ -432,7 +570,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
pud_t pud = *pudp;
pud_clear(pudp);
- page_table_check_pud_clear(mm, address, pud);
+ page_table_check_pud_clear(mm, pud);
return pud;
}
@@ -450,11 +588,11 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
-static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
int full)
{
- return pudp_huge_get_and_clear(mm, address, pudp);
+ return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -468,6 +606,76 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
}
#endif
+#ifndef get_and_clear_full_ptes
+/**
+ * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of
+ * the same folio, collecting dirty/accessed bits.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the
+ * returned PTE.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, unsigned int nr, int full)
+{
+ pte_t pte, tmp_pte;
+
+ pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+ return pte;
+}
+#endif
+
+#ifndef clear_full_ptes
+/**
+ * clear_full_ptes - Clear present PTEs that map consecutive pages of the same
+ * folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_get_and_clear_full().
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ for (;;) {
+ ptep_get_and_clear_full(mm, addr, ptep, full);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+#endif
/*
* If two threads concurrently fault at the same page, the thread that
@@ -515,6 +723,20 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
pud_t *pudp);
#endif
+#ifndef pte_mkwrite
+static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ return pte_mkwrite_novma(pte);
+}
+#endif
+
+#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
+static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+{
+ return pmd_mkwrite_novma(pmd);
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
@@ -524,6 +746,37 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
}
#endif
+#ifndef wrprotect_ptes
+/**
+ * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same
+ * folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to write-protect.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_set_wrprotect().
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ for (;;) {
+ ptep_set_wrprotect(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+#endif
+
/*
* On some architectures hardware does not set page access bit when accessing
* memory page, it is responsibility of software setting this bit. It brings
@@ -558,6 +811,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pudp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pud_t *pudp)
{
@@ -571,6 +825,7 @@ static inline void pudp_set_wrprotect(struct mm_struct *mm,
{
BUILD_BUG();
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif
@@ -693,11 +948,14 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
return pmd_val(pmd_a) == pmd_val(pmd_b);
}
+#endif
+#ifndef pud_same
static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
return pud_val(pud_a) == pud_val(pud_b);
}
+#define pud_same pud_same
#endif
#ifndef __HAVE_ARCH_P4D_SAME
@@ -1041,27 +1299,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#endif
/*
- * A facility to provide lazy MMU batching. This allows PTE updates and
- * page invalidations to be delayed until a call to leave lazy MMU mode
- * is issued. Some architectures may benefit from doing this, and it is
- * beneficial for both shadow and direct mode hypervisors, which may batch
- * the PTE updates which happen during this window. Note that using this
- * interface requires that read hazards be removed from the code. A read
- * hazard could result in the direct mode hypervisor case, since the actual
- * write to the page tables may not yet have taken place, so reads though
- * a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date. This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified. In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode() do {} while (0)
-#define arch_leave_lazy_mmu_mode() do {} while (0)
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-#endif
-
-/*
* A facility to provide batching of the reload of page tables and
* other process state with the actual context switch code for
* paravirtualized guests. By convention, only one of the batched
@@ -1322,12 +1559,16 @@ static inline int pud_trans_unstable(pud_t *pud)
#ifndef CONFIG_NUMA_BALANCING
/*
- * Technically a PTE can be PROTNONE even when not doing NUMA balancing but
- * the only case the kernel cares is for NUMA balancing and is only ever set
- * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked
- * _PAGE_PROTNONE so by default, implement the helper as "always no". It
- * is the responsibility of the caller to distinguish between PROT_NONE
- * protections and NUMA hinting fault protections.
+ * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
+ * perfectly valid to indicate "no" in that case, which is why our default
+ * implementation defaults to "always no".
+ *
+ * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
+ * page protection due to NUMA hinting. NUMA hinting faults only apply in
+ * accessible VMAs.
+ *
+ * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
+ * looking at the VMA accessibility is sufficient.
*/
static inline int pte_protnone(pte_t pte)
{
@@ -1499,6 +1740,9 @@ typedef unsigned int pgtbl_mod_mask;
#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif
+#ifndef has_transparent_pud_hugepage
+#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+#endif
/*
* On some architectures it depends on the mm if the p4d/pud or pmd
* layer of the page table hierarchy is folded or not.
@@ -1533,16 +1777,16 @@ typedef unsigned int pgtbl_mod_mask;
* Only meaningful when called on a valid entry.
*/
#ifndef pgd_leaf
-#define pgd_leaf(x) 0
+#define pgd_leaf(x) false
#endif
#ifndef p4d_leaf
-#define p4d_leaf(x) 0
+#define p4d_leaf(x) false
#endif
#ifndef pud_leaf
-#define pud_leaf(x) 0
+#define pud_leaf(x) false
#endif
#ifndef pmd_leaf
-#define pmd_leaf(x) 0
+#define pmd_leaf(x) false
#endif
#ifndef pgd_leaf_size
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 11c1e91563d4..3f68b8239bb1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -30,6 +30,7 @@
#include <linux/refcount.h>
#include <linux/atomic.h>
+#include <net/eee.h>
#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \
SUPPORTED_TP | \
@@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init;
#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features)
#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features)
@@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init;
#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features)
#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features)
#define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features)
+#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features)
extern const int phy_basic_ports_array[3];
extern const int phy_fibre_port_array[1];
@@ -110,6 +113,7 @@ extern const int phy_10gbit_features_array[1];
* @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface
* @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface
* @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
+ * @PHY_INTERFACE_MODE_PSGMII: Penta SGMII
* @PHY_INTERFACE_MODE_QSGMII: Quad SGMII
* @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII
* @PHY_INTERFACE_MODE_100BASEX: 100 BaseX
@@ -147,6 +151,7 @@ typedef enum {
PHY_INTERFACE_MODE_XGMII,
PHY_INTERFACE_MODE_XLGMII,
PHY_INTERFACE_MODE_MOCA,
+ PHY_INTERFACE_MODE_PSGMII,
PHY_INTERFACE_MODE_QSGMII,
PHY_INTERFACE_MODE_TRGMII,
PHY_INTERFACE_MODE_100BASEX,
@@ -254,6 +259,8 @@ static inline const char *phy_modes(phy_interface_t interface)
return "xlgmii";
case PHY_INTERFACE_MODE_MOCA:
return "moca";
+ case PHY_INTERFACE_MODE_PSGMII:
+ return "psgmii";
case PHY_INTERFACE_MODE_QSGMII:
return "qsgmii";
case PHY_INTERFACE_MODE_TRGMII:
@@ -298,6 +305,7 @@ static inline const char *phy_modes(phy_interface_t interface)
#define MII_BUS_ID_SIZE 61
struct device;
+struct kernel_hwtstamp_config;
struct phylink;
struct sfp_bus;
struct sfp_upstream_ops;
@@ -322,7 +330,9 @@ struct mdio_bus_stats {
/**
* struct phy_package_shared - Shared information in PHY packages
- * @addr: Common PHY address used to combine PHYs in one package
+ * @base_addr: Base PHY address of PHY package used to combine PHYs
+ * in one package and for offset calculation of phy_package_read/write
+ * @np: Pointer to the Device Node if PHY package defined in DT
* @refcnt: Number of PHYs connected to this shared data
* @flags: Initialization of PHY package
* @priv_size: Size of the shared private data @priv
@@ -333,7 +343,9 @@ struct mdio_bus_stats {
* phy_package_leave().
*/
struct phy_package_shared {
- int addr;
+ u8 base_addr;
+ /* With PHY package defined in DT this points to the PHY package node */
+ struct device_node *np;
refcount_t refcnt;
unsigned long flags;
size_t priv_size;
@@ -563,7 +575,6 @@ struct macsec_ops;
* - Bits [31:24] are reserved for defining generic
* PHY driver behavior.
* @irq: IRQ number of the PHY's interrupt (-1 if none)
- * @phy_timer: The timer for handling the state machine
* @phylink: Pointer to phylink instance for this PHY
* @sfp_bus_attached: Flag indicating whether the SFP bus has been attached
* @sfp_bus: SFP bus attached to this PHY's fiber port
@@ -584,6 +595,8 @@ struct macsec_ops;
* @supported_eee: supported PHY EEE linkmodes
* @advertising_eee: Currently advertised EEE linkmodes
* @eee_enabled: Flag indicating whether the EEE feature is enabled
+ * @enable_tx_lpi: When True, MAC should transmit LPI to PHY
+ * @eee_cfg: User configuration of EEE
* @lp_advertising: Current link partner advertised linkmodes
* @host_interfaces: PHY interface modes supported by host
* @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
@@ -600,6 +613,8 @@ struct macsec_ops;
* @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
* requiring a rerun of the interrupt handler after resume
* @interface: enum phy_interface_t value
+ * @possible_interfaces: bitmap if interface modes that the attached PHY
+ * will switch between depending on media speed.
* @skb: Netlink message for cable diagnostics
* @nest: Netlink nest used for cable diagnostics
* @ehdr: nNtlink header for cable diagnostics
@@ -631,7 +646,7 @@ struct phy_device {
/* Information about the PHY type */
/* And management functions */
- struct phy_driver *drv;
+ const struct phy_driver *drv;
struct device_link *devlink;
@@ -669,6 +684,7 @@ struct phy_device {
u32 dev_flags;
phy_interface_t interface;
+ DECLARE_PHY_INTERFACE_MASK(possible_interfaces);
/*
* forced speed & duplex (no autoneg)
@@ -690,7 +706,7 @@ struct phy_device {
__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
/* used with phy_speed_down */
__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
- /* used for eee validation */
+ /* used for eee validation and configuration*/
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee);
__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee);
bool eee_enabled;
@@ -700,6 +716,8 @@ struct phy_device {
/* Energy efficient ethernet modes which should be prohibited */
u32 eee_broken_modes;
+ bool enable_tx_lpi;
+ struct eee_config eee_cfg;
#ifdef CONFIG_LED_TRIGGER_PHY
struct phy_led_trigger *phy_led_triggers;
@@ -844,6 +862,15 @@ struct phy_plca_status {
bool pst;
};
+/* Modes for PHY LED configuration */
+enum phy_led_modes {
+ PHY_LED_ACTIVE_LOW = 0,
+ PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1,
+
+ /* keep it last */
+ __PHY_LED_MODES_NUM,
+};
+
/**
* struct phy_led: An LED driven by the PHY
*
@@ -1104,6 +1131,52 @@ struct phy_driver {
int (*led_blink_set)(struct phy_device *dev, u8 index,
unsigned long *delay_on,
unsigned long *delay_off);
+ /**
+ * @led_hw_is_supported: Can the HW support the given rules.
+ * @dev: PHY device which has the LED
+ * @index: Which LED of the PHY device
+ * @rules The core is interested in these rules
+ *
+ * Return 0 if yes, -EOPNOTSUPP if not, or an error code.
+ */
+ int (*led_hw_is_supported)(struct phy_device *dev, u8 index,
+ unsigned long rules);
+ /**
+ * @led_hw_control_set: Set the HW to control the LED
+ * @dev: PHY device which has the LED
+ * @index: Which LED of the PHY device
+ * @rules The rules used to control the LED
+ *
+ * Returns 0, or a an error code.
+ */
+ int (*led_hw_control_set)(struct phy_device *dev, u8 index,
+ unsigned long rules);
+ /**
+ * @led_hw_control_get: Get how the HW is controlling the LED
+ * @dev: PHY device which has the LED
+ * @index: Which LED of the PHY device
+ * @rules Pointer to the rules used to control the LED
+ *
+ * Set *@rules to how the HW is currently blinking. Returns 0
+ * on success, or a error code if the current blinking cannot
+ * be represented in rules, or some other error happens.
+ */
+ int (*led_hw_control_get)(struct phy_device *dev, u8 index,
+ unsigned long *rules);
+
+ /**
+ * @led_polarity_set: Set the LED polarity modes
+ * @dev: PHY device which has the LED
+ * @index: Which LED of the PHY device
+ * @modes: bitmap of LED polarity modes
+ *
+ * Configure LED with all the required polarity modes in @modes
+ * to make it correctly turn ON or OFF.
+ *
+ * Returns 0, or an error code.
+ */
+ int (*led_polarity_set)(struct phy_device *dev, int index,
+ unsigned long modes);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
@@ -1522,9 +1595,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev)
return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp;
}
-static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
+static inline int phy_hwtstamp(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
- return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr);
+ return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack);
}
static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb,
@@ -1698,6 +1773,7 @@ void phy_detach(struct phy_device *phydev);
void phy_start(struct phy_device *phydev);
void phy_stop(struct phy_device *phydev);
int phy_config_aneg(struct phy_device *phydev);
+int _phy_start_aneg(struct phy_device *phydev);
int phy_start_aneg(struct phy_device *phydev);
int phy_aneg_done(struct phy_device *phydev);
int phy_speed_down(struct phy_device *phydev, bool sync);
@@ -1731,10 +1807,6 @@ int phy_start_cable_test_tdr(struct phy_device *phydev,
}
#endif
-int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result);
-int phy_cable_test_fault_length(struct phy_device *phydev, u8 pair,
- u16 cm);
-
static inline void phy_device_reset(struct phy_device *phydev, int value)
{
mdio_device_reset(&phydev->mdio, value);
@@ -1811,7 +1883,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
/* Clause 37 */
int genphy_c37_config_aneg(struct phy_device *phydev);
-int genphy_c37_read_status(struct phy_device *phydev);
+int genphy_c37_read_status(struct phy_device *phydev, bool *changed);
/* Clause 45 PHY */
int genphy_c45_restart_aneg(struct phy_device *phydev);
@@ -1826,6 +1898,8 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
int genphy_c45_pma_read_abilities(struct phy_device *phydev);
+int genphy_c45_pma_read_ext_abilities(struct phy_device *phydev);
+int genphy_c45_pma_baset1_read_abilities(struct phy_device *phydev);
int genphy_c45_read_eee_abilities(struct phy_device *phydev);
int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev);
int genphy_c45_read_status(struct phy_device *phydev);
@@ -1844,9 +1918,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev,
int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv,
unsigned long *lp, bool *is_enabled);
int genphy_c45_ethtool_get_eee(struct phy_device *phydev,
- struct ethtool_eee *data);
+ struct ethtool_keee *data);
int genphy_c45_ethtool_set_eee(struct phy_device *phydev,
- struct ethtool_eee *data);
+ struct ethtool_keee *data);
int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv);
int genphy_c45_an_config_eee_aneg(struct phy_device *phydev);
int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv);
@@ -1896,8 +1970,10 @@ int phy_get_rate_matching(struct phy_device *phydev,
void phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
void phy_advertise_supported(struct phy_device *phydev);
+void phy_advertise_eee_all(struct phy_device *phydev);
void phy_support_sym_pause(struct phy_device *phydev);
void phy_support_asym_pause(struct phy_device *phydev);
+void phy_support_eee(struct phy_device *phydev);
void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
bool autoneg);
void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
@@ -1924,8 +2000,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
int phy_get_eee_err(struct phy_device *phydev);
-int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data);
-int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data);
+int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data);
+int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data);
int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol);
void phy_ethtool_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol);
@@ -1934,10 +2010,13 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev,
int phy_ethtool_set_link_ksettings(struct net_device *ndev,
const struct ethtool_link_ksettings *cmd);
int phy_ethtool_nway_reset(struct net_device *ndev);
-int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size);
+int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size);
+int of_phy_package_join(struct phy_device *phydev, size_t priv_size);
void phy_package_leave(struct phy_device *phydev);
int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
- int addr, size_t priv_size);
+ int base_addr, size_t priv_size);
+int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev,
+ size_t priv_size);
int __init mdio_bus_init(void);
void mdio_bus_exit(void);
@@ -1954,48 +2033,89 @@ int phy_ethtool_set_plca_cfg(struct phy_device *phydev,
int phy_ethtool_get_plca_status(struct phy_device *phydev,
struct phy_plca_status *plca_st);
-static inline int phy_package_read(struct phy_device *phydev, u32 regnum)
+int __phy_hwtstamp_get(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config);
+int __phy_hwtstamp_set(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+
+static inline int phy_package_address(struct phy_device *phydev,
+ unsigned int addr_offset)
{
struct phy_package_shared *shared = phydev->shared;
+ u8 base_addr = shared->base_addr;
- if (!shared)
+ if (addr_offset >= PHY_MAX_ADDR - base_addr)
return -EIO;
- return mdiobus_read(phydev->mdio.bus, shared->addr, regnum);
+ /* we know that addr will be in the range 0..31 and thus the
+ * implicit cast to a signed int is not a problem.
+ */
+ return base_addr + addr_offset;
}
-static inline int __phy_package_read(struct phy_device *phydev, u32 regnum)
+static inline int phy_package_read(struct phy_device *phydev,
+ unsigned int addr_offset, u32 regnum)
{
- struct phy_package_shared *shared = phydev->shared;
+ int addr = phy_package_address(phydev, addr_offset);
- if (!shared)
- return -EIO;
+ if (addr < 0)
+ return addr;
- return __mdiobus_read(phydev->mdio.bus, shared->addr, regnum);
+ return mdiobus_read(phydev->mdio.bus, addr, regnum);
+}
+
+static inline int __phy_package_read(struct phy_device *phydev,
+ unsigned int addr_offset, u32 regnum)
+{
+ int addr = phy_package_address(phydev, addr_offset);
+
+ if (addr < 0)
+ return addr;
+
+ return __mdiobus_read(phydev->mdio.bus, addr, regnum);
}
static inline int phy_package_write(struct phy_device *phydev,
- u32 regnum, u16 val)
+ unsigned int addr_offset, u32 regnum,
+ u16 val)
{
- struct phy_package_shared *shared = phydev->shared;
+ int addr = phy_package_address(phydev, addr_offset);
- if (!shared)
- return -EIO;
+ if (addr < 0)
+ return addr;
- return mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val);
+ return mdiobus_write(phydev->mdio.bus, addr, regnum, val);
}
static inline int __phy_package_write(struct phy_device *phydev,
- u32 regnum, u16 val)
+ unsigned int addr_offset, u32 regnum,
+ u16 val)
{
- struct phy_package_shared *shared = phydev->shared;
+ int addr = phy_package_address(phydev, addr_offset);
- if (!shared)
- return -EIO;
+ if (addr < 0)
+ return addr;
- return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val);
+ return __mdiobus_write(phydev->mdio.bus, addr, regnum, val);
}
+int __phy_package_read_mmd(struct phy_device *phydev,
+ unsigned int addr_offset, int devad,
+ u32 regnum);
+
+int phy_package_read_mmd(struct phy_device *phydev,
+ unsigned int addr_offset, int devad,
+ u32 regnum);
+
+int __phy_package_write_mmd(struct phy_device *phydev,
+ unsigned int addr_offset, int devad,
+ u32 regnum, u16 val);
+
+int phy_package_write_mmd(struct phy_device *phydev,
+ unsigned int addr_offset, int devad,
+ u32 regnum, u16 val);
+
static inline bool __phy_package_set_once(struct phy_device *phydev,
unsigned int b)
{
@@ -2017,7 +2137,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev)
return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE);
}
-extern struct bus_type mdio_bus_type;
+extern const struct bus_type mdio_bus_type;
struct mdio_board_info {
const char *bus_id;
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index f6d607ef0e80..03cd5bae92d3 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -122,6 +122,11 @@ struct phy_ops {
union phy_configure_opts *opts);
int (*reset)(struct phy *phy);
int (*calibrate)(struct phy *phy);
+
+ /* notify phy connect status change */
+ int (*connect)(struct phy *phy, int port);
+ int (*disconnect)(struct phy *phy, int port);
+
void (*release)(struct phy *phy);
struct module *owner;
};
@@ -176,7 +181,7 @@ struct phy_provider {
struct module *owner;
struct list_head list;
struct phy * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args);
+ const struct of_phandle_args *args);
};
/**
@@ -243,6 +248,8 @@ static inline enum phy_mode phy_get_mode(struct phy *phy)
}
int phy_reset(struct phy *phy);
int phy_calibrate(struct phy *phy);
+int phy_notify_connect(struct phy *phy, int port);
+int phy_notify_disconnect(struct phy *phy, int port);
static inline int phy_get_bus_width(struct phy *phy)
{
return phy->attrs.bus_width;
@@ -265,7 +272,7 @@ void phy_put(struct device *dev, struct phy *phy);
void devm_phy_put(struct device *dev, struct phy *phy);
struct phy *of_phy_get(struct device_node *np, const char *con_id);
struct phy *of_phy_simple_xlate(struct device *dev,
- struct of_phandle_args *args);
+ const struct of_phandle_args *args);
struct phy *phy_create(struct device *dev, struct device_node *node,
const struct phy_ops *ops);
struct phy *devm_phy_create(struct device *dev, struct device_node *node,
@@ -275,11 +282,11 @@ void devm_phy_destroy(struct device *dev, struct phy *phy);
struct phy_provider *__of_phy_provider_register(struct device *dev,
struct device_node *children, struct module *owner,
struct phy * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args));
+ const struct of_phandle_args *args));
struct phy_provider *__devm_of_phy_provider_register(struct device *dev,
struct device_node *children, struct module *owner,
struct phy * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args));
+ const struct of_phandle_args *args));
void of_phy_provider_unregister(struct phy_provider *phy_provider);
void devm_of_phy_provider_unregister(struct device *dev,
struct phy_provider *phy_provider);
@@ -396,6 +403,20 @@ static inline int phy_calibrate(struct phy *phy)
return -ENOSYS;
}
+static inline int phy_notify_connect(struct phy *phy, int index)
+{
+ if (!phy)
+ return 0;
+ return -ENOSYS;
+}
+
+static inline int phy_notify_disconnect(struct phy *phy, int index)
+{
+ if (!phy)
+ return 0;
+ return -ENOSYS;
+}
+
static inline int phy_configure(struct phy *phy,
union phy_configure_opts *opts)
{
@@ -479,7 +500,7 @@ static inline struct phy *of_phy_get(struct device_node *np, const char *con_id)
}
static inline struct phy *of_phy_simple_xlate(struct device *dev,
- struct of_phandle_args *args)
+ const struct of_phandle_args *args)
{
return ERR_PTR(-ENOSYS);
}
@@ -509,7 +530,7 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy)
static inline struct phy_provider *__of_phy_provider_register(
struct device *dev, struct device_node *children, struct module *owner,
struct phy * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
return ERR_PTR(-ENOSYS);
}
@@ -517,7 +538,7 @@ static inline struct phy_provider *__of_phy_provider_register(
static inline struct phy_provider *__devm_of_phy_provider_register(struct device
*dev, struct device_node *children, struct module *owner,
struct phy * (*of_xlate)(struct device *dev,
- struct of_phandle_args *args))
+ const struct of_phandle_args *args))
{
return ERR_PTR(-ENOSYS);
}
diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h
index 70998e6dd6fd..6ca51e0080ec 100644
--- a/include/linux/phy/tegra/xusb.h
+++ b/include/linux/phy/tegra/xusb.h
@@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy);
int tegra_phy_xusb_utmi_port_reset(struct phy *phy);
int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl,
unsigned int port);
+int tegra_xusb_padctl_get_port_number(struct phy *phy);
int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy,
enum usb_device_speed speed);
int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy);
diff --git a/include/linux/phylib_stubs.h b/include/linux/phylib_stubs.h
new file mode 100644
index 000000000000..1279f48c8a70
--- /dev/null
+++ b/include/linux/phylib_stubs.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Stubs for the Network PHY library
+ */
+
+#include <linux/rtnetlink.h>
+
+struct kernel_hwtstamp_config;
+struct netlink_ext_ack;
+struct phy_device;
+
+#if IS_ENABLED(CONFIG_PHYLIB)
+
+extern const struct phylib_stubs *phylib_stubs;
+
+struct phylib_stubs {
+ int (*hwtstamp_get)(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config);
+ int (*hwtstamp_set)(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
+};
+
+static inline int phy_hwtstamp_get(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config)
+{
+ /* phylib_register_stubs() and phylib_unregister_stubs()
+ * also run under rtnl_lock().
+ */
+ ASSERT_RTNL();
+
+ if (!phylib_stubs)
+ return -EOPNOTSUPP;
+
+ return phylib_stubs->hwtstamp_get(phydev, config);
+}
+
+static inline int phy_hwtstamp_set(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ /* phylib_register_stubs() and phylib_unregister_stubs()
+ * also run under rtnl_lock().
+ */
+ ASSERT_RTNL();
+
+ if (!phylib_stubs)
+ return -EOPNOTSUPP;
+
+ return phylib_stubs->hwtstamp_set(phydev, config, extack);
+}
+
+#else
+
+static inline int phy_hwtstamp_get(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int phy_hwtstamp_set(struct phy_device *phydev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 1817940a3418..9a57deefcb07 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -9,6 +9,7 @@ struct device_node;
struct ethtool_cmd;
struct fwnode_handle;
struct net_device;
+struct phylink;
enum {
MLO_PAUSE_NONE,
@@ -98,72 +99,6 @@ static inline bool phylink_autoneg_inband(unsigned int mode)
}
/**
- * phylink_pcs_neg_mode() - helper to determine PCS inband mode
- * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
- * @interface: interface mode to be used
- * @advertising: adertisement ethtool link mode mask
- *
- * Determines the negotiation mode to be used by the PCS, and returns
- * one of:
- *
- * - %PHYLINK_PCS_NEG_NONE: interface mode does not support inband
- * - %PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY)
- * will be used.
- * - %PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg
- * disabled
- * - %PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled
- *
- * Note: this is for cases where the PCS itself is involved in negotiation
- * (e.g. Clause 37, SGMII and similar) not Clause 73.
- */
-static inline unsigned int phylink_pcs_neg_mode(unsigned int mode,
- phy_interface_t interface,
- const unsigned long *advertising)
-{
- unsigned int neg_mode;
-
- switch (interface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_QSGMII:
- case PHY_INTERFACE_MODE_QUSGMII:
- case PHY_INTERFACE_MODE_USXGMII:
- /* These protocols are designed for use with a PHY which
- * communicates its negotiation result back to the MAC via
- * inband communication. Note: there exist PHYs that run
- * with SGMII but do not send the inband data.
- */
- if (!phylink_autoneg_inband(mode))
- neg_mode = PHYLINK_PCS_NEG_OUTBAND;
- else
- neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
- break;
-
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- /* 1000base-X is designed for use media-side for Fibre
- * connections, and thus the Autoneg bit needs to be
- * taken into account. We also do this for 2500base-X
- * as well, but drivers may not support this, so may
- * need to override this.
- */
- if (!phylink_autoneg_inband(mode))
- neg_mode = PHYLINK_PCS_NEG_OUTBAND;
- else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
- advertising))
- neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED;
- else
- neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED;
- break;
-
- default:
- neg_mode = PHYLINK_PCS_NEG_NONE;
- break;
- }
-
- return neg_mode;
-}
-
-/**
* struct phylink_link_state - link state structure
* @advertising: ethtool bitmask containing advertised link modes
* @lp_advertising: ethtool bitmask containing link partner advertised link
@@ -200,8 +135,6 @@ enum phylink_op_type {
* struct phylink_config - PHYLINK configuration structure
* @dev: a pointer to a struct device associated with the MAC
* @type: operation type of PHYLINK instance
- * @legacy_pre_march2020: driver has not been updated for March 2020 updates
- * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")
* @poll_fixed_state: if true, starts link_poll,
* if MAC link is at %MLO_AN_FIXED mode.
* @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM.
@@ -215,7 +148,6 @@ enum phylink_op_type {
struct phylink_config {
struct device *dev;
enum phylink_op_type type;
- bool legacy_pre_march2020;
bool poll_fixed_state;
bool mac_managed_pm;
bool ovr_an_inband;
@@ -225,35 +157,31 @@ struct phylink_config {
unsigned long mac_capabilities;
};
+void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed);
+
/**
* struct phylink_mac_ops - MAC operations structure.
- * @validate: Validate and update the link configuration.
+ * @mac_get_caps: Get MAC capabilities for interface mode.
* @mac_select_pcs: Select a PCS for the interface mode.
- * @mac_pcs_get_state: Read the current link state from the hardware.
* @mac_prepare: prepare for a major reconfiguration of the interface.
* @mac_config: configure the MAC for the selected mode and state.
* @mac_finish: finish a major reconfiguration of the interface.
- * @mac_an_restart: restart 802.3z BaseX autonegotiation.
* @mac_link_down: take the link down.
* @mac_link_up: allow the link to come up.
*
* The individual methods are described more fully below.
*/
struct phylink_mac_ops {
- void (*validate)(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state);
+ unsigned long (*mac_get_caps)(struct phylink_config *config,
+ phy_interface_t interface);
struct phylink_pcs *(*mac_select_pcs)(struct phylink_config *config,
phy_interface_t interface);
- void (*mac_pcs_get_state)(struct phylink_config *config,
- struct phylink_link_state *state);
int (*mac_prepare)(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
void (*mac_config)(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
int (*mac_finish)(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
- void (*mac_an_restart)(struct phylink_config *config);
void (*mac_link_down)(struct phylink_config *config, unsigned int mode,
phy_interface_t interface);
void (*mac_link_up)(struct phylink_config *config,
@@ -264,39 +192,17 @@ struct phylink_mac_ops {
#if 0 /* For kernel-doc purposes only. */
/**
- * validate - Validate and update the link configuration
+ * mac_get_caps: Get MAC capabilities for interface mode.
* @config: a pointer to a &struct phylink_config.
- * @supported: ethtool bitmask for supported link modes.
- * @state: a pointer to a &struct phylink_link_state.
- *
- * Clear bits in the @supported and @state->advertising masks that
- * are not supportable by the MAC.
- *
- * Note that the PHY may be able to transform from one connection
- * technology to another, so, eg, don't clear 1000BaseX just
- * because the MAC is unable to BaseX mode. This is more about
- * clearing unsupported speeds and duplex settings. The port modes
- * should not be cleared; phylink_set_port_modes() will help with this.
+ * @interface: PHY interface mode.
*
- * When @config->supported_interfaces has been set, phylink will iterate
- * over the supported interfaces to determine the full capability of the
- * MAC. The validation function must not print errors if @state->interface
- * is set to an unexpected value.
- *
- * When @config->supported_interfaces is empty, phylink will call this
- * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and
- * expects the MAC driver to return all supported link modes.
- *
- * If the @state->interface mode is not supported, then the @supported
- * mask must be cleared.
- *
- * This member is optional; if not set, the generic validator will be
- * used making use of @config->mac_capabilities and
- * @config->supported_interfaces to determine which link modes are
- * supported.
+ * Optional method. When not provided, config->mac_capabilities will be used.
+ * When implemented, this returns the MAC capabilities for the specified
+ * interface mode where there is some special handling required by the MAC
+ * driver (e.g. not supporting half-duplex in certain interface modes.)
*/
-void validate(struct phylink_config *config, unsigned long *supported,
- struct phylink_link_state *state);
+unsigned long mac_get_caps(struct phylink_config *config,
+ phy_interface_t interface);
/**
* mac_select_pcs: Select a PCS for the interface mode.
* @config: a pointer to a &struct phylink_config.
@@ -314,25 +220,6 @@ struct phylink_pcs *mac_select_pcs(struct phylink_config *config,
phy_interface_t interface);
/**
- * mac_pcs_get_state() - Read the current inband link state from the hardware
- * @config: a pointer to a &struct phylink_config.
- * @state: a pointer to a &struct phylink_link_state.
- *
- * Read the current inband link state from the MAC PCS, reporting the
- * current speed in @state->speed, duplex mode in @state->duplex, pause
- * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits,
- * negotiation completion state in @state->an_complete, and link up state
- * in @state->link. If possible, @state->lp_advertising should also be
- * populated.
- *
- * Note: This is a legacy method. This function will not be called unless
- * legacy_pre_march2020 is set in &struct phylink_config and there is no
- * PCS attached.
- */
-void mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state);
-
-/**
* mac_prepare() - prepare to change the PHY interface mode
* @config: a pointer to a &struct phylink_config.
* @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
@@ -368,17 +255,9 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* guaranteed to be correct, and so any mac_config() implementation must
* never reference these fields.
*
- * Note: For legacy March 2020 drivers (drivers with legacy_pre_march2020 set
- * in their &phylnk_config and which don't have a PCS), this function will be
- * called on each link up event, and to also change the in-band advert. For
- * non-legacy drivers, it will only be called to reconfigure the MAC for a
- * "major" change in e.g. interface mode. It will not be called for changes
- * in speed, duplex or pause modes or to change the in-band advertisement.
- * In any case, it is strongly preferred that speed, duplex and pause settings
- * are handled in the mac_link_up() method and not in this method.
- *
- * (this requires a rewrite - please refer to mac_link_up() for situations
- * where the PCS and MAC are not tightly integrated.)
+ * This will only be called to reconfigure the MAC for a "major" change in
+ * e.g. interface mode. It will not be called for changes in speed, duplex
+ * or pause modes or to change the in-band advertisement.
*
* In all negotiation modes, as defined by @mode, @state->pause indicates the
* pause settings which should be applied as follows. If %MLO_PAUSE_AN is not
@@ -410,7 +289,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode,
* 1000base-X or Cisco SGMII mode depending on the @state->interface
* mode). In both cases, link state management (whether the link
* is up or not) is performed by the MAC, and reported via the
- * mac_pcs_get_state() callback. Changes in link state must be made
+ * pcs_get_state() callback. Changes in link state must be made
* by calling phylink_mac_change().
*
* Interface mode specific details are mentioned below.
@@ -459,16 +338,6 @@ int mac_finish(struct phylink_config *config, unsigned int mode,
phy_interface_t iface);
/**
- * mac_an_restart() - restart 802.3z BaseX autonegotiation
- * @config: a pointer to a &struct phylink_config.
- *
- * Note: This is a legacy method. This function will not be called unless
- * legacy_pre_march2020 is set in &struct phylink_config and there is no
- * PCS attached.
- */
-void mac_an_restart(struct phylink_config *config);
-
-/**
* mac_link_down() - take the link down
* @config: a pointer to a &struct phylink_config.
* @mode: link autonegotiation mode
@@ -520,14 +389,19 @@ struct phylink_pcs_ops;
/**
* struct phylink_pcs - PHYLINK PCS instance
* @ops: a pointer to the &struct phylink_pcs_ops structure
+ * @phylink: pointer to &struct phylink_config
* @neg_mode: provide PCS neg mode via "mode" argument
* @poll: poll the PCS for link changes
*
* This structure is designed to be embedded within the PCS private data,
* and will be passed between phylink and the PCS.
+ *
+ * The @phylink member is private to phylink and must not be touched by
+ * the PCS driver.
*/
struct phylink_pcs {
const struct phylink_pcs_ops *ops;
+ struct phylink *phylink;
bool neg_mode;
bool poll;
};
@@ -535,6 +409,10 @@ struct phylink_pcs {
/**
* struct phylink_pcs_ops - MAC PCS operations structure.
* @pcs_validate: validate the link configuration.
+ * @pcs_enable: enable the PCS.
+ * @pcs_disable: disable the PCS.
+ * @pcs_pre_config: pre-mac_config method (for errata)
+ * @pcs_post_config: post-mac_config method (for arrata)
* @pcs_get_state: read the current MAC PCS link state from the hardware.
* @pcs_config: configure the MAC PCS for the selected mode and state.
* @pcs_an_restart: restart 802.3z BaseX autonegotiation.
@@ -544,6 +422,12 @@ struct phylink_pcs {
struct phylink_pcs_ops {
int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
+ int (*pcs_enable)(struct phylink_pcs *pcs);
+ void (*pcs_disable)(struct phylink_pcs *pcs);
+ void (*pcs_pre_config)(struct phylink_pcs *pcs,
+ phy_interface_t interface);
+ int (*pcs_post_config)(struct phylink_pcs *pcs,
+ phy_interface_t interface);
void (*pcs_get_state)(struct phylink_pcs *pcs,
struct phylink_link_state *state);
int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode,
@@ -574,6 +458,18 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state);
/**
+ * pcs_enable() - enable the PCS.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ */
+int pcs_enable(struct phylink_pcs *pcs);
+
+/**
+ * pcs_disable() - disable the PCS.
+ * @pcs: a pointer to a &struct phylink_pcs.
+ */
+void pcs_disable(struct phylink_pcs *pcs);
+
+/**
* pcs_get_state() - Read the current inband link state from the hardware
* @pcs: a pointer to a &struct phylink_pcs.
* @state: a pointer to a &struct phylink_link_state.
@@ -584,9 +480,6 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
* negotiation completion state in @state->an_complete, and link up state
* in @state->link. If possible, @state->lp_advertising should also be
* populated.
- *
- * When present, this overrides mac_pcs_get_state() in &struct
- * phylink_mac_ops.
*/
void pcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state);
@@ -615,7 +508,7 @@ void pcs_get_state(struct phylink_pcs *pcs,
*
* The %neg_mode argument should be tested via the phylink_mode_*() family of
* functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
*/
int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, const unsigned long *advertising,
@@ -645,23 +538,12 @@ void pcs_an_restart(struct phylink_pcs *pcs);
*
* The %mode argument should be tested via the phylink_mode_*() family of
* functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
*/
void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, int speed, int duplex);
#endif
-void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps);
-unsigned long phylink_get_capabilities(phy_interface_t interface,
- unsigned long mac_capabilities,
- int rate_matching);
-void phylink_validate_mask_caps(unsigned long *supported,
- struct phylink_link_state *state,
- unsigned long caps);
-void phylink_generic_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state);
-
struct phylink *phylink_create(struct phylink_config *,
const struct fwnode_handle *,
phy_interface_t,
@@ -677,6 +559,7 @@ int phylink_fwnode_phy_connect(struct phylink *pl,
void phylink_disconnect_phy(struct phylink *);
void phylink_mac_change(struct phylink *, bool up);
+void phylink_pcs_change(struct phylink_pcs *, bool up);
void phylink_start(struct phylink *);
void phylink_stop(struct phylink *);
@@ -698,8 +581,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *,
struct ethtool_pauseparam *);
int phylink_get_eee_err(struct phylink *);
int phylink_init_eee(struct phylink *, bool);
-int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
-int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
+int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee);
+int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee);
int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
int phylink_speed_down(struct phylink *pl, bool sync);
int phylink_speed_up(struct phylink *pl);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 653a527574c4..a3aad9b4074c 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,18 +2,12 @@
#ifndef _LINUX_PID_H
#define _LINUX_PID_H
+#include <linux/pid_types.h>
#include <linux/rculist.h>
-#include <linux/wait.h>
+#include <linux/rcupdate.h>
#include <linux/refcount.h>
-
-enum pid_type
-{
- PIDTYPE_PID,
- PIDTYPE_TGID,
- PIDTYPE_PGID,
- PIDTYPE_SID,
- PIDTYPE_MAX,
-};
+#include <linux/sched.h>
+#include <linux/wait.h>
/*
* What is struct pid?
@@ -51,6 +45,8 @@ enum pid_type
* find_pid_ns() using the int nr and struct pid_namespace *ns.
*/
+#define RESERVED_PIDS 300
+
struct upid {
int nr;
struct pid_namespace *ns;
@@ -61,6 +57,8 @@ struct pid
refcount_t count;
unsigned int level;
spinlock_t lock;
+ struct dentry *stashed;
+ u64 ino;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct hlist_head inodes;
@@ -72,15 +70,13 @@ struct pid
extern struct pid init_struct_pid;
-extern const struct file_operations pidfd_fops;
-
struct file;
-extern struct pid *pidfd_pid(const struct file *file);
+struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_create(struct pid *pid, unsigned int flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
+void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid)
{
@@ -110,9 +106,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type);
-struct pid_namespace;
-extern struct pid_namespace init_pid_ns;
-
extern int pid_max;
extern int pid_max_min, pid_max_max;
@@ -215,4 +208,127 @@ pid_t pid_vnr(struct pid *pid);
} \
task = tg___; \
} while_each_pid_task(pid, type, task)
+
+static inline struct pid *task_pid(struct task_struct *task)
+{
+ return task->thread_pid;
+}
+
+/*
+ * the helpers to get the task's different pids as they are seen
+ * from various namespaces
+ *
+ * task_xid_nr() : global id, i.e. the id seen from the init namespace;
+ * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of
+ * current.
+ * task_xid_nr_ns() : id seen from the ns specified;
+ *
+ * see also pid_nr() etc in include/linux/pid.h
+ */
+pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+ return tsk->pid;
+}
+
+static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
+}
+
+static inline pid_t task_pid_vnr(struct task_struct *tsk)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
+}
+
+
+static inline pid_t task_tgid_nr(struct task_struct *tsk)
+{
+ return tsk->tgid;
+}
+
+/**
+ * pid_alive - check that a task structure is not stale
+ * @p: Task structure to be checked.
+ *
+ * Test if a process is not yet dead (at most zombie state)
+ * If pid_alive fails, then pointers within the task structure
+ * can be stale and must not be dereferenced.
+ *
+ * Return: 1 if the process is alive. 0 otherwise.
+ */
+static inline int pid_alive(const struct task_struct *p)
+{
+ return p->thread_pid != NULL;
+}
+
+static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
+}
+
+static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
+}
+
+
+static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
+}
+
+static inline pid_t task_session_vnr(struct task_struct *tsk)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
+}
+
+static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
+}
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
+}
+
+static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
+{
+ pid_t pid = 0;
+
+ rcu_read_lock();
+ if (pid_alive(tsk))
+ pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
+ rcu_read_unlock();
+
+ return pid;
+}
+
+static inline pid_t task_ppid_nr(const struct task_struct *tsk)
+{
+ return task_ppid_nr_ns(tsk, &init_pid_ns);
+}
+
+/* Obsolete, do not use: */
+static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+{
+ return task_pgrp_nr_ns(tsk, &init_pid_ns);
+}
+
+/**
+ * is_global_init - check if a task structure is init. Since init
+ * is free to have sub-threads we need to check tgid.
+ * @tsk: Task structure to be checked.
+ *
+ * Check if a task structure is the first user space task the kernel created.
+ *
+ * Return: 1 if the task structure is init. 0 otherwise.
+ */
+static inline int is_global_init(struct task_struct *tsk)
+{
+ return task_tgid_nr(tsk) == 1;
+}
+
#endif /* _LINUX_PID_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c758809d5bcf..f9f9931e02d6 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -17,18 +17,10 @@
struct fs_pin;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
-/*
- * sysctl for vm.memfd_noexec
- * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_EXEC was set.
- * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL
- * acts like MFD_NOEXEC_SEAL was set.
- * 2: memfd_create() without MFD_NOEXEC_SEAL will be
- * rejected.
- */
-#define MEMFD_NOEXEC_SCOPE_EXEC 0
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1
-#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2
+/* modes for vm.memfd_noexec sysctl */
+#define MEMFD_NOEXEC_SCOPE_EXEC 0 /* MFD_EXEC implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 /* MFD_NOEXEC_SEAL implied if unset */
+#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 /* same as 1, except MFD_EXEC rejected */
#endif
struct pid_namespace {
@@ -47,7 +39,6 @@ struct pid_namespace {
int reboot; /* group exit code if this pidns was rebooted */
struct ns_common ns;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
- /* sysctl for vm.memfd_noexec */
int memfd_noexec_scope;
#endif
} __randomize_layout;
@@ -64,6 +55,23 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ int scope = MEMFD_NOEXEC_SCOPE_EXEC;
+
+ for (; ns; ns = ns->parent)
+ scope = max(scope, READ_ONCE(ns->memfd_noexec_scope));
+
+ return scope;
+}
+#else
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+#endif
+
extern struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
@@ -78,6 +86,11 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
return ns;
}
+static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns)
+{
+ return 0;
+}
+
static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *ns)
{
diff --git a/include/linux/pid_types.h b/include/linux/pid_types.h
new file mode 100644
index 000000000000..c2aee1d91dcf
--- /dev/null
+++ b/include/linux/pid_types.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PID_TYPES_H
+#define _LINUX_PID_TYPES_H
+
+enum pid_type {
+ PIDTYPE_PID,
+ PIDTYPE_TGID,
+ PIDTYPE_PGID,
+ PIDTYPE_SID,
+ PIDTYPE_MAX,
+};
+
+struct pid_namespace;
+extern struct pid_namespace init_pid_ns;
+
+#endif /* _LINUX_PID_TYPES_H */
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
new file mode 100644
index 000000000000..75bdf9807802
--- /dev/null
+++ b/include/linux/pidfs.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PID_FS_H
+#define _LINUX_PID_FS_H
+
+struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
+void __init pidfs_init(void);
+
+#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 4729d54e8995..73de70362b98 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -17,6 +17,7 @@
#include <linux/pinctrl/pinctrl-state.h>
struct device;
+struct gpio_chip;
/* This struct is private to the core and should be regarded as a cookie */
struct pinctrl;
@@ -25,27 +26,30 @@ struct pinctrl_state;
#ifdef CONFIG_PINCTRL
/* External interface to pin control */
-extern bool pinctrl_gpio_can_use_line(unsigned gpio);
-extern int pinctrl_gpio_request(unsigned gpio);
-extern void pinctrl_gpio_free(unsigned gpio);
-extern int pinctrl_gpio_direction_input(unsigned gpio);
-extern int pinctrl_gpio_direction_output(unsigned gpio);
-extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config);
-
-extern struct pinctrl * __must_check pinctrl_get(struct device *dev);
-extern void pinctrl_put(struct pinctrl *p);
-extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p,
- const char *name);
-extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s);
-
-extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev);
-extern void devm_pinctrl_put(struct pinctrl *p);
-extern int pinctrl_select_default_state(struct device *dev);
+bool pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset);
+int pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset);
+void pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset);
+int pinctrl_gpio_direction_input(struct gpio_chip *gc,
+ unsigned int offset);
+int pinctrl_gpio_direction_output(struct gpio_chip *gc,
+ unsigned int offset);
+int pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
+ unsigned long config);
+
+struct pinctrl * __must_check pinctrl_get(struct device *dev);
+void pinctrl_put(struct pinctrl *p);
+struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p,
+ const char *name);
+int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s);
+
+struct pinctrl * __must_check devm_pinctrl_get(struct device *dev);
+void devm_pinctrl_put(struct pinctrl *p);
+int pinctrl_select_default_state(struct device *dev);
#ifdef CONFIG_PM
-extern int pinctrl_pm_select_default_state(struct device *dev);
-extern int pinctrl_pm_select_sleep_state(struct device *dev);
-extern int pinctrl_pm_select_idle_state(struct device *dev);
+int pinctrl_pm_select_default_state(struct device *dev);
+int pinctrl_pm_select_sleep_state(struct device *dev);
+int pinctrl_pm_select_idle_state(struct device *dev);
#else
static inline int pinctrl_pm_select_default_state(struct device *dev)
{
@@ -63,31 +67,38 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev)
#else /* !CONFIG_PINCTRL */
-static inline bool pinctrl_gpio_can_use_line(unsigned gpio)
+static inline bool
+pinctrl_gpio_can_use_line(struct gpio_chip *gc, unsigned int offset)
{
return true;
}
-static inline int pinctrl_gpio_request(unsigned gpio)
+static inline int
+pinctrl_gpio_request(struct gpio_chip *gc, unsigned int offset)
{
return 0;
}
-static inline void pinctrl_gpio_free(unsigned gpio)
+static inline void
+pinctrl_gpio_free(struct gpio_chip *gc, unsigned int offset)
{
}
-static inline int pinctrl_gpio_direction_input(unsigned gpio)
+static inline int
+pinctrl_gpio_direction_input(struct gpio_chip *gc, unsigned int offset)
{
return 0;
}
-static inline int pinctrl_gpio_direction_output(unsigned gpio)
+static inline int
+pinctrl_gpio_direction_output(struct gpio_chip *gc, unsigned int offset)
{
return 0;
}
-static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config)
+static inline int
+pinctrl_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
+ unsigned long config)
{
return 0;
}
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 0639b36f43c5..673e96df453b 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -11,7 +11,7 @@
#ifndef __LINUX_PINCTRL_MACHINE_H
#define __LINUX_PINCTRL_MACHINE_H
-#include <linux/kernel.h> /* ARRAY_SIZE() */
+#include <linux/array_size.h>
#include <linux/pinctrl/pinctrl-state.h>
@@ -47,7 +47,7 @@ struct pinctrl_map_mux {
struct pinctrl_map_configs {
const char *group_or_pin;
unsigned long *configs;
- unsigned num_configs;
+ unsigned int num_configs;
};
/**
@@ -154,13 +154,13 @@ struct pinctrl_map;
#ifdef CONFIG_PINCTRL
extern int pinctrl_register_mappings(const struct pinctrl_map *map,
- unsigned num_maps);
+ unsigned int num_maps);
extern void pinctrl_unregister_mappings(const struct pinctrl_map *map);
extern void pinctrl_provide_dummies(void);
#else
static inline int pinctrl_register_mappings(const struct pinctrl_map *map,
- unsigned num_maps)
+ unsigned int num_maps)
{
return 0;
}
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index d74b7a4ea154..a65d3d078e58 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -193,17 +193,17 @@ struct pinconf_generic_params {
int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev,
struct device_node *np, struct pinctrl_map **map,
- unsigned *reserved_maps, unsigned *num_maps,
+ unsigned int *reserved_maps, unsigned int *num_maps,
enum pinctrl_map_type type);
int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev,
struct device_node *np_config, struct pinctrl_map **map,
- unsigned *num_maps, enum pinctrl_map_type type);
+ unsigned int *num_maps, enum pinctrl_map_type type);
void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev,
- struct pinctrl_map *map, unsigned num_maps);
+ struct pinctrl_map *map, unsigned int num_maps);
static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctldev,
struct device_node *np_config, struct pinctrl_map **map,
- unsigned *num_maps)
+ unsigned int *num_maps)
{
return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps,
PIN_MAP_TYPE_CONFIGS_GROUP);
@@ -211,7 +211,7 @@ static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctld
static inline int pinconf_generic_dt_node_to_map_pin(struct pinctrl_dev *pctldev,
struct device_node *np_config, struct pinctrl_map **map,
- unsigned *num_maps)
+ unsigned int *num_maps)
{
return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps,
PIN_MAP_TYPE_CONFIGS_PIN);
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index f8a8215e9021..770ec2221156 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -40,25 +40,25 @@ struct pinconf_ops {
bool is_generic;
#endif
int (*pin_config_get) (struct pinctrl_dev *pctldev,
- unsigned pin,
+ unsigned int pin,
unsigned long *config);
int (*pin_config_set) (struct pinctrl_dev *pctldev,
- unsigned pin,
+ unsigned int pin,
unsigned long *configs,
- unsigned num_configs);
+ unsigned int num_configs);
int (*pin_config_group_get) (struct pinctrl_dev *pctldev,
- unsigned selector,
+ unsigned int selector,
unsigned long *config);
int (*pin_config_group_set) (struct pinctrl_dev *pctldev,
- unsigned selector,
+ unsigned int selector,
unsigned long *configs,
- unsigned num_configs);
+ unsigned int num_configs);
void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev,
struct seq_file *s,
- unsigned offset);
+ unsigned int offset);
void (*pin_config_group_dbg_show) (struct pinctrl_dev *pctldev,
struct seq_file *s,
- unsigned selector);
+ unsigned int selector);
void (*pin_config_config_dbg_show) (struct pinctrl_dev *pctldev,
struct seq_file *s,
unsigned long config);
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 4d252ea00ed1..9a8189ffd0f2 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -54,7 +54,7 @@ struct pingroup {
* @drv_data: driver-defined per-pin data. pinctrl core does not touch this
*/
struct pinctrl_pin_desc {
- unsigned number;
+ unsigned int number;
const char *name;
void *drv_data;
};
@@ -82,7 +82,7 @@ struct pinctrl_gpio_range {
unsigned int base;
unsigned int pin_base;
unsigned int npins;
- unsigned const *pins;
+ unsigned int const *pins;
struct gpio_chip *gc;
};
@@ -108,18 +108,18 @@ struct pinctrl_gpio_range {
struct pinctrl_ops {
int (*get_groups_count) (struct pinctrl_dev *pctldev);
const char *(*get_group_name) (struct pinctrl_dev *pctldev,
- unsigned selector);
+ unsigned int selector);
int (*get_group_pins) (struct pinctrl_dev *pctldev,
- unsigned selector,
- const unsigned **pins,
- unsigned *num_pins);
+ unsigned int selector,
+ const unsigned int **pins,
+ unsigned int *num_pins);
void (*pin_dbg_show) (struct pinctrl_dev *pctldev, struct seq_file *s,
- unsigned offset);
+ unsigned int offset);
int (*dt_node_to_map) (struct pinctrl_dev *pctldev,
struct device_node *np_config,
- struct pinctrl_map **map, unsigned *num_maps);
+ struct pinctrl_map **map, unsigned int *num_maps);
void (*dt_free_map) (struct pinctrl_dev *pctldev,
- struct pinctrl_map *map, unsigned num_maps);
+ struct pinctrl_map *map, unsigned int num_maps);
};
/**
@@ -193,7 +193,7 @@ extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range);
extern void pinctrl_add_gpio_ranges(struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *ranges,
- unsigned nranges);
+ unsigned int nranges);
extern void pinctrl_remove_gpio_range(struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range);
@@ -203,8 +203,8 @@ extern struct pinctrl_gpio_range *
pinctrl_find_gpio_range_from_pin(struct pinctrl_dev *pctldev,
unsigned int pin);
extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
- const char *pin_group, const unsigned **pins,
- unsigned *num_pins);
+ const char *pin_group, const unsigned int **pins,
+ unsigned int *num_pins);
/**
* struct pinfunction - Description about a function
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index a7e370965c53..d6f7b58d6ad0 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -57,26 +57,26 @@ struct pinctrl_gpio_range;
* the pin request.
*/
struct pinmux_ops {
- int (*request) (struct pinctrl_dev *pctldev, unsigned offset);
- int (*free) (struct pinctrl_dev *pctldev, unsigned offset);
+ int (*request) (struct pinctrl_dev *pctldev, unsigned int offset);
+ int (*free) (struct pinctrl_dev *pctldev, unsigned int offset);
int (*get_functions_count) (struct pinctrl_dev *pctldev);
const char *(*get_function_name) (struct pinctrl_dev *pctldev,
- unsigned selector);
+ unsigned int selector);
int (*get_function_groups) (struct pinctrl_dev *pctldev,
- unsigned selector,
- const char * const **groups,
- unsigned *num_groups);
- int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector,
- unsigned group_selector);
+ unsigned int selector,
+ const char * const **groups,
+ unsigned int *num_groups);
+ int (*set_mux) (struct pinctrl_dev *pctldev, unsigned int func_selector,
+ unsigned int group_selector);
int (*gpio_request_enable) (struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range,
- unsigned offset);
+ unsigned int offset);
void (*gpio_disable_free) (struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range,
- unsigned offset);
+ unsigned int offset);
int (*gpio_set_direction) (struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range,
- unsigned offset,
+ unsigned int offset,
bool input);
bool strict;
};
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 02e0086b10f6..8ff23bf5a819 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -62,9 +62,6 @@ struct pipe_inode_info {
unsigned int tail;
unsigned int max_usage;
unsigned int ring_size;
-#ifdef CONFIG_WATCH_QUEUE
- bool note_loss;
-#endif
unsigned int nr_accounted;
unsigned int readers;
unsigned int writers;
@@ -72,6 +69,9 @@ struct pipe_inode_info {
unsigned int r_counter;
unsigned int w_counter;
bool poll_usage;
+#ifdef CONFIG_WATCH_QUEUE
+ bool note_loss;
+#endif
struct page *tmp_page;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
@@ -125,6 +125,22 @@ struct pipe_buf_operations {
};
/**
+ * pipe_has_watch_queue - Check whether the pipe is a watch_queue,
+ * i.e. it was created with O_NOTIFICATION_PIPE
+ * @pipe: The pipe to check
+ *
+ * Return: true if pipe is a watch queue, false otherwise.
+ */
+static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe)
+{
+#ifdef CONFIG_WATCH_QUEUE
+ return pipe->watch_queue != NULL;
+#else
+ return false;
+#endif
+}
+
+/**
* pipe_empty - Return true if the pipe is empty
* @head: The pipe ring head pointer
* @tail: The pipe ring tail pointer
@@ -269,10 +285,10 @@ bool pipe_is_unprivileged_user(void);
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+long pipe_fcntl(struct file *, unsigned int, unsigned int arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
int create_pipe_files(struct file **, int);
-unsigned int round_pipe_size(unsigned long size);
+unsigned int round_pipe_size(unsigned int size);
#endif
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 80cb00db42a4..2f1b952d596a 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -154,7 +154,9 @@ struct packet_stacked_data
struct pktcdvd_device
{
- struct block_device *bdev; /* dev attached */
+ struct file *bdev_file; /* dev attached */
+ /* handle acquired for bdev during pkt_open_dev() */
+ struct file *f_open_bdev;
dev_t pkt_dev; /* our dev */
struct packet_settings settings;
struct packet_stats stats;
diff --git a/include/linux/platform_data/bd6107.h b/include/linux/platform_data/bd6107.h
index 54a06a4d2618..596ca4f95cfa 100644
--- a/include/linux/platform_data/bd6107.h
+++ b/include/linux/platform_data/bd6107.h
@@ -8,7 +8,7 @@
struct device;
struct bd6107_platform_data {
- struct device *fbdev;
+ struct device *dev;
unsigned int def_value;
};
diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h
index f922a192fe58..ec99b7b73d1d 100644
--- a/include/linux/platform_data/brcmfmac.h
+++ b/include/linux/platform_data/brcmfmac.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 201 Broadcom Corporation
+ * Copyright (c) 2016 Broadcom Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index ab721cf13a98..ecc47d5fe239 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -3961,60 +3961,52 @@ struct ec_response_i2c_passthru {
} __ec_align1;
/*****************************************************************************/
-/* Power button hang detect */
-
+/* AP hang detect */
#define EC_CMD_HANG_DETECT 0x009F
-/* Reasons to start hang detection timer */
-/* Power button pressed */
-#define EC_HANG_START_ON_POWER_PRESS BIT(0)
-
-/* Lid closed */
-#define EC_HANG_START_ON_LID_CLOSE BIT(1)
-
- /* Lid opened */
-#define EC_HANG_START_ON_LID_OPEN BIT(2)
+#define EC_HANG_DETECT_MIN_TIMEOUT 5
+#define EC_HANG_DETECT_MAX_TIMEOUT 65535
-/* Start of AP S3->S0 transition (booting or resuming from suspend) */
-#define EC_HANG_START_ON_RESUME BIT(3)
+/* EC hang detect commands */
+enum ec_hang_detect_cmds {
+ /* Reload AP hang detect timer. */
+ EC_HANG_DETECT_CMD_RELOAD = 0x0,
-/* Reasons to cancel hang detection */
+ /* Stop AP hang detect timer. */
+ EC_HANG_DETECT_CMD_CANCEL = 0x1,
-/* Power button released */
-#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
-
-/* Any host command from AP received */
-#define EC_HANG_STOP_ON_HOST_COMMAND BIT(9)
-
-/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
-#define EC_HANG_STOP_ON_SUSPEND BIT(10)
+ /* Configure watchdog with given reboot timeout and
+ * cancel currently running AP hang detect timer.
+ */
+ EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2,
-/*
- * If this flag is set, all the other fields are ignored, and the hang detect
- * timer is started. This provides the AP a way to start the hang timer
- * without reconfiguring any of the other hang detect settings. Note that
- * you must previously have configured the timeouts.
- */
-#define EC_HANG_START_NOW BIT(30)
+ /* Get last hang status - whether the AP boot was clear or not */
+ EC_HANG_DETECT_CMD_GET_STATUS = 0x3,
-/*
- * If this flag is set, all the other fields are ignored (including
- * EC_HANG_START_NOW). This provides the AP a way to stop the hang timer
- * without reconfiguring any of the other hang detect settings.
- */
-#define EC_HANG_STOP_NOW BIT(31)
+ /* Clear last hang status. Called when AP is rebooting/shutting down
+ * gracefully.
+ */
+ EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4
+};
struct ec_params_hang_detect {
- /* Flags; see EC_HANG_* */
- uint32_t flags;
-
- /* Timeout in msec before generating host event, if enabled */
- uint16_t host_event_timeout_msec;
+ uint16_t command; /* enum ec_hang_detect_cmds */
+ /* Timeout in seconds before generating reboot */
+ uint16_t reboot_timeout_sec;
+} __ec_align2;
- /* Timeout in msec before generating warm reboot, if enabled */
- uint16_t warm_reboot_timeout_msec;
-} __ec_align4;
+/* Status codes that describe whether AP has boot normally or the hang has been
+ * detected and EC has reset AP
+ */
+enum ec_hang_detect_status {
+ EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0,
+ EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1,
+ EC_HANG_DETECT_AP_BOOT_COUNT,
+};
+struct ec_response_hang_detect {
+ uint8_t status; /* enum ec_hang_detect_status */
+} __ec_align1;
/*****************************************************************************/
/* Commands for battery charging */
@@ -4436,8 +4428,20 @@ struct ec_response_i2c_passthru_protect {
* These commands are for sending and receiving message via HDMI CEC
*/
+#define EC_CEC_MAX_PORTS 16
+
#define MAX_CEC_MSG_LEN 16
+/*
+ * Helper macros for packing/unpacking cec_events.
+ * bits[27:0] : bitmask of events from enum mkbp_cec_event
+ * bits[31:28]: port number
+ */
+#define EC_MKBP_EVENT_CEC_PACK(events, port) \
+ (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28))
+#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0))
+#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf)
+
/* CEC message from the AP to be written on the CEC bus */
#define EC_CMD_CEC_WRITE_MSG 0x00B8
@@ -4449,19 +4453,54 @@ struct ec_params_cec_write {
uint8_t msg[MAX_CEC_MSG_LEN];
} __ec_align1;
+/**
+ * struct ec_params_cec_write_v1 - Message to write to the CEC bus
+ * @port: CEC port to write the message on
+ * @msg_len: length of msg in bytes
+ * @msg: message content to write to the CEC bus
+ */
+struct ec_params_cec_write_v1 {
+ uint8_t port;
+ uint8_t msg_len;
+ uint8_t msg[MAX_CEC_MSG_LEN];
+} __ec_align1;
+
+/* CEC message read from a CEC bus reported back to the AP */
+#define EC_CMD_CEC_READ_MSG 0x00B9
+
+/**
+ * struct ec_params_cec_read - Read a message from the CEC bus
+ * @port: CEC port to read a message on
+ */
+struct ec_params_cec_read {
+ uint8_t port;
+} __ec_align1;
+
+/**
+ * struct ec_response_cec_read - Message read from the CEC bus
+ * @msg_len: length of msg in bytes
+ * @msg: message content read from the CEC bus
+ */
+struct ec_response_cec_read {
+ uint8_t msg_len;
+ uint8_t msg[MAX_CEC_MSG_LEN];
+} __ec_align1;
+
/* Set various CEC parameters */
#define EC_CMD_CEC_SET 0x00BA
/**
* struct ec_params_cec_set - CEC parameters set
* @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ * @port: CEC port to set the parameter on
* @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC
* or 1 to enable CEC functionality, in case cmd is
* CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical
* address between 0 and 15 or 0xff to unregister
*/
struct ec_params_cec_set {
- uint8_t cmd; /* enum cec_command */
+ uint8_t cmd : 4; /* enum cec_command */
+ uint8_t port : 4;
uint8_t val;
} __ec_align1;
@@ -4471,9 +4510,11 @@ struct ec_params_cec_set {
/**
* struct ec_params_cec_get - CEC parameters get
* @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ * @port: CEC port to get the parameter on
*/
struct ec_params_cec_get {
- uint8_t cmd; /* enum cec_command */
+ uint8_t cmd : 4; /* enum cec_command */
+ uint8_t port : 4;
} __ec_align1;
/**
@@ -4487,6 +4528,17 @@ struct ec_response_cec_get {
uint8_t val;
} __ec_align1;
+/* Get the number of CEC ports */
+#define EC_CMD_CEC_PORT_COUNT 0x00C1
+
+/**
+ * struct ec_response_cec_port_count - CEC port count response
+ * @port_count: number of CEC ports
+ */
+struct ec_response_cec_port_count {
+ uint8_t port_count;
+} __ec_align1;
+
/* CEC parameters command */
enum cec_command {
/* CEC reading, writing and events enable */
@@ -4501,6 +4553,8 @@ enum mkbp_cec_event {
EC_MKBP_CEC_SEND_OK = BIT(0),
/* Outgoing message was not acknowledged */
EC_MKBP_CEC_SEND_FAILED = BIT(1),
+ /* Incoming message can be read out by AP */
+ EC_MKBP_CEC_HAVE_DATA = BIT(2),
};
/*****************************************************************************/
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 4f9f756bc17c..8865e350c12a 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -258,7 +258,7 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature);
int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
-int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata,
+int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata,
size_t outsize, void *indata, size_t insize);
/**
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index f377817ce75c..cdd8cfb424f5 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -144,9 +144,6 @@
#define OMAP_MAX_GPIO_LINES 192
-#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr))
-#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES)
-
#ifndef __ASSEMBLER__
struct omap_gpio_reg_offs {
u16 revision;
diff --git a/include/linux/platform_data/gpio_backlight.h b/include/linux/platform_data/gpio_backlight.h
index 1a8b5b1946fe..323fbf5f7613 100644
--- a/include/linux/platform_data/gpio_backlight.h
+++ b/include/linux/platform_data/gpio_backlight.h
@@ -8,7 +8,7 @@
struct device;
struct gpio_backlight_platform_data {
- struct device *fbdev;
+ struct device *dev;
};
#endif
diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h
index f2781aa7eff8..70e8a6bec0f6 100644
--- a/include/linux/platform_data/gsc_hwmon.h
+++ b/include/linux/platform_data/gsc_hwmon.h
@@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data {
unsigned int resolution;
unsigned int vreference;
unsigned int fan_base;
- struct gsc_hwmon_channel channels[];
+ struct gsc_hwmon_channel channels[] __counted_by(nchannels);
};
#endif
diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h
index 6a000df5541f..8748680e9e3c 100644
--- a/include/linux/platform_data/hirschmann-hellcreek.h
+++ b/include/linux/platform_data/hirschmann-hellcreek.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 or MIT) */
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
/*
* Hirschmann Hellcreek TSN switch platform data.
*
diff --git a/include/linux/platform_data/i2c-mux-reg.h b/include/linux/platform_data/i2c-mux-reg.h
index 2543c2a1c9ae..e2e895768311 100644
--- a/include/linux/platform_data/i2c-mux-reg.h
+++ b/include/linux/platform_data/i2c-mux-reg.h
@@ -17,7 +17,6 @@
* @n_values: Number of multiplexer channels
* @little_endian: Indicating if the register is in little endian
* @write_only: Reading the register is not allowed by hardware
- * @classes: Optional I2C auto-detection classes
* @idle: Value to write to mux when idle
* @idle_in_use: indicate if idle value is in use
* @reg: Virtual address of the register to switch channel
@@ -30,7 +29,6 @@ struct i2c_mux_reg_platform_data {
int n_values;
bool little_endian;
bool write_only;
- const unsigned int *classes;
u32 idle;
bool idle_in_use;
void __iomem *reg;
diff --git a/include/linux/platform_data/lv5207lp.h b/include/linux/platform_data/lv5207lp.h
index c9da8d402750..95d85c1394bc 100644
--- a/include/linux/platform_data/lv5207lp.h
+++ b/include/linux/platform_data/lv5207lp.h
@@ -8,7 +8,7 @@
struct device;
struct lv5207lp_platform_data {
- struct device *fbdev;
+ struct device *dev;
unsigned int max_value;
unsigned int def_value;
};
diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h
index 8a5f9f0b2c52..724e1f57b81f 100644
--- a/include/linux/platform_data/mdio-bcm-unimac.h
+++ b/include/linux/platform_data/mdio-bcm-unimac.h
@@ -1,11 +1,14 @@
#ifndef __MDIO_BCM_UNIMAC_PDATA_H
#define __MDIO_BCM_UNIMAC_PDATA_H
+struct clk;
+
struct unimac_mdio_pdata {
u32 phy_mask;
int (*wait_func)(void *data);
void *wait_func_data;
const char *bus_name;
+ struct clk *clk;
};
#define UNIMAC_MDIO_DRV_NAME "unimac-mdio"
diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h
index ea1cc6d829e9..8c659db4da6b 100644
--- a/include/linux/platform_data/microchip-ksz.h
+++ b/include/linux/platform_data/microchip-ksz.h
@@ -20,10 +20,32 @@
#define __MICROCHIP_KSZ_H
#include <linux/types.h>
+#include <linux/platform_data/dsa.h>
+
+enum ksz_chip_id {
+ KSZ8563_CHIP_ID = 0x8563,
+ KSZ8795_CHIP_ID = 0x8795,
+ KSZ8794_CHIP_ID = 0x8794,
+ KSZ8765_CHIP_ID = 0x8765,
+ KSZ8830_CHIP_ID = 0x8830,
+ KSZ9477_CHIP_ID = 0x00947700,
+ KSZ9896_CHIP_ID = 0x00989600,
+ KSZ9897_CHIP_ID = 0x00989700,
+ KSZ9893_CHIP_ID = 0x00989300,
+ KSZ9563_CHIP_ID = 0x00956300,
+ KSZ8567_CHIP_ID = 0x00856700,
+ KSZ9567_CHIP_ID = 0x00956700,
+ LAN9370_CHIP_ID = 0x00937000,
+ LAN9371_CHIP_ID = 0x00937100,
+ LAN9372_CHIP_ID = 0x00937200,
+ LAN9373_CHIP_ID = 0x00937300,
+ LAN9374_CHIP_ID = 0x00937400,
+};
struct ksz_platform_data {
+ /* Must be first such that dsa_register_switch() can access it */
+ struct dsa_chip_data cd;
u32 chip_id;
- u16 enabled_ports;
};
#endif
diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h
index c510734405bb..89d0ec6f7d46 100644
--- a/include/linux/platform_data/net-cw1200.h
+++ b/include/linux/platform_data/net-cw1200.h
@@ -14,8 +14,6 @@ struct cw1200_platform_data_spi {
/* All others are optional */
bool have_5ghz;
- int reset; /* GPIO to RSTn signal (0 disables) */
- int powerup; /* GPIO to POWERUP signal (0 disables) */
int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata,
bool enable); /* Control 3v3 / 1v8 supply */
int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata,
@@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio {
/* All others are optional */
bool have_5ghz;
bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */
- int reset; /* GPIO to RSTn signal (0 disables) */
- int powerup; /* GPIO to POWERUP signal (0 disables) */
int irq; /* IRQ line or 0 to use SDIO IRQ */
int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata,
bool enable); /* Control 3v3 / 1v8 supply */
diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h
index 0dd851ea1c72..7fcb55fe21c9 100644
--- a/include/linux/platform_data/omap-twl4030.h
+++ b/include/linux/platform_data/omap-twl4030.h
@@ -37,9 +37,6 @@ struct omap_tw4030_pdata {
bool has_digimic0;
bool has_digimic1;
u8 has_linein;
-
- /* Jack detect GPIO or <= 0 if it is not implemented */
- int jack_detect;
};
#endif /* _OMAP_TWL4030_H_ */
diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h
index 96c1a14ab365..3c3787c4d96c 100644
--- a/include/linux/platform_data/pca953x.h
+++ b/include/linux/platform_data/pca953x.h
@@ -11,21 +11,8 @@ struct pca953x_platform_data {
/* number of the first GPIO */
unsigned gpio_base;
- /* initial polarity inversion setting */
- u32 invert;
-
/* interrupt base */
int irq_base;
-
- void *context; /* param to setup/teardown */
-
- int (*setup)(struct i2c_client *client,
- unsigned gpio, unsigned ngpio,
- void *context);
- void (*teardown)(struct i2c_client *client,
- unsigned gpio, unsigned ngpio,
- void *context);
- const char *const *names;
};
#endif /* _LINUX_PCA953X_H */
diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h
deleted file mode 100644
index 22c53825528f..000000000000
--- a/include/linux/platform_data/rtc-ds2404.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * ds2404.h - platform data structure for the DS2404 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
- */
-
-#ifndef __LINUX_DS2404_H
-#define __LINUX_DS2404_H
-
-struct ds2404_platform_data {
-
- unsigned int gpio_rst;
- unsigned int gpio_clk;
- unsigned int gpio_dq;
-};
-#endif
diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h
index d661399b217d..6c19d4fbbe39 100644
--- a/include/linux/platform_data/shmob_drm.h
+++ b/include/linux/platform_data/shmob_drm.h
@@ -10,7 +10,7 @@
#ifndef __SHMOB_DRM_H__
#define __SHMOB_DRM_H__
-#include <drm/drm_mode.h>
+#include <video/videomode.h>
enum shmob_drm_clk_source {
SHMOB_DRM_CLK_BUS,
@@ -18,72 +18,21 @@ enum shmob_drm_clk_source {
SHMOB_DRM_CLK_EXTERNAL,
};
-enum shmob_drm_interface {
- SHMOB_DRM_IFACE_RGB8, /* 24bpp, 8:8:8 */
- SHMOB_DRM_IFACE_RGB9, /* 18bpp, 9:9 */
- SHMOB_DRM_IFACE_RGB12A, /* 24bpp, 12:12 */
- SHMOB_DRM_IFACE_RGB12B, /* 12bpp */
- SHMOB_DRM_IFACE_RGB16, /* 16bpp */
- SHMOB_DRM_IFACE_RGB18, /* 18bpp */
- SHMOB_DRM_IFACE_RGB24, /* 24bpp */
- SHMOB_DRM_IFACE_YUV422, /* 16bpp */
- SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */
- SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */
- SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */
- SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */
- SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */
- SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */
- SHMOB_DRM_IFACE_SYS16A, /* 16bpp */
- SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */
- SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */
- SHMOB_DRM_IFACE_SYS18, /* 18bpp */
- SHMOB_DRM_IFACE_SYS24, /* 24bpp */
-};
-
-struct shmob_drm_backlight_data {
- const char *name;
- int max_brightness;
- int (*get_brightness)(void);
- int (*set_brightness)(int brightness);
-};
-
struct shmob_drm_panel_data {
unsigned int width_mm; /* Panel width in mm */
unsigned int height_mm; /* Panel height in mm */
- struct drm_mode_modeinfo mode;
+ struct videomode mode;
};
-struct shmob_drm_sys_interface_data {
- unsigned int read_latch:6;
- unsigned int read_setup:8;
- unsigned int read_cycle:8;
- unsigned int read_strobe:8;
- unsigned int write_setup:8;
- unsigned int write_cycle:8;
- unsigned int write_strobe:8;
- unsigned int cs_setup:3;
- unsigned int vsync_active_high:1;
- unsigned int vsync_dir_input:1;
-};
-
-#define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */
-#define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */
-#define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */
-#define SHMOB_DRM_IFACE_FL_HSCNT (1 << 3) /* Disable HSYNC during VBLANK */
-#define SHMOB_DRM_IFACE_FL_DWCNT (1 << 4) /* Disable dotclock during blanking */
-
struct shmob_drm_interface_data {
- enum shmob_drm_interface interface;
- struct shmob_drm_sys_interface_data sys;
+ unsigned int bus_fmt; /* MEDIA_BUS_FMT_* */
unsigned int clk_div;
- unsigned int flags;
};
struct shmob_drm_platform_data {
enum shmob_drm_clk_source clk_source;
struct shmob_drm_interface_data iface;
struct shmob_drm_panel_data panel;
- struct shmob_drm_backlight_data backlight;
};
#endif /* __SHMOB_DRM_H__ */
diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h
index c71a2dd66143..5f412a615532 100644
--- a/include/linux/platform_data/si5351.h
+++ b/include/linux/platform_data/si5351.h
@@ -105,10 +105,12 @@ struct si5351_clkout_config {
* @clk_xtal: xtal input clock
* @clk_clkin: clkin input clock
* @pll_src: array of pll source clock setting
+ * @pll_reset: array indicating if plls should be reset after setting the rate
* @clkout: array of clkout configuration
*/
struct si5351_platform_data {
enum si5351_pll_src pll_src[2];
+ bool pll_reset[2];
struct si5351_clkout_config clkout[8];
};
diff --git a/include/linux/platform_data/video-mx3fb.h b/include/linux/platform_data/video-mx3fb.h
deleted file mode 100644
index d03dc322a616..000000000000
--- a/include/linux/platform_data/video-mx3fb.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2008
- * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
- */
-
-#ifndef __ASM_ARCH_MX3FB_H__
-#define __ASM_ARCH_MX3FB_H__
-
-#include <linux/device.h>
-#include <linux/fb.h>
-
-/* Proprietary FB_SYNC_ flags */
-#define FB_SYNC_OE_ACT_HIGH 0x80000000
-#define FB_SYNC_CLK_INVERT 0x40000000
-#define FB_SYNC_DATA_INVERT 0x20000000
-#define FB_SYNC_CLK_IDLE_EN 0x10000000
-#define FB_SYNC_SHARP_MODE 0x08000000
-#define FB_SYNC_SWAP_RGB 0x04000000
-#define FB_SYNC_CLK_SEL_EN 0x02000000
-
-/*
- * Specify the way your display is connected. The IPU can arbitrarily
- * map the internal colors to the external data lines. We only support
- * the following mappings at the moment.
- */
-enum disp_data_mapping {
- /* blue -> d[0..5], green -> d[6..11], red -> d[12..17] */
- IPU_DISP_DATA_MAPPING_RGB666,
- /* blue -> d[0..4], green -> d[5..10], red -> d[11..15] */
- IPU_DISP_DATA_MAPPING_RGB565,
- /* blue -> d[0..7], green -> d[8..15], red -> d[16..23] */
- IPU_DISP_DATA_MAPPING_RGB888,
-};
-
-/**
- * struct mx3fb_platform_data - mx3fb platform data
- *
- * @dma_dev: pointer to the dma-device, used for dma-slave connection
- * @mode: pointer to a platform-provided per mxc_register_fb() videomode
- */
-struct mx3fb_platform_data {
- struct device *dma_dev;
- const char *name;
- const struct fb_videomode *mode;
- int num_modes;
- enum disp_data_mapping disp_data_fmt;
-};
-
-#endif
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 28234dc9fa6a..ab1c7deff118 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -58,6 +58,10 @@
#define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021
#define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */
#define ASUS_WMI_DEVID_LIGHTBAR 0x00050025
+/* This can only be used to disable the screen, not re-enable */
+#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031
+/* Writing a brightness re-enables the screen if disabled */
+#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032
#define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018
#define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075
@@ -66,6 +70,7 @@
#define ASUS_WMI_DEVID_CAMERA 0x00060013
#define ASUS_WMI_DEVID_LID_FLIP 0x00060062
#define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077
+#define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E
/* Storage */
#define ASUS_WMI_DEVID_CARDREADER 0x00080013
@@ -80,8 +85,19 @@
#define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */
#define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013
#define ASUS_WMI_DEVID_GPU_FAN_CTRL 0x00110014
+#define ASUS_WMI_DEVID_MID_FAN_CTRL 0x00110031
#define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024
#define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025
+#define ASUS_WMI_DEVID_MID_FAN_CURVE 0x00110032
+
+/* Tunables for AUS ROG laptops */
+#define ASUS_WMI_DEVID_PPT_PL2_SPPT 0x001200A0
+#define ASUS_WMI_DEVID_PPT_PL1_SPL 0x001200A3
+#define ASUS_WMI_DEVID_PPT_APU_SPPT 0x001200B0
+#define ASUS_WMI_DEVID_PPT_PLAT_SPPT 0x001200B1
+#define ASUS_WMI_DEVID_PPT_FPPT 0x001200C1
+#define ASUS_WMI_DEVID_NV_DYN_BOOST 0x001200C0
+#define ASUS_WMI_DEVID_NV_THERM_TARGET 0x001200C2
/* Power */
#define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012
@@ -95,7 +111,15 @@
/* Keyboard dock */
#define ASUS_WMI_DEVID_KBD_DOCK 0x00120063
-/* dgpu on/off */
+/* Charging mode - 1=Barrel, 2=USB */
+#define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C
+
+/* MCU powersave mode */
+#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2
+
+/* epu is connected? 1 == true */
+#define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018
+/* egpu on/off */
#define ASUS_WMI_DEVID_EGPU 0x00090019
/* dgpu on/off */
diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h
index 41df326583f9..7f132029316a 100644
--- a/include/linux/platform_data/x86/clk-lpss.h
+++ b/include/linux/platform_data/x86/clk-lpss.h
@@ -15,6 +15,6 @@ struct lpss_clk_data {
struct clk *clk;
};
-extern int lpss_atom_clk_init(void);
+int lpss_atom_clk_init(void);
#endif /* __CLK_LPSS_H */
diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
index b8a701c77fd0..161e4bc1c9ee 100644
--- a/include/linux/platform_data/x86/pmc_atom.h
+++ b/include/linux/platform_data/x86/pmc_atom.h
@@ -43,6 +43,19 @@
BIT_ORED_DEDICATED_IRQ_GPSC | \
BIT_SHARED_IRQ_GPSS)
+/* External clk generator settings */
+#define PMC_CLK_CTL_OFFSET 0x60
+#define PMC_CLK_CTL_SIZE 4
+#define PMC_CLK_NUM 6
+#define PMC_CLK_CTL_GATED_ON_D3 0x0
+#define PMC_CLK_CTL_FORCE_ON 0x1
+#define PMC_CLK_CTL_FORCE_OFF 0x2
+#define PMC_CLK_CTL_RESERVED 0x3
+#define PMC_MASK_CLK_CTL GENMASK(1, 0)
+#define PMC_MASK_CLK_FREQ BIT(2)
+#define PMC_CLK_FREQ_XTAL (0 << 2) /* 25 MHz */
+#define PMC_CLK_FREQ_PLL (1 << 2) /* 19.2 MHz */
+
/* The timers accumulate time spent in sleep state */
#define PMC_S0IR_TMR 0x80
#define PMC_S0I1_TMR 0x84
@@ -104,14 +117,14 @@
#define BIT_SCC_SDIO BIT(9)
#define BIT_SCC_SDCARD BIT(10)
#define BIT_SCC_MIPI BIT(11)
-#define BIT_HDA BIT(12)
+#define BIT_HDA BIT(12) /* CHT datasheet: reserved */
#define BIT_LPE BIT(13)
#define BIT_OTG BIT(14)
-#define BIT_USH BIT(15)
-#define BIT_GBE BIT(16)
-#define BIT_SATA BIT(17)
-#define BIT_USB_EHCI BIT(18)
-#define BIT_SEC BIT(19)
+#define BIT_USH BIT(15) /* CHT datasheet: reserved */
+#define BIT_GBE BIT(16) /* CHT datasheet: reserved */
+#define BIT_SATA BIT(17) /* CHT datasheet: reserved */
+#define BIT_USB_EHCI BIT(18) /* CHT datasheet: XHCI! */
+#define BIT_SEC BIT(19) /* BYT datasheet: reserved */
#define BIT_PCIE_PORT0 BIT(20)
#define BIT_PCIE_PORT1 BIT(21)
#define BIT_PCIE_PORT2 BIT(22)
diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h
index c852fe24fe2a..752c06b47cc8 100644
--- a/include/linux/platform_data/x86/pwm-lpss.h
+++ b/include/linux/platform_data/x86/pwm-lpss.h
@@ -27,7 +27,7 @@ struct pwm_lpss_boardinfo {
bool other_devices_aml_touches_pwm_regs;
};
-struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base,
- const struct pwm_lpss_boardinfo *info);
+struct pwm_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base,
+ const struct pwm_lpss_boardinfo *info);
#endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */
diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h
index 57d6a10dfc9e..2d7f7120ba6b 100644
--- a/include/linux/platform_data/x86/simatic-ipc-base.h
+++ b/include/linux/platform_data/x86/simatic-ipc-base.h
@@ -2,7 +2,7 @@
/*
* Siemens SIMATIC IPC drivers
*
- * Copyright (c) Siemens AG, 2018-2021
+ * Copyright (c) Siemens AG, 2018-2023
*
* Authors:
* Henning Schild <henning.schild@siemens.com>
@@ -20,6 +20,9 @@
#define SIMATIC_IPC_DEVICE_127E 3
#define SIMATIC_IPC_DEVICE_227E 4
#define SIMATIC_IPC_DEVICE_227G 5
+#define SIMATIC_IPC_DEVICE_BX_21A 6
+#define SIMATIC_IPC_DEVICE_BX_39A 7
+#define SIMATIC_IPC_DEVICE_BX_59A 8
struct simatic_ipc_platform {
u8 devmode;
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index a48bb5240977..8d8b3b919674 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -2,7 +2,7 @@
/*
* Siemens SIMATIC IPC drivers
*
- * Copyright (c) Siemens AG, 2018-2021
+ * Copyright (c) Siemens AG, 2018-2023
*
* Authors:
* Henning Schild <henning.schild@siemens.com>
@@ -32,8 +32,12 @@ enum simatic_ipc_station_ids {
SIMATIC_IPC_IPC477E = 0x00000A02,
SIMATIC_IPC_IPC127E = 0x00000D01,
SIMATIC_IPC_IPC227G = 0x00000F01,
+ SIMATIC_IPC_IPC277G = 0x00000F02,
SIMATIC_IPC_IPCBX_39A = 0x00001001,
SIMATIC_IPC_IPCPX_39A = 0x00001002,
+ SIMATIC_IPC_IPCBX_21A = 0x00001101,
+ SIMATIC_IPC_IPCBX_56A = 0x00001201,
+ SIMATIC_IPC_IPCBX_59A = 0x00001202,
};
static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index b845fd83f429..7a41c72c1959 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -63,6 +63,8 @@ extern struct resource *platform_get_mem_or_io(struct platform_device *,
extern struct device *
platform_find_device_by_driver(struct device *start,
const struct device_driver *drv);
+
+#ifdef CONFIG_HAS_IOMEM
extern void __iomem *
devm_platform_get_and_ioremap_resource(struct platform_device *pdev,
unsigned int index, struct resource **res);
@@ -72,6 +74,32 @@ devm_platform_ioremap_resource(struct platform_device *pdev,
extern void __iomem *
devm_platform_ioremap_resource_byname(struct platform_device *pdev,
const char *name);
+#else
+
+static inline void __iomem *
+devm_platform_get_and_ioremap_resource(struct platform_device *pdev,
+ unsigned int index, struct resource **res)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+
+static inline void __iomem *
+devm_platform_ioremap_resource(struct platform_device *pdev,
+ unsigned int index)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline void __iomem *
+devm_platform_ioremap_resource_byname(struct platform_device *pdev,
+ const char *name)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+#endif
+
extern int platform_get_irq(struct platform_device *, unsigned int);
extern int platform_get_irq_optional(struct platform_device *, unsigned int);
extern int platform_irq_count(struct platform_device *);
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 0f352c1d3c80..8c1c8adf7fe9 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -75,20 +75,10 @@
#include <linux/container_of.h>
#include <linux/list.h>
-#include <linux/types.h>
+#include <linux/plist_types.h>
#include <asm/bug.h>
-struct plist_head {
- struct list_head node_list;
-};
-
-struct plist_node {
- int prio;
- struct list_head prio_list;
- struct list_head node_list;
-};
-
/**
* PLIST_HEAD_INIT - static struct plist_head initializer
* @head: struct plist_head variable name
diff --git a/include/linux/plist_types.h b/include/linux/plist_types.h
new file mode 100644
index 000000000000..c37e784330af
--- /dev/null
+++ b/include/linux/plist_types.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_PLIST_TYPES_H
+#define _LINUX_PLIST_TYPES_H
+
+#include <linux/types.h>
+
+struct plist_head {
+ struct list_head node_list;
+};
+
+struct plist_node {
+ int prio;
+ struct list_head prio_list;
+ struct list_head node_list;
+};
+
+#endif /* _LINUX_PLIST_TYPES_H */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index badad7d11f4f..97b0e23363c8 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \
RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \
}
-#ifdef CONFIG_PM
-#define _EXPORT_DEV_PM_OPS(name, license, ns) \
+#define _EXPORT_PM_OPS(name, license, ns) \
const struct dev_pm_ops name; \
__EXPORT_SYMBOL(name, license, ns); \
const struct dev_pm_ops name
-#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
-#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns)
-#else
-#define _EXPORT_DEV_PM_OPS(name, license, ns) \
+
+#define _DISCARD_PM_OPS(name, license, ns) \
static __maybe_unused const struct dev_pm_ops __static_##name
+
+#ifdef CONFIG_PM
+#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns)
+#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
+#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns)
+#else
+#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns)
#define EXPORT_PM_FN_GPL(name)
#define EXPORT_PM_FN_NS_GPL(name, ns)
#endif
-#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "")
-#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "")
-#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns)
-#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns)
+#ifdef CONFIG_PM_SLEEP
+#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns)
+#else
+#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns)
+#endif
+
+#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "")
+#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "")
+#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns)
+#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns)
+
+#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "")
+#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "")
+#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns)
+#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns)
/*
* Use this if you want to use the same suspend and resume callbacks for suspend
@@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \
_DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL)
#define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- EXPORT_DEV_PM_OPS(name) = { \
+ EXPORT_DEV_SLEEP_PM_OPS(name) = { \
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
}
#define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
- EXPORT_GPL_DEV_PM_OPS(name) = { \
+ EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
}
#define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
- EXPORT_NS_DEV_PM_OPS(name, ns) = { \
+ EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
}
#define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \
- EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \
+ EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \
SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
}
@@ -448,6 +463,15 @@ const struct dev_pm_ops __maybe_unused name = { \
SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
}
+/*
+ * Use this if you want to have the suspend and resume callbacks be called
+ * with IRQs disabled.
+ */
+#define DEFINE_NOIRQ_DEV_PM_OPS(name, suspend_fn, resume_fn) \
+const struct dev_pm_ops name = { \
+ NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+}
+
#define pm_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM), (_ptr))
#define pm_sleep_ptr(_ptr) PTR_IF(IS_ENABLED(CONFIG_PM_SLEEP), (_ptr))
@@ -638,8 +662,8 @@ struct pm_subsys_data {
struct dev_pm_info {
pm_message_t power_state;
- unsigned int can_wakeup:1;
- unsigned int async_suspend:1;
+ bool can_wakeup:1;
+ bool async_suspend:1;
bool in_dpm_list:1; /* Owned by the PM core */
bool is_prepared:1; /* Owned by the PM core */
bool is_suspended:1; /* Ditto */
@@ -657,10 +681,11 @@ struct dev_pm_info {
bool wakeup_path:1;
bool syscore:1;
bool no_pm_callbacks:1; /* Owned by the PM core */
- unsigned int must_resume:1; /* Owned by the PM core */
- unsigned int may_skip_resume:1; /* Set by subsystems */
+ bool async_in_progress:1; /* Owned by the PM core */
+ bool must_resume:1; /* Owned by the PM core */
+ bool may_skip_resume:1; /* Set by subsystems */
#else
- unsigned int should_wakeup:1;
+ bool should_wakeup:1;
#endif
#ifdef CONFIG_PM
struct hrtimer suspend_timer;
@@ -671,17 +696,17 @@ struct dev_pm_info {
atomic_t usage_count;
atomic_t child_count;
unsigned int disable_depth:3;
- unsigned int idle_notification:1;
- unsigned int request_pending:1;
- unsigned int deferred_resume:1;
- unsigned int needs_force_resume:1;
- unsigned int runtime_auto:1;
+ bool idle_notification:1;
+ bool request_pending:1;
+ bool deferred_resume:1;
+ bool needs_force_resume:1;
+ bool runtime_auto:1;
bool ignore_children:1;
- unsigned int no_callbacks:1;
- unsigned int irq_safe:1;
- unsigned int use_autosuspend:1;
- unsigned int timer_autosuspends:1;
- unsigned int memalloc_noio:1;
+ bool no_callbacks:1;
+ bool irq_safe:1;
+ bool use_autosuspend:1;
+ bool timer_autosuspends:1;
+ bool memalloc_noio:1;
unsigned int links_count;
enum rpm_request request;
enum rpm_status runtime_status;
@@ -710,6 +735,7 @@ extern void dev_pm_put_subsys_data(struct device *dev);
* @activate: Called before executing probe routines for bus types and drivers.
* @sync: Called after successful driver probe.
* @dismiss: Called after unsuccessful driver probe and after driver removal.
+ * @set_performance_state: Called to request a new performance state.
*
* Power domains provide callbacks that are executed during system suspend,
* hibernation, system resume and during runtime PM transitions instead of
@@ -722,6 +748,7 @@ struct dev_pm_domain {
int (*activate)(struct device *dev);
void (*sync)(struct device *dev);
void (*dismiss)(struct device *dev);
+ int (*set_performance_state)(struct device *dev, unsigned int state);
};
/*
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index ada3a0ab10bf..68669ce18720 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -91,10 +91,10 @@ static inline int devm_pm_clk_create(struct device *dev)
#endif
#ifdef CONFIG_HAVE_CLK
-extern void pm_clk_add_notifier(struct bus_type *bus,
+extern void pm_clk_add_notifier(const struct bus_type *bus,
struct pm_clk_notifier_block *clknb);
#else
-static inline void pm_clk_add_notifier(struct bus_type *bus,
+static inline void pm_clk_add_notifier(const struct bus_type *bus,
struct pm_clk_notifier_block *clknb)
{
}
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f776fb93eaa0..772d3280d35f 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -20,6 +20,33 @@
#include <linux/time64.h>
/*
+ * Flags to control the behaviour when attaching a device to its PM domains.
+ *
+ * PD_FLAG_NO_DEV_LINK: As the default behaviour creates a device-link
+ * for every PM domain that gets attached, this
+ * flag can be used to skip that.
+ *
+ * PD_FLAG_DEV_LINK_ON: Add the DL_FLAG_RPM_ACTIVE to power-on the
+ * supplier and its PM domain when creating the
+ * device-links.
+ *
+ */
+#define PD_FLAG_NO_DEV_LINK BIT(0)
+#define PD_FLAG_DEV_LINK_ON BIT(1)
+
+struct dev_pm_domain_attach_data {
+ const char * const *pd_names;
+ const u32 num_pd_names;
+ const u32 pd_flags;
+};
+
+struct dev_pm_domain_list {
+ struct device **pd_devs;
+ struct device_link **pd_links;
+ u32 num_pds;
+};
+
+/*
* Flags to control the behaviour of a genpd.
*
* These flags may be set in the struct generic_pm_domain's flags field by a
@@ -61,6 +88,10 @@
* GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its
* components' next wakeup when determining the
* optimal idle state.
+ *
+ * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states,
+ * but its corresponding OPP tables are not
+ * described in DT, but are given directly by FW.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
@@ -69,6 +100,7 @@
#define GENPD_FLAG_CPU_DOMAIN (1U << 4)
#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
#define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
+#define GENPD_FLAG_OPP_TABLE_FW (1U << 7)
enum gpd_status {
GENPD_STATE_ON = 0, /* PM domain is on */
@@ -113,7 +145,6 @@ struct genpd_power_state {
};
struct genpd_lock_ops;
-struct dev_pm_opp;
struct opp_table;
struct generic_pm_domain {
@@ -141,8 +172,6 @@ struct generic_pm_domain {
int (*power_on)(struct generic_pm_domain *domain);
struct raw_notifier_head power_notifiers; /* Power on/off notifiers */
struct opp_table *opp_table; /* OPP table of the genpd */
- unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,
- struct dev_pm_opp *opp);
int (*set_performance_state)(struct generic_pm_domain *genpd,
unsigned int state);
struct gpd_dev_ops dev_ops;
@@ -320,7 +349,7 @@ static inline void dev_pm_genpd_resume(struct device *dev) {}
/* OF PM domain providers */
struct of_device_id;
-typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
+typedef struct generic_pm_domain *(*genpd_xlate_t)(const struct of_phandle_args *args,
void *data);
struct genpd_onecell_data {
@@ -335,16 +364,14 @@ int of_genpd_add_provider_simple(struct device_node *np,
int of_genpd_add_provider_onecell(struct device_node *np,
struct genpd_onecell_data *data);
void of_genpd_del_provider(struct device_node *np);
-int of_genpd_add_device(struct of_phandle_args *args, struct device *dev);
-int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
- struct of_phandle_args *subdomain_spec);
-int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec,
- struct of_phandle_args *subdomain_spec);
+int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev);
+int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec,
+ const struct of_phandle_args *subdomain_spec);
+int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec,
+ const struct of_phandle_args *subdomain_spec);
struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
int of_genpd_parse_idle_states(struct device_node *dn,
struct genpd_power_state **states, int *n);
-unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev,
- struct dev_pm_opp *opp);
int genpd_dev_pm_attach(struct device *dev);
struct device *genpd_dev_pm_attach_by_id(struct device *dev,
@@ -366,20 +393,20 @@ static inline int of_genpd_add_provider_onecell(struct device_node *np,
static inline void of_genpd_del_provider(struct device_node *np) {}
-static inline int of_genpd_add_device(struct of_phandle_args *args,
+static inline int of_genpd_add_device(const struct of_phandle_args *args,
struct device *dev)
{
return -ENODEV;
}
-static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
- struct of_phandle_args *subdomain_spec)
+static inline int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec,
+ const struct of_phandle_args *subdomain_spec)
{
return -ENODEV;
}
-static inline int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec,
- struct of_phandle_args *subdomain_spec)
+static inline int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec,
+ const struct of_phandle_args *subdomain_spec)
{
return -ENODEV;
}
@@ -390,13 +417,6 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn,
return -ENODEV;
}
-static inline unsigned int
-pm_genpd_opp_to_performance_state(struct device *genpd_dev,
- struct dev_pm_opp *opp)
-{
- return 0;
-}
-
static inline int genpd_dev_pm_attach(struct device *dev)
{
return 0;
@@ -427,9 +447,14 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
unsigned int index);
struct device *dev_pm_domain_attach_by_name(struct device *dev,
const char *name);
+int dev_pm_domain_attach_list(struct device *dev,
+ const struct dev_pm_domain_attach_data *data,
+ struct dev_pm_domain_list **list);
void dev_pm_domain_detach(struct device *dev, bool power_off);
+void dev_pm_domain_detach_list(struct dev_pm_domain_list *list);
int dev_pm_domain_start(struct device *dev);
void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
+int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state);
#else
static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
{
@@ -445,13 +470,25 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
{
return NULL;
}
+static inline int dev_pm_domain_attach_list(struct device *dev,
+ const struct dev_pm_domain_attach_data *data,
+ struct dev_pm_domain_list **list)
+{
+ return 0;
+}
static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
+static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {}
static inline int dev_pm_domain_start(struct device *dev)
{
return 0;
}
static inline void dev_pm_domain_set(struct device *dev,
struct dev_pm_domain *pd) {}
+static inline int dev_pm_domain_set_performance_state(struct device *dev,
+ unsigned int state)
+{
+ return 0;
+}
#endif
#endif /* _LINUX_PM_DOMAIN_H */
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index dc1fb5890792..065a47382302 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -16,6 +16,7 @@
#include <linux/notifier.h>
struct clk;
+struct cpufreq_frequency_table;
struct regulator;
struct dev_pm_opp;
struct device;
@@ -45,18 +46,6 @@ struct dev_pm_opp_supply {
unsigned long u_watt;
};
-/**
- * struct dev_pm_opp_icc_bw - Interconnect bandwidth values
- * @avg: Average bandwidth corresponding to this OPP (in icc units)
- * @peak: Peak bandwidth corresponding to this OPP (in icc units)
- *
- * This structure stores the bandwidth values for a single interconnect path.
- */
-struct dev_pm_opp_icc_bw {
- u32 avg;
- u32 peak;
-};
-
typedef int (*config_regulators_t)(struct device *dev,
struct dev_pm_opp *old_opp, struct dev_pm_opp *new_opp,
struct regulator **regulators, unsigned int count);
@@ -74,8 +63,10 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table,
* @supported_hw_count: Number of elements in the array.
* @regulator_names: Array of pointers to the names of the regulator, NULL terminated.
* @genpd_names: Null terminated array of pointers containing names of genpd to
- * attach.
- * @virt_devs: Pointer to return the array of virtual devices.
+ * attach. Mutually exclusive with required_devs.
+ * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually
+ * exclusive with required_devs.
+ * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs.
*
* This structure contains platform specific OPP configurations for the device.
*/
@@ -90,6 +81,24 @@ struct dev_pm_opp_config {
const char * const *regulator_names;
const char * const *genpd_names;
struct device ***virt_devs;
+ struct device **required_devs;
+};
+
+#define OPP_LEVEL_UNSET U32_MAX
+
+/**
+ * struct dev_pm_opp_data - The data to use to initialize an OPP.
+ * @turbo: Flag to indicate whether the OPP is to be marked turbo or not.
+ * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if
+ * level field isn't used.
+ * @freq: The clock rate in Hz for the OPP.
+ * @u_volt: The voltage in uV for the OPP.
+ */
+struct dev_pm_opp_data {
+ bool turbo;
+ unsigned int level;
+ unsigned long freq;
+ unsigned long u_volt;
};
#if defined(CONFIG_PM_OPP)
@@ -103,7 +112,7 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su
unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp);
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
+unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index);
unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp);
@@ -121,16 +130,31 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev);
struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
unsigned long freq,
bool available);
+
+struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+ u32 index, bool available);
+
struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
unsigned long *freq);
+struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev,
+ unsigned long *freq, u32 index);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+ unsigned long *freq);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev,
+ unsigned long *freq, u32 index);
+
struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
unsigned int level);
+
struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
unsigned int *level);
-struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
- unsigned long *freq);
+struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev,
+ unsigned int *level);
struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
unsigned int *bw, int index);
@@ -140,8 +164,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
void dev_pm_opp_put(struct dev_pm_opp *opp);
-int dev_pm_opp_add(struct device *dev, unsigned long freq,
- unsigned long u_volt);
+int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp);
+
void dev_pm_opp_remove(struct device *dev, unsigned long freq);
void dev_pm_opp_remove_all_dynamic(struct device *dev);
@@ -200,7 +224,7 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp)
return 0;
}
-static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
{
return 0;
}
@@ -247,26 +271,27 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
return 0;
}
-static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
- unsigned int level)
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+ unsigned long freq, bool available)
{
return ERR_PTR(-EOPNOTSUPP);
}
-static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
- unsigned int *level)
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+ u32 index, bool available)
{
return ERR_PTR(-EOPNOTSUPP);
}
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
- unsigned long freq, bool available)
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+ unsigned long *freq)
{
return ERR_PTR(-EOPNOTSUPP);
}
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
- unsigned long *freq)
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index)
{
return ERR_PTR(-EOPNOTSUPP);
}
@@ -277,6 +302,30 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
return ERR_PTR(-EOPNOTSUPP);
}
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+ unsigned int level)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
+ unsigned int *level)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev,
+ unsigned int *level)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
unsigned int *bw, int index)
{
@@ -291,8 +340,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {}
-static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
- unsigned long u_volt)
+static inline int
+dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp)
{
return -EOPNOTSUPP;
}
@@ -398,6 +447,21 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev)
#endif /* CONFIG_PM_OPP */
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
+int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table);
+void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table);
+#else
+static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table)
+{
+ return -EINVAL;
+}
+
+static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table)
+{
+}
+#endif
+
+
#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
int dev_pm_opp_of_add_table(struct device *dev);
int dev_pm_opp_of_add_table_indexed(struct device *dev, int index);
@@ -488,6 +552,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta
/* OPP Configuration helpers */
+static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
+ unsigned long u_volt)
+{
+ struct dev_pm_opp_data data = {
+ .freq = freq,
+ .u_volt = u_volt,
+ };
+
+ return dev_pm_opp_add_dynamic(dev, &data);
+}
+
/* Regulators helpers */
static inline int dev_pm_opp_set_regulators(struct device *dev,
const char * const names[])
@@ -631,4 +706,9 @@ static inline void dev_pm_opp_put_prop_name(int token)
dev_pm_opp_clear_config(token);
}
+static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+{
+ return dev_pm_opp_get_freq_indexed(opp, 0);
+}
+
#endif /* __LINUX_OPP_H__ */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9a8151a2bdea..d39dc863f612 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -72,7 +72,8 @@ extern int pm_runtime_force_resume(struct device *dev);
extern int __pm_runtime_idle(struct device *dev, int rpmflags);
extern int __pm_runtime_suspend(struct device *dev, int rpmflags);
extern int __pm_runtime_resume(struct device *dev, int rpmflags);
-extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count);
+extern int pm_runtime_get_if_active(struct device *dev);
+extern int pm_runtime_get_if_in_use(struct device *dev);
extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
extern int pm_runtime_barrier(struct device *dev);
@@ -85,8 +86,6 @@ extern void pm_runtime_irq_safe(struct device *dev);
extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
extern u64 pm_runtime_autosuspend_expiration(struct device *dev);
-extern void pm_runtime_update_max_time_suspended(struct device *dev,
- s64 delta_ns);
extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
extern void pm_runtime_get_suppliers(struct device *dev);
extern void pm_runtime_put_suppliers(struct device *dev);
@@ -97,18 +96,6 @@ extern void pm_runtime_release_supplier(struct device_link *link);
extern int devm_pm_runtime_enable(struct device *dev);
/**
- * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
- * @dev: Target device.
- *
- * Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0.
- */
-static inline int pm_runtime_get_if_in_use(struct device *dev)
-{
- return pm_runtime_get_if_active(dev, false);
-}
-
-/**
* pm_suspend_ignore_children - Set runtime PM behavior regarding children.
* @dev: Target device.
* @enable: Whether or not to ignore possible dependencies on children.
@@ -277,8 +264,7 @@ static inline int pm_runtime_get_if_in_use(struct device *dev)
{
return -EINVAL;
}
-static inline int pm_runtime_get_if_active(struct device *dev,
- bool ign_usage_count)
+static inline int pm_runtime_get_if_active(struct device *dev)
{
return -EINVAL;
}
@@ -463,6 +449,18 @@ static inline int pm_runtime_put(struct device *dev)
}
/**
+ * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
+ * @dev: Target device.
+ *
+ * Decrement the runtime PM usage counter of @dev and if it turns out to be
+ * equal to 0, queue up a work item for @dev like in pm_request_autosuspend().
+ */
+static inline int __pm_runtime_put_autosuspend(struct device *dev)
+{
+ return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO);
+}
+
+/**
* pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
* @dev: Target device.
*
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 77f4849e3418..6eb9adaef52b 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
#endif /* !CONFIG_PM_SLEEP */
+static inline bool device_awake_path(struct device *dev)
+{
+ return device_wakeup_path(dev);
+}
+
+static inline void device_set_awake_path(struct device *dev)
+{
+ device_set_wakeup_path(dev);
+}
+
static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
{
return pm_wakeup_ws_event(ws, msec, false);
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index c2a7cfbca713..ddbe7c3ca4ce 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data)
struct pnp_fixup {
char id[7];
- void (*quirk_function) (struct pnp_dev * dev); /* fixup function */
+ void (*quirk_function) (struct pnp_dev *dev); /* fixup function */
};
/* config parameters */
@@ -419,8 +419,8 @@ struct pnp_protocol {
/* protocol specific suspend/resume */
bool (*can_wakeup) (struct pnp_dev *dev);
- int (*suspend) (struct pnp_dev * dev, pm_message_t state);
- int (*resume) (struct pnp_dev * dev);
+ int (*suspend) (struct pnp_dev *dev, pm_message_t state);
+ int (*resume) (struct pnp_dev *dev);
/* used by pnp layer only (look but don't touch) */
unsigned char number; /* protocol number */
@@ -435,7 +435,7 @@ struct pnp_protocol {
#define protocol_for_each_dev(protocol, dev) \
list_for_each_entry(dev, &(protocol)->devices, protocol_list)
-extern struct bus_type pnp_bus_type;
+extern const struct bus_type pnp_bus_type;
#if defined(CONFIG_PNP)
@@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; }
static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; }
-static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;}
+static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; }
/* protocol helpers */
static inline int pnp_is_active(struct pnp_dev *dev) { return 0; }
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 851a855d3868..1f0ee2459f2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -83,6 +83,8 @@
/********** net/core/skbuff.c **********/
#define SKB_LIST_POISON_NEXT ((void *)(0x800 + POISON_POINTER_DELTA))
+/********** net/ **********/
+#define NET_PTR_POISON ((void *)(0x801 + POISON_POINTER_DELTA))
/********** kernel/bpf/ **********/
#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA))
@@ -90,4 +92,7 @@
/********** VFS **********/
#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
+/********** lib/stackdepot.c **********/
+#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
+
#endif
diff --git a/include/linux/poll.h b/include/linux/poll.h
index a9e0e1c2d1f2..d1ea4f3714a8 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -14,11 +14,7 @@
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
additional memory. */
-#ifdef __clang__
-#define MAX_STACK_ALLOC 768
-#else
#define MAX_STACK_ALLOC 832
-#endif
#define FRONTEND_STACK_ALLOC 256
#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC
#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 468328b1e1dd..ef8619f48920 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -14,6 +14,7 @@
#include <linux/rwsem.h>
struct posix_clock;
+struct posix_clock_context;
/**
* struct posix_clock_operations - functional interface to the clock
@@ -50,18 +51,18 @@ struct posix_clock_operations {
/*
* Optional character device methods:
*/
- long (*ioctl) (struct posix_clock *pc,
- unsigned int cmd, unsigned long arg);
+ long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd,
+ unsigned long arg);
- int (*open) (struct posix_clock *pc, fmode_t f_mode);
+ int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode);
- __poll_t (*poll) (struct posix_clock *pc,
- struct file *file, poll_table *wait);
+ __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file,
+ poll_table *wait);
- int (*release) (struct posix_clock *pc);
+ int (*release)(struct posix_clock_context *pccontext);
- ssize_t (*read) (struct posix_clock *pc,
- uint flags, char __user *buf, size_t cnt);
+ ssize_t (*read)(struct posix_clock_context *pccontext, uint flags,
+ char __user *buf, size_t cnt);
};
/**
@@ -91,6 +92,24 @@ struct posix_clock {
};
/**
+ * struct posix_clock_context - represents clock file operations context
+ *
+ * @clk: Pointer to the clock
+ * @private_clkdata: Pointer to user data
+ *
+ * Drivers should use struct posix_clock_context during specific character
+ * device file operation methods to access the posix clock.
+ *
+ * Drivers can store a private data structure during the open operation
+ * if they have specific information that is required in other file
+ * operations.
+ */
+struct posix_clock_context {
+ struct posix_clock *clk;
+ void *private_clkdata;
+};
+
+/**
* posix_clock_register() - register a new clock
* @clk: Pointer to the clock. Caller must provide 'ops' field
* @dev: Pointer to the initialized device. Caller must provide
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d607f51404fc..dc7b738de299 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -2,40 +2,16 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H
-#include <linux/spinlock.h>
+#include <linux/alarmtimer.h>
#include <linux/list.h>
#include <linux/mutex.h>
-#include <linux/alarmtimer.h>
+#include <linux/posix-timers_types.h>
+#include <linux/spinlock.h>
#include <linux/timerqueue.h>
struct kernel_siginfo;
struct task_struct;
-/*
- * Bit fields within a clockid:
- *
- * The most significant 29 bits hold either a pid or a file descriptor.
- *
- * Bit 2 indicates whether a cpu clock refers to a thread or a process.
- *
- * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
- *
- * A clockid is invalid if bits 2, 1, and 0 are all set.
- */
-#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
-#define CPUCLOCK_PERTHREAD(clock) \
- (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-
-#define CPUCLOCK_PERTHREAD_MASK 4
-#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
-#define CPUCLOCK_CLOCK_MASK 3
-#define CPUCLOCK_PROF 0
-#define CPUCLOCK_VIRT 1
-#define CPUCLOCK_SCHED 2
-#define CPUCLOCK_MAX 3
-#define CLOCKFD CPUCLOCK_MAX
-#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
-
static inline clockid_t make_process_cpuclock(const unsigned int pid,
const clockid_t clock)
{
@@ -109,44 +85,6 @@ static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
ctmr->node.expires = exp;
}
-/**
- * posix_cputimer_base - Container per posix CPU clock
- * @nextevt: Earliest-expiration cache
- * @tqhead: timerqueue head for cpu_timers
- */
-struct posix_cputimer_base {
- u64 nextevt;
- struct timerqueue_head tqhead;
-};
-
-/**
- * posix_cputimers - Container for posix CPU timer related data
- * @bases: Base container for posix CPU clocks
- * @timers_active: Timers are queued.
- * @expiry_active: Timer expiry is active. Used for
- * process wide timers to avoid multiple
- * task trying to handle expiry concurrently
- *
- * Used in task_struct and signal_struct
- */
-struct posix_cputimers {
- struct posix_cputimer_base bases[CPUCLOCK_MAX];
- unsigned int timers_active;
- unsigned int expiry_active;
-};
-
-/**
- * posix_cputimers_work - Container for task work based posix CPU timer expiry
- * @work: The task work to be scheduled
- * @mutex: Mutex held around expiry in context of this task work
- * @scheduled: @work has been scheduled already, no further processing
- */
-struct posix_cputimers_work {
- struct callback_head work;
- struct mutex mutex;
- unsigned int scheduled;
-};
-
static inline void posix_cputimers_init(struct posix_cputimers *pct)
{
memset(pct, 0, sizeof(*pct));
@@ -179,7 +117,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \
},
#else
-struct posix_cputimers { };
struct cpu_timer { };
#define INIT_CPU_TIMERS(s)
static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
diff --git a/include/linux/posix-timers_types.h b/include/linux/posix-timers_types.h
new file mode 100644
index 000000000000..a4712c1008c9
--- /dev/null
+++ b/include/linux/posix-timers_types.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _linux_POSIX_TIMERS_TYPES_H
+#define _linux_POSIX_TIMERS_TYPES_H
+
+#include <linux/mutex_types.h>
+#include <linux/timerqueue_types.h>
+#include <linux/types.h>
+
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
+#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
+#define CPUCLOCK_PERTHREAD(clock) \
+ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
+
+#define CPUCLOCK_PERTHREAD_MASK 4
+#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
+#define CPUCLOCK_CLOCK_MASK 3
+#define CPUCLOCK_PROF 0
+#define CPUCLOCK_VIRT 1
+#define CPUCLOCK_SCHED 2
+#define CPUCLOCK_MAX 3
+#define CLOCKFD CPUCLOCK_MAX
+#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
+
+#ifdef CONFIG_POSIX_TIMERS
+
+/**
+ * posix_cputimer_base - Container per posix CPU clock
+ * @nextevt: Earliest-expiration cache
+ * @tqhead: timerqueue head for cpu_timers
+ */
+struct posix_cputimer_base {
+ u64 nextevt;
+ struct timerqueue_head tqhead;
+};
+
+/**
+ * posix_cputimers - Container for posix CPU timer related data
+ * @bases: Base container for posix CPU clocks
+ * @timers_active: Timers are queued.
+ * @expiry_active: Timer expiry is active. Used for
+ * process wide timers to avoid multiple
+ * task trying to handle expiry concurrently
+ *
+ * Used in task_struct and signal_struct
+ */
+struct posix_cputimers {
+ struct posix_cputimer_base bases[CPUCLOCK_MAX];
+ unsigned int timers_active;
+ unsigned int expiry_active;
+};
+
+/**
+ * posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work: The task work to be scheduled
+ * @mutex: Mutex held around expiry in context of this task work
+ * @scheduled: @work has been scheduled already, no further processing
+ */
+struct posix_cputimers_work {
+ struct callback_head work;
+ struct mutex mutex;
+ unsigned int scheduled;
+};
+
+#else /* CONFIG_POSIX_TIMERS */
+
+struct posix_cputimers { };
+
+#endif /* CONFIG_POSIX_TIMERS */
+
+#endif /* _linux_POSIX_TIMERS_TYPES_H */
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 7c8d65414a70..b9e5bd2b42d3 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -61,7 +61,6 @@ struct bq27xxx_reg_cache {
struct bq27xxx_device_info {
struct device *dev;
- int id;
enum bq27xxx_chip chip;
u32 opts;
const char *name;
@@ -83,5 +82,6 @@ struct bq27xxx_device_info {
void bq27xxx_battery_update(struct bq27xxx_device_info *di);
int bq27xxx_battery_setup(struct bq27xxx_device_info *di);
void bq27xxx_battery_teardown(struct bq27xxx_device_info *di);
+extern const struct dev_pm_ops bq27xxx_battery_battery_pm_ops;
#endif
diff --git a/include/linux/power/power_on_reason.h b/include/linux/power/power_on_reason.h
new file mode 100644
index 000000000000..95a1ec0c403c
--- /dev/null
+++ b/include/linux/power/power_on_reason.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Author: Kamel Bouhra <kamel.bouhara@bootlin.com>
+ */
+
+#ifndef POWER_ON_REASON_H
+#define POWER_ON_REASON_H
+
+#define POWER_ON_REASON_REGULAR "regular power-up"
+#define POWER_ON_REASON_RTC "RTC wakeup"
+#define POWER_ON_REASON_WATCHDOG "watchdog timeout"
+#define POWER_ON_REASON_SOFTWARE "software reset"
+#define POWER_ON_REASON_RST_BTN "reset button action"
+#define POWER_ON_REASON_CPU_CLK_FAIL "CPU clock failure"
+#define POWER_ON_REASON_XTAL_FAIL "crystal oscillator failure"
+#define POWER_ON_REASON_BROWN_OUT "brown-out reset"
+#define POWER_ON_REASON_UNKNOWN "unknown reason"
+
+#endif /* POWER_ON_REASON_H */
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index a427f13c757f..8e5705a56b85 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -242,6 +242,7 @@ struct power_supply_config {
struct power_supply_desc {
const char *name;
enum power_supply_type type;
+ u8 charge_behaviours;
const enum power_supply_usb_type *usb_types;
size_t num_usb_types;
const enum power_supply_property *properties;
@@ -767,7 +768,6 @@ struct power_supply_battery_info {
int bti_resistance_tolerance;
};
-extern struct atomic_notifier_head power_supply_notifier;
extern int power_supply_reg_notifier(struct notifier_block *nb);
extern void power_supply_unreg_notifier(struct notifier_block *nb);
#if IS_ENABLED(CONFIG_POWER_SUPPLY)
@@ -895,8 +895,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev);
#define to_power_supply(device) container_of(device, struct power_supply, dev)
extern void *power_supply_get_drvdata(struct power_supply *psy);
-/* For APM emulation, think legacy userspace. */
-extern struct class *power_supply_class;
+extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data));
static inline bool power_supply_is_amp_property(enum power_supply_property psp)
{
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f2ed5b72b3d6..f7f1e5251c67 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,7 +10,6 @@
#include <linux/types.h>
#include <linux/once.h>
-#include <linux/percpu.h>
#include <linux/random.h>
struct rnd_state {
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 1424670df161..7233e9cf1bab 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -9,7 +9,7 @@
#include <linux/linkage.h>
#include <linux/cleanup.h>
-#include <linux/list.h>
+#include <linux/types.h>
/*
* We put the hardirq and softirq counter into the preemption
@@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void)
return level;
}
+/*
+ * These macro definitions avoid redundant invocations of preempt_count()
+ * because such invocations would result in redundant loads given that
+ * preempt_count() is commonly implemented with READ_ONCE().
+ */
+
#define nmi_count() (preempt_count() & NMI_MASK)
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#ifdef CONFIG_PREEMPT_RT
# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK)
+# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count())
#else
# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK))
#endif
-#define irq_count() (nmi_count() | hardirq_count() | softirq_count())
/*
* Macros to retrieve the current execution context:
@@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void)
#define in_nmi() (nmi_count())
#define in_hardirq() (hardirq_count())
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
-#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq()))
+#ifdef CONFIG_PREEMPT_RT
+# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq()))
+#else
+# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#endif
/*
* The following macros are deprecated and should not be used in new code:
@@ -349,7 +360,9 @@ void preempt_notifier_unregister(struct preempt_notifier *notifier);
static inline void preempt_notifier_init(struct preempt_notifier *notifier,
struct preempt_ops *ops)
{
- INIT_HLIST_NODE(&notifier->link);
+ /* INIT_HLIST_NODE() open coded, to avoid dependency on list.h */
+ notifier->link.next = NULL;
+ notifier->link.pprev = NULL;
notifier->ops = ops;
}
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8ef499ab3c1e..955e31860095 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -273,6 +273,8 @@ static inline void printk_trigger_flush(void)
}
#endif
+bool this_cpu_in_panic(void);
+
#ifdef CONFIG_SMP
extern int __printk_cpu_sync_try_get(void);
extern void __printk_cpu_sync_wait(void);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 253f2676d93a..0b2a89854440 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -65,6 +65,7 @@ struct proc_fs_info {
kgid_t pid_gid;
enum proc_hidepid hide_pid;
enum proc_pidonly pidonly;
+ struct rcu_head rcu;
};
static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
@@ -159,6 +160,7 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
void arch_report_meminfo(struct seq_file *m);
+void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
#else /* CONFIG_PROC_FS */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 49539bc416ce..5ea470eb4d76 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {}
static inline int ns_alloc_inum(struct ns_common *ns)
{
- atomic_long_set(&ns->stashed, 0);
+ WRITE_ONCE(ns->stashed, NULL);
return proc_alloc_inum(&ns->inum);
}
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 11db1ec516e2..04ae5ebcb637 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -18,13 +18,8 @@ struct proc_dir_entry;
struct notifier_block;
#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
-void create_prof_cpu_mask(void);
int create_proc_profile(void);
#else
-static inline void create_prof_cpu_mask(void)
-{
-}
-
static inline int create_proc_profile(void)
{
return 0;
diff --git a/include/linux/property.h b/include/linux/property.h
index 8c3c6685a2ae..3a1045eb786c 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -10,6 +10,8 @@
#ifndef _LINUX_PROPERTY_H_
#define _LINUX_PROPERTY_H_
+#include <linux/args.h>
+#include <linux/array_size.h>
#include <linux/bits.h>
#include <linux/fwnode.h>
#include <linux/stddef.h>
@@ -26,12 +28,6 @@ enum dev_prop_type {
DEV_PROP_REF,
};
-enum dev_dma_attr {
- DEV_DMA_NOT_SUPPORTED,
- DEV_DMA_NON_COHERENT,
- DEV_DMA_COHERENT,
-};
-
const struct fwnode_handle *__dev_fwnode_const(const struct device *dev);
struct fwnode_handle *__dev_fwnode(struct device *dev);
#define dev_fwnode(dev) \
@@ -79,6 +75,16 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode,
bool fwnode_device_is_available(const struct fwnode_handle *fwnode);
+static inline bool fwnode_device_is_big_endian(const struct fwnode_handle *fwnode)
+{
+ if (fwnode_property_present(fwnode, "big-endian"))
+ return true;
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) &&
+ fwnode_property_present(fwnode, "native-endian"))
+ return true;
+ return false;
+}
+
static inline
bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat)
{
@@ -86,6 +92,22 @@ bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char
}
/**
+ * device_is_big_endian - check if a device has BE registers
+ * @dev: Pointer to the struct device
+ *
+ * Returns: true if the device has a "big-endian" property, or if the kernel
+ * was compiled for BE *and* the device has a "native-endian" property.
+ * Returns false otherwise.
+ *
+ * Callers would nominally use ioread32be/iowrite32be if
+ * device_is_big_endian() == true, or readl/writel otherwise.
+ */
+static inline bool device_is_big_endian(const struct device *dev)
+{
+ return fwnode_device_is_big_endian(dev_fwnode(dev));
+}
+
+/**
* device_is_compatible - match 'compatible' property of the device with a given string
* @dev: Pointer to the struct device
* @compat: The string to match 'compatible' property with
@@ -97,6 +119,18 @@ static inline bool device_is_compatible(const struct device *dev, const char *co
return fwnode_device_is_compatible(dev_fwnode(dev), compat);
}
+int fwnode_property_match_property_string(const struct fwnode_handle *fwnode,
+ const char *propname,
+ const char * const *array, size_t n);
+
+static inline
+int device_property_match_property_string(const struct device *dev,
+ const char *propname,
+ const char * const *array, size_t n)
+{
+ return fwnode_property_match_property_string(dev_fwnode(dev), propname, array, n);
+}
+
int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode,
const char *prop, const char *nargs_prop,
unsigned int nargs, unsigned int index,
@@ -108,6 +142,7 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode,
const char *fwnode_get_name(const struct fwnode_handle *fwnode);
const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode);
+bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name);
struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode);
struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode);
@@ -116,11 +151,9 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode);
for (parent = fwnode_get_parent(fwnode); parent; \
parent = fwnode_get_next_parent(parent))
-struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode);
unsigned int fwnode_count_parents(const struct fwnode_handle *fwn);
struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn,
unsigned int depth);
-bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child);
struct fwnode_handle *fwnode_get_next_child_node(
const struct fwnode_handle *fwnode, struct fwnode_handle *child);
struct fwnode_handle *fwnode_get_next_available_child_node(
@@ -288,7 +321,7 @@ struct software_node_ref_args {
#define SOFTWARE_NODE_REFERENCE(_ref_, ...) \
(const struct software_node_ref_args) { \
.node = _ref_, \
- .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \
+ .nargs = COUNT_ARGS(__VA_ARGS__), \
.args = { __VA_ARGS__ }, \
}
@@ -488,6 +521,13 @@ struct software_node {
const struct property_entry *properties;
};
+#define SOFTWARE_NODE(_name_, _properties_, _parent_) \
+ (struct software_node) { \
+ .name = _name_, \
+ .properties = _properties_, \
+ .parent = _parent_, \
+ }
+
bool is_software_node(const struct fwnode_handle *fwnode);
const struct software_node *
to_software_node(const struct fwnode_handle *fwnode);
diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h
index eceda1d1407a..730f77381d55 100644
--- a/include/linux/pseudo_fs.h
+++ b/include/linux/pseudo_fs.h
@@ -5,7 +5,7 @@
struct pseudo_fs_context {
const struct super_operations *ops;
- const struct xattr_handler **xattr;
+ const struct xattr_handler * const *xattr;
const struct dentry_operations *dops;
unsigned long magic;
};
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 75da8f5f7ad8..c1dc87fc536b 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -8,6 +8,10 @@
enum psp_platform_access_msg {
PSP_CMD_NONE = 0x0,
PSP_I2C_REQ_BUS_CMD = 0x64,
+ PSP_DYNAMIC_BOOST_GET_NONCE,
+ PSP_DYNAMIC_BOOST_SET_UID,
+ PSP_DYNAMIC_BOOST_GET_PARAMETER,
+ PSP_DYNAMIC_BOOST_SET_PARAMETER,
};
struct psp_req_buffer_hdr {
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 7fd17e82bab4..3705c2044fc0 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -78,6 +78,36 @@ enum sev_cmd {
SEV_CMD_DBG_DECRYPT = 0x060,
SEV_CMD_DBG_ENCRYPT = 0x061,
+ /* SNP specific commands */
+ SEV_CMD_SNP_INIT = 0x081,
+ SEV_CMD_SNP_SHUTDOWN = 0x082,
+ SEV_CMD_SNP_PLATFORM_STATUS = 0x083,
+ SEV_CMD_SNP_DF_FLUSH = 0x084,
+ SEV_CMD_SNP_INIT_EX = 0x085,
+ SEV_CMD_SNP_SHUTDOWN_EX = 0x086,
+ SEV_CMD_SNP_DECOMMISSION = 0x090,
+ SEV_CMD_SNP_ACTIVATE = 0x091,
+ SEV_CMD_SNP_GUEST_STATUS = 0x092,
+ SEV_CMD_SNP_GCTX_CREATE = 0x093,
+ SEV_CMD_SNP_GUEST_REQUEST = 0x094,
+ SEV_CMD_SNP_ACTIVATE_EX = 0x095,
+ SEV_CMD_SNP_LAUNCH_START = 0x0A0,
+ SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1,
+ SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2,
+ SEV_CMD_SNP_DBG_DECRYPT = 0x0B0,
+ SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1,
+ SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0,
+ SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1,
+ SEV_CMD_SNP_PAGE_MOVE = 0x0C2,
+ SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3,
+ SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6,
+ SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7,
+ SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8,
+ SEV_CMD_SNP_CONFIG = 0x0C9,
+ SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA,
+ SEV_CMD_SNP_COMMIT = 0x0CB,
+ SEV_CMD_SNP_VLEK_LOAD = 0x0CD,
+
SEV_CMD_MAX,
};
@@ -523,12 +553,269 @@ struct sev_data_attestation_report {
u32 len; /* In/Out */
} __packed;
+/**
+ * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params
+ *
+ * @address: physical address of firmware image
+ * @len: length of the firmware image
+ */
+struct sev_data_snp_download_firmware {
+ u64 address; /* In */
+ u32 len; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_activate - SNP_ACTIVATE command params
+ *
+ * @gctx_paddr: system physical address guest context page
+ * @asid: ASID to bind to the guest
+ */
+struct sev_data_snp_activate {
+ u64 gctx_paddr; /* In */
+ u32 asid; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_addr - generic SNP command params
+ *
+ * @address: physical address of generic data param
+ */
+struct sev_data_snp_addr {
+ u64 address; /* In/Out */
+} __packed;
+
+/**
+ * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @policy: guest policy
+ * @ma_gctx_paddr: system physical address of migration agent
+ * @ma_en: the guest is associated with a migration agent
+ * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the
+ * purpose of guest-assisted migration.
+ * @rsvd: reserved
+ * @gosvw: guest OS-visible workarounds, as defined by hypervisor
+ */
+struct sev_data_snp_launch_start {
+ u64 gctx_paddr; /* In */
+ u64 policy; /* In */
+ u64 ma_gctx_paddr; /* In */
+ u32 ma_en:1; /* In */
+ u32 imi_en:1; /* In */
+ u32 rsvd:30;
+ u8 gosvw[16]; /* In */
+} __packed;
+
+/* SNP support page type */
+enum {
+ SNP_PAGE_TYPE_NORMAL = 0x1,
+ SNP_PAGE_TYPE_VMSA = 0x2,
+ SNP_PAGE_TYPE_ZERO = 0x3,
+ SNP_PAGE_TYPE_UNMEASURED = 0x4,
+ SNP_PAGE_TYPE_SECRET = 0x5,
+ SNP_PAGE_TYPE_CPUID = 0x6,
+
+ SNP_PAGE_TYPE_MAX
+};
+
+/**
+ * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @page_size: page size 0 indicates 4K and 1 indicates 2MB page
+ * @page_type: encoded page type
+ * @imi_page: indicates that this page is part of the IMI (Incoming Migration
+ * Image) of the guest
+ * @rsvd: reserved
+ * @rsvd2: reserved
+ * @address: system physical address of destination page to encrypt
+ * @rsvd3: reserved
+ * @vmpl1_perms: VMPL permission mask for VMPL1
+ * @vmpl2_perms: VMPL permission mask for VMPL2
+ * @vmpl3_perms: VMPL permission mask for VMPL3
+ * @rsvd4: reserved
+ */
+struct sev_data_snp_launch_update {
+ u64 gctx_paddr; /* In */
+ u32 page_size:1; /* In */
+ u32 page_type:3; /* In */
+ u32 imi_page:1; /* In */
+ u32 rsvd:27;
+ u32 rsvd2;
+ u64 address; /* In */
+ u32 rsvd3:8;
+ u32 vmpl1_perms:8; /* In */
+ u32 vmpl2_perms:8; /* In */
+ u32 vmpl3_perms:8; /* In */
+ u32 rsvd4;
+} __packed;
+
+/**
+ * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @id_block_paddr: system physical address of ID block
+ * @id_auth_paddr: system physical address of ID block authentication structure
+ * @id_block_en: indicates whether ID block is present
+ * @auth_key_en: indicates whether author key is present in authentication structure
+ * @rsvd: reserved
+ * @host_data: host-supplied data for guest, not interpreted by firmware
+ */
+struct sev_data_snp_launch_finish {
+ u64 gctx_paddr;
+ u64 id_block_paddr;
+ u64 id_auth_paddr;
+ u8 id_block_en:1;
+ u8 auth_key_en:1;
+ u64 rsvd:62;
+ u8 host_data[32];
+} __packed;
+
+/**
+ * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @address: system physical address of guest status page
+ */
+struct sev_data_snp_guest_status {
+ u64 gctx_paddr;
+ u64 address;
+} __packed;
+
+/**
+ * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params
+ *
+ * @paddr: system physical address of page to be claimed. The 0th bit in the
+ * address indicates the page size. 0h indicates 4KB and 1h indicates
+ * 2MB page.
+ */
+struct sev_data_snp_page_reclaim {
+ u64 paddr;
+} __packed;
+
+/**
+ * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params
+ *
+ * @paddr: system physical address of page to be unsmashed. The 0th bit in the
+ * address indicates the page size. 0h indicates 4 KB and 1h indicates
+ * 2 MB page.
+ */
+struct sev_data_snp_page_unsmash {
+ u64 paddr;
+} __packed;
+
+/**
+ * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @src_addr: source address of data to operate on
+ * @dst_addr: destination address of data to operate on
+ */
+struct sev_data_snp_dbg {
+ u64 gctx_paddr; /* In */
+ u64 src_addr; /* In */
+ u64 dst_addr; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params
+ *
+ * @gctx_paddr: system physical address of guest context page
+ * @req_paddr: system physical address of request page
+ * @res_paddr: system physical address of response page
+ */
+struct sev_data_snp_guest_request {
+ u64 gctx_paddr; /* In */
+ u64 req_paddr; /* In */
+ u64 res_paddr; /* In */
+} __packed;
+
+/**
+ * struct sev_data_snp_init_ex - SNP_INIT_EX structure
+ *
+ * @init_rmp: indicate that the RMP should be initialized.
+ * @list_paddr_en: indicate that list_paddr is valid
+ * @rsvd: reserved
+ * @rsvd1: reserved
+ * @list_paddr: system physical address of range list
+ * @rsvd2: reserved
+ */
+struct sev_data_snp_init_ex {
+ u32 init_rmp:1;
+ u32 list_paddr_en:1;
+ u32 rsvd:30;
+ u32 rsvd1;
+ u64 list_paddr;
+ u8 rsvd2[48];
+} __packed;
+
+/**
+ * struct sev_data_range - RANGE structure
+ *
+ * @base: system physical address of first byte of range
+ * @page_count: number of 4KB pages in this range
+ * @rsvd: reserved
+ */
+struct sev_data_range {
+ u64 base;
+ u32 page_count;
+ u32 rsvd;
+} __packed;
+
+/**
+ * struct sev_data_range_list - RANGE_LIST structure
+ *
+ * @num_elements: number of elements in RANGE_ARRAY
+ * @rsvd: reserved
+ * @ranges: array of num_elements of type RANGE
+ */
+struct sev_data_range_list {
+ u32 num_elements;
+ u32 rsvd;
+ struct sev_data_range ranges[];
+} __packed;
+
+/**
+ * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure
+ *
+ * @len: length of the command buffer read by the PSP
+ * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU
+ * @rsvd1: reserved
+ */
+struct sev_data_snp_shutdown_ex {
+ u32 len;
+ u32 iommu_snp_shutdown:1;
+ u32 rsvd1:31;
+} __packed;
+
+/**
+ * struct sev_platform_init_args
+ *
+ * @error: SEV firmware error code
+ * @probe: True if this is being called as part of CCP module probe, which
+ * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed
+ * unless psp_init_on_probe module param is set
+ */
+struct sev_platform_init_args {
+ int error;
+ bool probe;
+};
+
+/**
+ * struct sev_data_snp_commit - SNP_COMMIT structure
+ *
+ * @len: length of the command buffer read by the PSP
+ */
+struct sev_data_snp_commit {
+ u32 len;
+} __packed;
+
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
/**
* sev_platform_init - perform SEV INIT command
*
- * @error: SEV command return code
+ * @args: struct sev_platform_init_args to pass in arguments
*
* Returns:
* 0 if the SEV successfully processed the command
@@ -537,7 +824,7 @@ struct sev_data_attestation_report {
* -%ETIMEDOUT if the SEV command timed out
* -%EIO if the SEV returned a non-zero return code
*/
-int sev_platform_init(int *error);
+int sev_platform_init(struct sev_platform_init_args *args);
/**
* sev_platform_status - perform SEV PLATFORM_STATUS command
@@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error);
*/
int sev_guest_decommission(struct sev_data_decommission *data, int *error);
+/**
+ * sev_do_cmd - issue an SEV or an SEV-SNP command
+ *
+ * @cmd: SEV or SEV-SNP firmware command to issue
+ * @data: arguments for firmware command
+ * @psp_ret: SEV command return code
+ *
+ * Returns:
+ * 0 if the SEV device successfully processed the command
+ * -%ENODEV if the PSP device is not available
+ * -%ENOTSUPP if PSP device does not support SEV
+ * -%ETIMEDOUT if the SEV command timed out
+ * -%EIO if PSP device returned a non-zero return code
+ */
+int sev_do_cmd(int cmd, void *data, int *psp_ret);
+
void *psp_copy_user_blob(u64 uaddr, u32 len);
+void *snp_alloc_firmware_page(gfp_t mask);
+void snp_free_firmware_page(void *addr);
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
static inline int
sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; }
-static inline int sev_platform_init(int *error) { return -ENODEV; }
+static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; }
static inline int
sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; }
@@ -653,6 +958,9 @@ static inline int
sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; }
static inline int
+sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; }
+
+static inline int
sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; }
static inline int sev_guest_df_flush(int *error) { return -ENODEV; }
@@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int
static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); }
+static inline void *snp_alloc_firmware_page(gfp_t mask)
+{
+ return NULL;
+}
+
+static inline void snp_free_firmware_page(void *addr) { }
+
#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
#endif /* __PSP_SEV_H__ */
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 2a3a95586425..8dbd51ea8626 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -18,6 +18,16 @@ struct ptdump_state {
const struct ptdump_range *range;
};
+bool ptdump_walk_pgd_level_core(struct seq_file *m,
+ struct mm_struct *mm, pgd_t *pgd,
+ bool checkwx, bool dmesg);
void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd);
+bool ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_WX))
+ ptdump_check_wx();
+}
#endif /* _LINUX_PTDUMP_H */
diff --git a/include/linux/pti.h b/include/linux/pti.h
index 1a941efcaa62..1fbf9d6c20ef 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -2,7 +2,7 @@
#ifndef _INCLUDE_PTI_H
#define _INCLUDE_PTI_H
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
#include <asm/pti.h>
#else
static inline void pti_init(void) { }
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 1ef4e0f9bd2a..6e4b8206c7d0 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -200,6 +200,7 @@ struct ptp_clock;
enum ptp_clock_events {
PTP_CLOCK_ALARM,
PTP_CLOCK_EXTTS,
+ PTP_CLOCK_EXTOFF,
PTP_CLOCK_PPS,
PTP_CLOCK_PPSUSR,
};
@@ -210,6 +211,7 @@ enum ptp_clock_events {
* @type: One of the ptp_clock_events enumeration values.
* @index: Identifies the source of the event.
* @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only).
+ * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only).
* @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only).
*/
@@ -218,6 +220,7 @@ struct ptp_clock_event {
int index;
union {
u64 timestamp;
+ s64 offset;
struct pps_event_time pps_times;
};
};
diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h
index 746fd67c3480..e8c74fa3f455 100644
--- a/include/linux/ptp_kvm.h
+++ b/include/linux/ptp_kvm.h
@@ -8,15 +8,15 @@
#ifndef _PTP_KVM_H_
#define _PTP_KVM_H_
+#include <linux/clocksource_ids.h>
#include <linux/types.h>
struct timespec64;
-struct clocksource;
int kvm_arch_ptp_init(void);
void kvm_arch_ptp_exit(void);
int kvm_arch_ptp_get_clock(struct timespec64 *ts);
int kvm_arch_ptp_get_crosststamp(u64 *cycle,
- struct timespec64 *tspec, struct clocksource **cs);
+ struct timespec64 *tspec, enum clocksource_ids *cs_id);
#endif /* _PTP_KVM_H_ */
diff --git a/include/linux/ptp_mock.h b/include/linux/ptp_mock.h
new file mode 100644
index 000000000000..72eb401034d9
--- /dev/null
+++ b/include/linux/ptp_mock.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Mock-up PTP Hardware Clock driver for virtual network devices
+ *
+ * Copyright 2023 NXP
+ */
+
+#ifndef _PTP_MOCK_H_
+#define _PTP_MOCK_H_
+
+struct device;
+struct mock_phc;
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_MOCK)
+
+struct mock_phc *mock_phc_create(struct device *dev);
+void mock_phc_destroy(struct mock_phc *phc);
+int mock_phc_index(struct mock_phc *phc);
+
+#else
+
+static inline struct mock_phc *mock_phc_create(struct device *dev)
+{
+ return NULL;
+}
+
+static inline void mock_phc_destroy(struct mock_phc *phc)
+{
+}
+
+static inline int mock_phc_index(struct mock_phc *phc)
+{
+ return -1;
+}
+
+#endif
+
+#endif /* _PTP_MOCK_H_ */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..90507d4afcd6 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs)
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif
+#ifndef exception_ip
+#define exception_ip(x) instruction_pointer(x)
+#endif
+
extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 04ae1d9073a7..4a6568dfdf3f 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -41,8 +41,8 @@ struct pwm_args {
};
enum {
- PWMF_REQUESTED = 1 << 0,
- PWMF_EXPORTED = 1 << 1,
+ PWMF_REQUESTED = 0,
+ PWMF_EXPORTED = 1,
};
/*
@@ -69,9 +69,7 @@ struct pwm_state {
* @label: name of the PWM device
* @flags: flags associated with the PWM device
* @hwpwm: per-chip relative index of the PWM device
- * @pwm: global index of the PWM device
* @chip: PWM chip providing this PWM device
- * @chip_data: chip-private data associated with the PWM device
* @args: PWM arguments
* @state: last applied state
* @last: last implemented state (for PWM_DEBUG)
@@ -80,9 +78,7 @@ struct pwm_device {
const char *label;
unsigned long flags;
unsigned int hwpwm;
- unsigned int pwm;
struct pwm_chip *chip;
- void *chip_data;
struct pwm_args args;
struct pwm_state state;
@@ -95,8 +91,8 @@ struct pwm_device {
* @state: state to fill with the current PWM state
*
* The returned PWM state represents the state that was applied by a previous call to
- * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to
- * hardware. If pwm_apply_state() was never called, this returns either the current hardware
+ * pwm_apply_might_sleep(). Drivers may have to slightly tweak that state before programming it to
+ * hardware. If pwm_apply_might_sleep() was never called, this returns either the current hardware
* state (if supported) or the default settings.
*/
static inline void pwm_get_state(const struct pwm_device *pwm,
@@ -114,12 +110,6 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm)
return state.enabled;
}
-static inline void pwm_set_period(struct pwm_device *pwm, u64 period)
-{
- if (pwm)
- pwm->state.period = period;
-}
-
static inline u64 pwm_get_period(const struct pwm_device *pwm)
{
struct pwm_state state;
@@ -129,12 +119,6 @@ static inline u64 pwm_get_period(const struct pwm_device *pwm)
return state.period;
}
-static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty)
-{
- if (pwm)
- pwm->state.duty_cycle = duty;
-}
-
static inline u64 pwm_get_duty_cycle(const struct pwm_device *pwm)
{
struct pwm_state state;
@@ -160,20 +144,20 @@ static inline void pwm_get_args(const struct pwm_device *pwm,
}
/**
- * pwm_init_state() - prepare a new state to be applied with pwm_apply_state()
+ * pwm_init_state() - prepare a new state to be applied with pwm_apply_might_sleep()
* @pwm: PWM device
* @state: state to fill with the prepared PWM state
*
* This functions prepares a state that can later be tweaked and applied
- * to the PWM device with pwm_apply_state(). This is a convenient function
+ * to the PWM device with pwm_apply_might_sleep(). This is a convenient function
* that first retrieves the current PWM state and the replaces the period
* and polarity fields with the reference values defined in pwm->args.
* Once the function returns, you can adjust the ->enabled and ->duty_cycle
- * fields according to your needs before calling pwm_apply_state().
+ * fields according to your needs before calling pwm_apply_might_sleep().
*
* ->duty_cycle is initially set to zero to avoid cases where the current
* ->duty_cycle value exceed the pwm_args->period one, which would trigger
- * an error if the user calls pwm_apply_state() without adjusting ->duty_cycle
+ * an error if the user calls pwm_apply_might_sleep() without adjusting ->duty_cycle
* first.
*/
static inline void pwm_init_state(const struct pwm_device *pwm,
@@ -229,7 +213,7 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale)
*
* pwm_init_state(pwm, &state);
* pwm_set_relative_duty_cycle(&state, 50, 100);
- * pwm_apply_state(pwm, &state);
+ * pwm_apply_might_sleep(pwm, &state);
*
* This functions returns -EINVAL if @duty_cycle and/or @scale are
* inconsistent (@scale == 0 or @duty_cycle > @scale).
@@ -267,7 +251,6 @@ struct pwm_capture {
* @get_state: get the current PWM state. This function is only
* called once per PWM device when the PWM chip is
* registered.
- * @owner: helps prevent removal of modules exporting active PWMs
*/
struct pwm_ops {
int (*request)(struct pwm_chip *chip, struct pwm_device *pwm);
@@ -278,38 +261,63 @@ struct pwm_ops {
const struct pwm_state *state);
int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
struct pwm_state *state);
- struct module *owner;
};
/**
* struct pwm_chip - abstract a PWM controller
* @dev: device providing the PWMs
* @ops: callbacks for this PWM controller
- * @base: number of first PWM controlled by this chip
+ * @owner: module providing this chip
+ * @id: unique number of this PWM chip
* @npwm: number of PWMs controlled by this chip
* @of_xlate: request a PWM device given a device tree PWM specifier
- * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier
- * @list: list node for internal use
+ * @atomic: can the driver's ->apply() be called in atomic context
+ * @driver_data: Private pointer for driver specific info
* @pwms: array of PWM devices allocated by the framework
*/
struct pwm_chip {
struct device *dev;
const struct pwm_ops *ops;
- int base;
+ struct module *owner;
+ unsigned int id;
unsigned int npwm;
- struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
+ struct pwm_device * (*of_xlate)(struct pwm_chip *chip,
const struct of_phandle_args *args);
- unsigned int of_pwm_n_cells;
+ bool atomic;
/* only used internally by the PWM framework */
- struct list_head list;
+ void *driver_data;
struct pwm_device *pwms;
};
+static inline struct device *pwmchip_parent(const struct pwm_chip *chip)
+{
+ return chip->dev;
+}
+
+static inline void *pwmchip_get_drvdata(struct pwm_chip *chip)
+{
+ /*
+ * After pwm_chip got a dedicated struct device, this can be replaced by
+ * dev_get_drvdata(&chip->dev);
+ */
+ return chip->driver_data;
+}
+
+static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data)
+{
+ /*
+ * After pwm_chip got a dedicated struct device, this can be replaced by
+ * dev_set_drvdata(&chip->dev, data);
+ */
+ chip->driver_data = data;
+}
+
#if IS_ENABLED(CONFIG_PWM)
/* PWM user APIs */
-int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state);
+int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state);
+int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state);
int pwm_adjust_config(struct pwm_device *pwm);
/**
@@ -337,7 +345,7 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
state.duty_cycle = duty_ns;
state.period = period_ns;
- return pwm_apply_state(pwm, &state);
+ return pwm_apply_might_sleep(pwm, &state);
}
/**
@@ -358,7 +366,7 @@ static inline int pwm_enable(struct pwm_device *pwm)
return 0;
state.enabled = true;
- return pwm_apply_state(pwm, &state);
+ return pwm_apply_might_sleep(pwm, &state);
}
/**
@@ -377,27 +385,42 @@ static inline void pwm_disable(struct pwm_device *pwm)
return;
state.enabled = false;
- pwm_apply_state(pwm, &state);
+ pwm_apply_might_sleep(pwm, &state);
+}
+
+/**
+ * pwm_might_sleep() - is pwm_apply_atomic() supported?
+ * @pwm: PWM device
+ *
+ * Returns: false if pwm_apply_atomic() can be called from atomic context.
+ */
+static inline bool pwm_might_sleep(struct pwm_device *pwm)
+{
+ return !pwm->chip->atomic;
}
/* PWM provider APIs */
int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result,
unsigned long timeout);
-int pwm_set_chip_data(struct pwm_device *pwm, void *data);
-void *pwm_get_chip_data(struct pwm_device *pwm);
-int pwmchip_add(struct pwm_chip *chip);
+void pwmchip_put(struct pwm_chip *chip);
+struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv);
+struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv);
+
+int __pwmchip_add(struct pwm_chip *chip, struct module *owner);
+#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE)
void pwmchip_remove(struct pwm_chip *chip);
-int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
+int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner);
+#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE)
struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
unsigned int index,
const char *label);
-struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip,
const struct of_phandle_args *args);
-struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip,
const struct of_phandle_args *args);
struct pwm_device *pwm_get(struct device *dev, const char *con_id);
@@ -408,16 +431,27 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev,
struct fwnode_handle *fwnode,
const char *con_id);
#else
-static inline int pwm_apply_state(struct pwm_device *pwm,
- const struct pwm_state *state)
+static inline bool pwm_might_sleep(struct pwm_device *pwm)
+{
+ return true;
+}
+
+static inline int pwm_apply_might_sleep(struct pwm_device *pwm,
+ const struct pwm_state *state)
{
might_sleep();
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
+}
+
+static inline int pwm_apply_atomic(struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ return -EOPNOTSUPP;
}
static inline int pwm_adjust_config(struct pwm_device *pwm)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
@@ -445,14 +479,22 @@ static inline int pwm_capture(struct pwm_device *pwm,
return -EINVAL;
}
-static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
+static inline void pwmchip_put(struct pwm_chip *chip)
{
- return -EINVAL;
}
-static inline void *pwm_get_chip_data(struct pwm_device *pwm)
+static inline struct pwm_chip *pwmchip_alloc(struct device *parent,
+ unsigned int npwm,
+ size_t sizeof_priv)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct pwm_chip *devm_pwmchip_alloc(struct device *parent,
+ unsigned int npwm,
+ size_t sizeof_priv)
{
- return NULL;
+ return pwmchip_alloc(parent, npwm, sizeof_priv);
}
static inline int pwmchip_add(struct pwm_chip *chip)
@@ -536,7 +578,14 @@ static inline void pwm_apply_args(struct pwm_device *pwm)
state.period = pwm->args.period;
state.usage_power = false;
- pwm_apply_state(pwm, &state);
+ pwm_apply_might_sleep(pwm, &state);
+}
+
+/* only for backwards-compatibility, new code should not use this */
+static inline int pwm_apply_state(struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ return pwm_apply_might_sleep(pwm, state);
}
struct pwm_lookup {
diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h
index 90e3045b2dcb..0d3b6ed21628 100644
--- a/include/linux/qed/qed_fcoe_if.h
+++ b/include/linux/qed/qed_fcoe_if.h
@@ -67,9 +67,6 @@ struct qed_fcoe_cb_ops {
u32 (*get_login_failures)(void *cookie);
};
-void qed_fcoe_set_pf_params(struct qed_dev *cdev,
- struct qed_fcoe_pf_params *params);
-
/**
* struct qed_fcoe_ops - qed FCoE operations.
* @common: common operations pointer
diff --git a/include/linux/quota.h b/include/linux/quota.h
index fd692b4a41d5..07071e64abf3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type)
#define DQ_FAKE_B 3 /* no limits only usage */
#define DQ_READ_B 4 /* dquot was read into memory */
#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */
-#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\
+#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting
+ * to be cleaned up */
+#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\
* for the mask of entries set via SETQUOTA\
* quotactl. They are set under dq_data_lock\
* and the quota format handling dquot can\
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 11a4becff3a9..06cc8888199e 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot)
{
if (test_bit(DQ_MOD_B, &dquot->dq_flags))
return true;
- if (atomic_read(&dquot->dq_count) > 1)
+ if (atomic_read(&dquot->dq_count) > 0)
return true;
return false;
}
@@ -74,7 +74,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
int dquot_alloc_inode(struct inode *inode);
-int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
+void dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
void dquot_free_inode(struct inode *inode);
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number);
@@ -257,10 +257,9 @@ static inline void __dquot_free_space(struct inode *inode, qsize_t number,
inode_sub_bytes(inode, number);
}
-static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
+static inline void dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
inode_add_bytes(inode, number);
- return 0;
}
static inline int dquot_reclaim_space_nodirty(struct inode *inode,
@@ -358,14 +357,10 @@ static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
}
-static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
+static inline void dquot_claim_block(struct inode *inode, qsize_t nr)
{
- int ret;
-
- ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits);
- if (!ret)
- mark_inode_dirty_sync(inode);
- return ret;
+ dquot_claim_space_nodirty(inode, nr << inode->i_blkbits);
+ mark_inode_dirty_sync(inode);
}
static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr)
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index f29aaaf2eb21..98030accf641 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1;
extern const struct raid6_calls raid6_intx2;
extern const struct raid6_calls raid6_intx4;
extern const struct raid6_calls raid6_intx8;
-extern const struct raid6_calls raid6_intx16;
-extern const struct raid6_calls raid6_intx32;
extern const struct raid6_calls raid6_mmxx1;
extern const struct raid6_calls raid6_mmxx2;
extern const struct raid6_calls raid6_sse1x1;
@@ -108,6 +106,8 @@ extern const struct raid6_calls raid6_vpermxor1;
extern const struct raid6_calls raid6_vpermxor2;
extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +123,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
extern const struct raid6_recov_calls raid6_recov_avx512;
extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 5d868505a94e..6d92b68efbf6 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- offset ^= (rand); \
+ offset = ror32(offset, 5) ^ (rand); \
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
diff --git a/include/linux/range.h b/include/linux/range.h
index 7efb6a9b069b..6ad0b73cb7ad 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -31,12 +31,4 @@ int clean_sort_range(struct range *range, int az);
void sort_range(struct range *range, int nr_range);
-#define MAX_RESOURCE ((resource_size_t)~0)
-static inline resource_size_t cap_resource(u64 val)
-{
- if (val > MAX_RESOURCE)
- return MAX_RESOURCE;
-
- return val;
-}
#endif
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674..a64182bc72ad 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
void log_arm_hw_error(struct cper_sec_proc_arm *err);
+
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
@@ -35,4 +36,21 @@ static inline void
log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
#endif
+struct atl_err {
+ u64 addr;
+ u64 ipid;
+ u32 cpu;
+};
+
+#if IS_ENABLED(CONFIG_AMD_ATL)
+void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
+void amd_atl_unregister_decoder(void);
+void amd_retire_dram_row(struct atl_err *err);
+unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+#else
+static inline void amd_retire_dram_row(struct atl_err *err) { }
+static inline unsigned long
+amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
+#endif /* CONFIG_AMD_ATL */
+
#endif /* __RAS_H__ */
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 7ee7ed5de722..6dbc5a1bf6a8 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node,
rb_insert_augmented(node, &root->rb_root, augment);
}
+static __always_inline struct rb_node *
+rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
+ bool (*less)(struct rb_node *, const struct rb_node *),
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node **link = &tree->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ bool leftmost = true;
+
+ while (*link) {
+ parent = *link;
+ if (less(node, parent)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(node, parent, link);
+ augment->propagate(parent, NULL); /* suboptimal */
+ rb_insert_augmented_cached(node, tree, leftmost, augment);
+
+ return leftmost ? node : NULL;
+}
+
/*
* Template for declaring augmented rbtree callbacks (generic case)
*
diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h
new file mode 100644
index 000000000000..5640f024773b
--- /dev/null
+++ b/include/linux/rcu_notifier.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Read-Copy Update notifiers, initially RCU CPU stall notifier.
+ * Separate from rcupdate.h to avoid #include loops.
+ *
+ * Copyright (C) 2023 Paul E. McKenney.
+ */
+
+#ifndef __LINUX_RCU_NOTIFIER_H
+#define __LINUX_RCU_NOTIFIER_H
+
+// Actions for RCU CPU stall notifier calls.
+#define RCU_STALL_NOTIFY_NORM 1
+#define RCU_STALL_NOTIFY_EXP 2
+
+#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+int rcu_stall_chain_notifier_register(struct notifier_block *n);
+int rcu_stall_chain_notifier_unregister(struct notifier_block *n);
+
+#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
+
+// No RCU CPU stall warnings in Tiny RCU.
+static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; }
+static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; }
+
+#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
+
+#endif /* __LINUX_RCU_NOTIFIER_H */
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 0027d4c8087c..3860dbb9107a 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
}
extern void rcu_sync_init(struct rcu_sync *);
-extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index d29740be4833..3dc1e58865f7 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
})
/**
- * list_next_or_null_rcu - get the first element from a list
+ * list_next_or_null_rcu - get the next element from a list
* @head: the head for the list.
* @ptr: the list head to take the next element from.
* @type: the type of the struct this is embedded in.
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index ba4c00dd8005..89186c499dd4 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
{
struct hlist_nulls_node *first = h->first;
- n->next = first;
+ WRITE_ONCE(n->next, first);
WRITE_ONCE(n->pprev, &h->first);
rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
last = i;
if (last) {
- n->next = last->next;
+ WRITE_ONCE(n->next, last->next);
n->pprev = &last->next;
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5e5f920ade90..17d7ed5f3ae6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,9 +34,6 @@
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
-#define ulong2long(a) (*(long *)(&(a)))
-#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b)))
-#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b)))
/* Exported common interfaces */
void call_rcu(struct rcu_head *head, rcu_callback_t func);
@@ -122,8 +119,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
void rcu_init(void);
extern int rcu_scheduler_active;
void rcu_sched_clock_irq(int user);
-void rcu_report_dead(unsigned int cpu);
-void rcutree_migrate_callbacks(int cpu);
#ifdef CONFIG_TASKS_RCU_GENERIC
void rcu_init_tasks_generic(void);
@@ -189,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t);
do { \
int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
\
- if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \
+ if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \
likely(!___rttq_nesting)) { \
- rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \
+ rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \
} else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
!READ_ONCE((t)->trc_reader_special.b.blocked)) { \
rcu_tasks_trace_qs_blkd(t); \
@@ -252,6 +247,37 @@ do { \
cond_resched(); \
} while (0)
+/**
+ * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states
+ * @old_ts: jiffies at start of processing.
+ *
+ * This helper is for long-running softirq handlers, such as NAPI threads in
+ * networking. The caller should initialize the variable passed in as @old_ts
+ * at the beginning of the softirq handler. When invoked frequently, this macro
+ * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will
+ * provide both RCU and RCU-Tasks quiescent states. Note that this macro
+ * modifies its old_ts argument.
+ *
+ * Because regions of code that have disabled softirq act as RCU read-side
+ * critical sections, this macro should be invoked with softirq (and
+ * preemption) enabled.
+ *
+ * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would
+ * have more chance to invoke schedule() calls and provide necessary quiescent
+ * states. As a contrast, calling cond_resched() only won't achieve the same
+ * effect because cond_resched() does not provide RCU-Tasks quiescent states.
+ */
+#define rcu_softirq_qs_periodic(old_ts) \
+do { \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \
+ time_after(jiffies, (old_ts) + HZ / 10)) { \
+ preempt_disable(); \
+ rcu_softirq_qs(); \
+ preempt_enable(); \
+ (old_ts) = jiffies; \
+ } \
+} while (0)
+
/*
* Infrastructure to implement the synchronize_() primitives in
* TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -303,6 +329,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map)
lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);
}
+static inline void rcu_try_lock_acquire(struct lockdep_map *map)
+{
+ lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_);
+}
+
static inline void rcu_lock_release(struct lockdep_map *map)
{
lock_release(map, _THIS_IP_);
@@ -317,6 +348,7 @@ int rcu_read_lock_any_held(void);
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
# define rcu_lock_acquire(a) do { } while (0)
+# define rcu_try_lock_acquire(a) do { } while (0)
# define rcu_lock_release(a) do { } while (0)
static inline int rcu_read_lock_held(void)
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 9bc8cbb33340..eda493200663 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void)
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
#else
/*
* The BPF JIT forms these addresses even when it doesn't call these
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 699b938358bf..d07f0848802e 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -8,6 +8,7 @@
#include <linux/rcupdate.h>
#include <linux/completion.h>
+#include <linux/sched.h>
/*
* Structure allowing asynchronous waiting on RCU.
@@ -42,6 +43,11 @@ do { \
* call_srcu() function, with this wrapper supplying the pointer to the
* corresponding srcu_struct.
*
+ * Note that call_rcu_hurry() should be used instead of call_rcu()
+ * because in kernels built with CONFIG_RCU_LAZY=y the delay between the
+ * invocation of call_rcu() and that of the corresponding RCU callback
+ * can be multiple seconds.
+ *
* The first argument tells Tiny RCU's _wait_rcu_gp() not to
* bother waiting for RCU. The reason for this is because anywhere
* synchronize_rcu_mult() can be called is automatically already a full
@@ -50,4 +56,13 @@ do { \
#define synchronize_rcu_mult(...) \
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
+static inline void cond_resched_rcu(void)
+{
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
+ rcu_read_unlock();
+ cond_resched();
+ rcu_read_lock();
+#endif
+}
+
#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7f17acf29dda..d9ac7b136aea 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -138,6 +138,8 @@ static inline int rcu_needs_cpu(void)
return 0;
}
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
+
/*
* Take advantage of the fact that there is only one CPU, which
* allows us to ignore virtualization-based context switches.
@@ -169,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); }
#define rcutree_offline_cpu NULL
#define rcutree_dead_cpu NULL
#define rcutree_dying_cpu NULL
-static inline void rcu_cpu_starting(unsigned int cpu) { }
+static inline void rcutree_report_cpu_starting(unsigned int cpu) { }
#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 56bccb5a8fde..254244202ea9 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -21,6 +21,7 @@ void rcu_softirq_qs(void);
void rcu_note_context_switch(bool preempt);
int rcu_needs_cpu(void);
void rcu_cpu_stall_reset(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
/*
* Note a virtualization-based context switch. This is simply a
@@ -36,7 +37,6 @@ void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void rcu_barrier(void);
-bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
bool rcu_gp_might_be_stalled(void);
@@ -110,9 +110,21 @@ void rcu_all_qs(void);
/* RCUtree hotplug events */
int rcutree_prepare_cpu(unsigned int cpu);
int rcutree_online_cpu(unsigned int cpu);
-int rcutree_offline_cpu(unsigned int cpu);
+void rcutree_report_cpu_starting(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
int rcutree_dead_cpu(unsigned int cpu);
int rcutree_dying_cpu(unsigned int cpu);
-void rcu_cpu_starting(unsigned int cpu);
+int rcutree_offline_cpu(unsigned int cpu);
+#else
+#define rcutree_dead_cpu NULL
+#define rcutree_dying_cpu NULL
+#define rcutree_offline_cpu NULL
+#endif
+
+void rcutree_migrate_callbacks(int cpu);
+
+/* Called from hotplug and also arm64 early secondary boot failure */
+void rcutree_report_cpu_dead(void);
#endif /* __LINUX_RCUTREE_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 2b6bb593be5b..abcdde4df697 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -129,11 +129,14 @@ enum sys_off_mode {
* @cb_data: User's callback data.
* @cmd: Command string. Currently used only by the sys-off restart mode,
* NULL otherwise.
+ * @dev: Device of the sys-off handler. Only if known (devm_register_*),
+ * NULL otherwise.
*/
struct sys_off_data {
int mode;
void *cb_data;
const char *cmd;
+ struct device *dev;
};
struct sys_off_handler *
@@ -174,7 +177,17 @@ void ctrl_alt_del(void);
extern void orderly_poweroff(bool force);
extern void orderly_reboot(void);
-void hw_protection_shutdown(const char *reason, int ms_until_forced);
+void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown);
+
+static inline void hw_protection_reboot(const char *reason, int ms_until_forced)
+{
+ __hw_protection_shutdown(reason, ms_until_forced, false);
+}
+
+static inline void hw_protection_shutdown(const char *reason, int ms_until_forced)
+{
+ __hw_protection_shutdown(reason, ms_until_forced, true);
+}
/*
* Emergency restart, callable from an interrupt handler.
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index a62fcca97486..59b3b752394d 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -96,22 +96,11 @@
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/limits.h>
+#include <linux/refcount_types.h>
#include <linux/spinlock_types.h>
struct mutex;
-/**
- * typedef refcount_t - variant of atomic_t specialized for reference counts
- * @refs: atomic_t counter field
- *
- * The counter saturates at REFCOUNT_SATURATED and will not move once
- * there. This avoids wrapping the counter and causing 'spurious'
- * use-after-free bugs.
- */
-typedef struct refcount_struct {
- atomic_t refs;
-} refcount_t;
-
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
#define REFCOUNT_MAX INT_MAX
#define REFCOUNT_SATURATED (INT_MIN / 2)
@@ -147,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r)
return atomic_read(&r->refs);
}
-static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
{
int old = refcount_read(r);
@@ -188,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
return __refcount_add_not_zero(i, r, NULL);
}
-static inline void __refcount_add(int i, refcount_t *r, int *oldp)
+static inline __signed_wrap
+void __refcount_add(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_add_relaxed(i, &r->refs);
@@ -267,7 +258,8 @@ static inline void refcount_inc(refcount_t *r)
__refcount_inc(r, NULL);
}
-static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
+static inline __must_check __signed_wrap
+bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_sub_release(i, &r->refs);
diff --git a/include/linux/refcount_types.h b/include/linux/refcount_types.h
new file mode 100644
index 000000000000..162004f06edf
--- /dev/null
+++ b/include/linux/refcount_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_REFCOUNT_TYPES_H
+#define _LINUX_REFCOUNT_TYPES_H
+
+#include <linux/types.h>
+
+/**
+ * typedef refcount_t - variant of atomic_t specialized for reference counts
+ * @refs: atomic_t counter field
+ *
+ * The counter saturates at REFCOUNT_SATURATED and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free bugs.
+ */
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+#endif /* _LINUX_REFCOUNT_TYPES_H */
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 8fc0b3ebce44..d470303b1bbb 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -332,6 +332,10 @@ typedef void (*regmap_unlock)(void *);
* @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port
* access can be distinguished.
* @max_register: Optional, specifies the maximum valid register address.
+ * @max_register_is_0: Optional, specifies that zero value in @max_register
+ * should be taken into account. This is a workaround to
+ * apply handling of @max_register for regmap that contains
+ * only one register.
* @wr_table: Optional, points to a struct regmap_access_table specifying
* valid ranges for write access.
* @rd_table: As above, for read access.
@@ -422,6 +426,7 @@ struct regmap_config {
bool io_port;
unsigned int max_register;
+ bool max_register_is_0;
const struct regmap_access_table *wr_table;
const struct regmap_access_table *rd_table;
const struct regmap_access_table *volatile_table;
@@ -1225,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map,
int regmap_raw_write_async(struct regmap *map, unsigned int reg,
const void *val, size_t val_len);
int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
+int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val);
int regmap_raw_read(struct regmap *map, unsigned int reg,
void *val, size_t val_len);
int regmap_noinc_read(struct regmap *map, unsigned int reg,
@@ -1287,6 +1293,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
void regcache_cache_only(struct regmap *map, bool enable);
void regcache_cache_bypass(struct regmap *map, bool enable);
void regcache_mark_dirty(struct regmap *map);
+bool regcache_reg_cached(struct regmap *map, unsigned int reg);
bool regmap_check_range_table(struct regmap *map, unsigned int reg,
const struct regmap_access_table *table);
@@ -1733,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg,
return -EINVAL;
}
+static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg,
+ unsigned int *val)
+{
+ WARN_ONCE(1, "regmap API is disabled");
+ return -EINVAL;
+}
+
static inline int regmap_raw_read(struct regmap *map, unsigned int reg,
void *val, size_t val_len)
{
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 39b666b40ea6..ed180ca419da 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -33,6 +33,7 @@
#include <linux/err.h>
#include <linux/suspend.h>
+#include <regulator/regulator.h>
struct device;
struct notifier_block;
@@ -85,52 +86,6 @@ struct regulator_dev;
#define REGULATOR_MODE_STANDBY 0x8
/*
- * Regulator notifier events.
- *
- * UNDER_VOLTAGE Regulator output is under voltage.
- * OVER_CURRENT Regulator output current is too high.
- * REGULATION_OUT Regulator output is out of regulation.
- * FAIL Regulator output has failed.
- * OVER_TEMP Regulator over temp.
- * FORCE_DISABLE Regulator forcibly shut down by software.
- * VOLTAGE_CHANGE Regulator voltage changed.
- * Data passed is old voltage cast to (void *).
- * DISABLE Regulator was disabled.
- * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed.
- * Data passed is "struct pre_voltage_change_data"
- * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason.
- * Data passed is old voltage cast to (void *).
- * PRE_DISABLE Regulator is about to be disabled
- * ABORT_DISABLE Regulator disable failed for some reason
- *
- * NOTE: These events can be OR'ed together when passed into handler.
- */
-
-#define REGULATOR_EVENT_UNDER_VOLTAGE 0x01
-#define REGULATOR_EVENT_OVER_CURRENT 0x02
-#define REGULATOR_EVENT_REGULATION_OUT 0x04
-#define REGULATOR_EVENT_FAIL 0x08
-#define REGULATOR_EVENT_OVER_TEMP 0x10
-#define REGULATOR_EVENT_FORCE_DISABLE 0x20
-#define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40
-#define REGULATOR_EVENT_DISABLE 0x80
-#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100
-#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200
-#define REGULATOR_EVENT_PRE_DISABLE 0x400
-#define REGULATOR_EVENT_ABORT_DISABLE 0x800
-#define REGULATOR_EVENT_ENABLE 0x1000
-/*
- * Following notifications should be emitted only if detected condition
- * is such that the HW is likely to still be working but consumers should
- * take a recovery action to prevent problems esacalating into errors.
- */
-#define REGULATOR_EVENT_UNDER_VOLTAGE_WARN 0x2000
-#define REGULATOR_EVENT_OVER_CURRENT_WARN 0x4000
-#define REGULATOR_EVENT_OVER_VOLTAGE_WARN 0x8000
-#define REGULATOR_EVENT_OVER_TEMP_WARN 0x10000
-#define REGULATOR_EVENT_WARN_MASK 0x1E000
-
-/*
* Regulator errors that can be queried using regulator_get_error_flags
*
* UNDER_VOLTAGE Regulator output is under voltage.
@@ -365,13 +320,13 @@ devm_regulator_get_exclusive(struct device *dev, const char *id)
static inline int devm_regulator_get_enable(struct device *dev, const char *id)
{
- return -ENODEV;
+ return 0;
}
static inline int devm_regulator_get_enable_optional(struct device *dev,
const char *id)
{
- return -ENODEV;
+ return 0;
}
static inline struct regulator *__must_check
diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h
index f90df9ee703e..d58ff273157e 100644
--- a/include/linux/regulator/db8500-prcmu.h
+++ b/include/linux/regulator/db8500-prcmu.h
@@ -35,10 +35,4 @@ enum db8500_regulator_id {
DB8500_NUM_REGULATORS
};
-/*
- * Exported interface for CPUIdle only. This function is called with all
- * interrupts turned off.
- */
-int power_state_active_is_enabled(void);
-
#endif
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index c6ef7d68eb9a..22a07c0900a4 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -51,12 +51,7 @@ enum regulator_detection_severity {
/* Initialize struct linear_range for regulators */
#define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV) \
-{ \
- .min = _min_uV, \
- .min_sel = _min_sel, \
- .max_sel = _max_sel, \
- .step = _step_uV, \
-}
+ LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV)
/**
* struct regulator_ops - regulator operations.
@@ -292,11 +287,12 @@ enum regulator_type {
* @ramp_delay: Time to settle down after voltage change (unit: uV/us)
* @min_dropout_uV: The minimum dropout voltage this regulator can handle
* @linear_ranges: A constant table of possible voltage ranges.
- * @linear_range_selectors: A constant table of voltage range selectors.
- * If pickable ranges are used each range must
- * have corresponding selector here.
+ * @linear_range_selectors_bitfield: A constant table of voltage range
+ * selectors as bitfield values. If
+ * pickable ranges are used each range
+ * must have corresponding selector here.
* @n_linear_ranges: Number of entries in the @linear_ranges (and in
- * linear_range_selectors if used) table(s).
+ * linear_range_selectors_bitfield if used) table(s).
* @volt_table: Voltage mapping table (if table based mapping)
* @curr_table: Current limit mapping table (if table based mapping)
*
@@ -384,7 +380,7 @@ struct regulator_desc {
int min_dropout_uV;
const struct linear_range *linear_ranges;
- const unsigned int *linear_range_selectors;
+ const unsigned int *linear_range_selectors_bitfield;
int n_linear_ranges;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 621b7f4a3639..0cd76d264727 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -49,6 +49,13 @@ struct regulator;
#define DISABLE_IN_SUSPEND 1
#define ENABLE_IN_SUSPEND 2
+/*
+ * Default time window (in milliseconds) following a critical under-voltage
+ * event during which less critical actions can be safely carried out by the
+ * system.
+ */
+#define REGULATOR_DEF_UV_LESS_CRITICAL_WINDOW_MS 10
+
/* Regulator active discharge flags */
enum regulator_active_discharge {
REGULATOR_ACTIVE_DISCHARGE_DEFAULT,
@@ -127,6 +134,8 @@ struct notification_limit {
* @ramp_disable: Disable ramp delay when initialising or when setting voltage.
* @soft_start: Enable soft start so that voltage ramps slowly.
* @pull_down: Enable pull down when regulator is disabled.
+ * @system_critical: Set if the regulator is critical to system stability or
+ * functionality.
* @over_current_protection: Auto disable on over current event.
*
* @over_current_detection: Configure over current limits.
@@ -153,6 +162,13 @@ struct notification_limit {
* regulator_active_discharge values are used for
* initialisation.
* @enable_time: Turn-on time of the rails (unit: microseconds)
+ * @uv_less_critical_window_ms: Specifies the time window (in milliseconds)
+ * following a critical under-voltage (UV) event
+ * during which less critical actions can be
+ * safely carried out by the system (for example
+ * logging). After this time window more critical
+ * actions should be done (for example prevent
+ * HW damage).
*/
struct regulation_constraints {
@@ -204,6 +220,7 @@ struct regulation_constraints {
unsigned int settling_time_up;
unsigned int settling_time_down;
unsigned int enable_time;
+ unsigned int uv_less_critical_window_ms;
unsigned int active_discharge;
@@ -214,6 +231,7 @@ struct regulation_constraints {
unsigned ramp_disable:1; /* disable ramp delay */
unsigned soft_start:1; /* ramp voltage slowly */
unsigned pull_down:1; /* pull down resistor when regulator off */
+ unsigned system_critical:1; /* critical to system stability */
unsigned over_current_protection:1; /* auto disable on over current */
unsigned over_current_detection:1; /* notify on over current */
unsigned over_voltage_detection:1; /* notify on over voltage */
diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h
index 8313e7ed6aec..a225e9eeb30d 100644
--- a/include/linux/regulator/max8973-regulator.h
+++ b/include/linux/regulator/max8973-regulator.h
@@ -48,10 +48,6 @@
* control signal from EN input pin. If it is false then
* voltage output will be enabled/disabled through EN bit of
* device register.
- * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host
- * then GPIO number can be provided. If no GPIO controlled then
- * it should be -1.
- * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic.
* @dvs_def_state: Default state of dvs. 1 if it is high else 0.
*/
struct max8973_regulator_platform_data {
@@ -59,8 +55,6 @@ struct max8973_regulator_platform_data {
unsigned long control_flags;
unsigned long junction_temp_warning;
bool enable_ext_control;
- int enable_gpio;
- int dvs_gpio;
unsigned dvs_def_state:1;
};
diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h
index c71a6a9fce7a..562386f9b80e 100644
--- a/include/linux/regulator/mt6358-regulator.h
+++ b/include/linux/regulator/mt6358-regulator.h
@@ -86,6 +86,9 @@ enum {
MT6366_ID_VMC,
MT6366_ID_VAUD28,
MT6366_ID_VSIM2,
+ MT6366_ID_VM18,
+ MT6366_ID_VMDDR,
+ MT6366_ID_VSRAM_CORE,
MT6366_ID_RG_MAX,
};
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index fe8978eb69f1..b4795698d8c2 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc);
int rproc_set_firmware(struct rproc *rproc, const char *fw_name);
void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem);
+
+/* from remoteproc_coredump.c */
+void rproc_coredump_cleanup(struct rproc *rproc);
+void rproc_coredump(struct rproc *rproc);
void rproc_coredump_using_sections(struct rproc *rproc);
int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
int rproc_coredump_add_custom_segment(struct rproc *rproc,
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8334eeacfec5..a365f67131ec 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -6,6 +6,12 @@
#include <linux/list.h>
#include <linux/pid.h>
+/* CLOSID, RMID value used by the default control group */
+#define RESCTRL_RESERVED_CLOSID 0
+#define RESCTRL_RESERVED_RMID 0
+
+#define RESCTRL_PICK_ANY_CPU -1
+
#ifdef CONFIG_PROC_CPU_RESCTRL
int proc_resctrl_show(struct seq_file *m,
@@ -94,7 +100,7 @@ struct rdt_domain {
* zero CBM.
* @shareable_bits: Bitmask of shareable resource with other
* executing entities
- * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
+ * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid.
* @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
* level has CPU scope.
*/
@@ -102,7 +108,7 @@ struct resctrl_cache {
unsigned int cbm_len;
unsigned int min_cbm_bits;
unsigned int shareable_bits;
- bool arch_has_sparse_bitmaps;
+ bool arch_has_sparse_bitmasks;
bool arch_has_per_cpu_cfg;
};
@@ -153,7 +159,7 @@ struct resctrl_schema;
* @cache_level: Which cache level defines scope of this resource
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
- * @domains: All domains for this resource
+ * @domains: RCU list of all domains for this resource
* @name: Name to use in "schemata" file.
* @data_width: Character width of data when displaying
* @default_ctrl: Specifies default cache cbm or memory B/W percent.
@@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type type);
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_online_cpu(unsigned int cpu);
+void resctrl_offline_cpu(unsigned int cpu);
/**
* resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
* for this resource and domain.
* @r: resource that the counter should be read from.
* @d: domain that the counter should be read from.
+ * @closid: closid that matches the rmid. Depending on the architecture, the
+ * counter may match traffic of both @closid and @rmid, or @rmid
+ * only.
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
+ * @arch_mon_ctx: An architecture specific value from
+ * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies
+ * the hardware monitor allocated for this read request.
*
- * Call from process context on a CPU that belongs to domain @d.
+ * Some architectures need to sleep when first programming some of the counters.
+ * (specifically: arm64's MPAM cache occupancy counters can return 'not ready'
+ * for a short period of time). Call from a non-migrateable process context on
+ * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or
+ * schedule_work_on(). This function can be called with interrupts masked,
+ * e.g. using smp_call_function_any(), but may consistently return an error.
*
* Return:
* 0 on success, or -EIO, -EINVAL etc on error.
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid, u64 *val);
+ u32 closid, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val, void *arch_mon_ctx);
+
+/**
+ * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
+ *
+ * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when
+ * resctrl_arch_rmid_read() is called with preemption disabled.
+ *
+ * The contract with resctrl_arch_rmid_read() is that if interrupts
+ * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an
+ * IPI, (and fail if the call needed to sleep), while most of the time
+ * the work is scheduled, allowing the call to sleep.
+ */
+static inline void resctrl_arch_rmid_read_context_check(void)
+{
+ if (!irqs_disabled())
+ might_sleep();
+}
/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
* and eventid.
* @r: The domain's resource.
* @d: The rmid's domain.
+ * @closid: closid that matches the rmid. Depending on the architecture, the
+ * counter may match traffic of both @closid and @rmid, or @rmid only.
* @rmid: The rmid whose counter values should be reset.
* @eventid: The eventid whose counter values should be reset.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid);
+ u32 closid, u32 rmid,
+ enum resctrl_event_id eventid);
/**
* resctrl_arch_reset_rmid_all() - Reset all private state associated with
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index 0fa4f60e1186..357df16ede32 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -60,6 +60,9 @@ struct reset_control_lookup {
* @reset_control_head: head of internal list of requested reset controls
* @dev: corresponding driver model device struct
* @of_node: corresponding device tree node as phandle target
+ * @of_args: for reset-gpios controllers: corresponding phandle args with
+ * of_node and GPIO number complementing of_node; either this or
+ * of_node should be present
* @of_reset_n_cells: number of cells in reset line specifiers
* @of_xlate: translation function to translate from specifier as found in the
* device tree to id as given to the reset control ops, defaults
@@ -73,6 +76,7 @@ struct reset_controller_dev {
struct list_head reset_control_head;
struct device *dev;
struct device_node *of_node;
+ const struct of_phandle_args *of_args;
int of_reset_n_cells;
int (*of_xlate)(struct reset_controller_dev *rcdev,
const struct of_phandle_args *reset_spec);
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 980a65594412..13f17676c5f4 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -7,8 +7,8 @@
#include <linux/compiler.h>
#include <linux/types.h>
-#include <linux/time64.h>
+struct __kernel_timespec;
struct timespec;
struct old_timespec32;
struct pollfd;
diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h
index 285189454449..e0135e0adae0 100644
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/task_work.h>
#include <linux/memcontrol.h>
+#include <linux/rseq.h>
#include <linux/blk-cgroup.h>
/**
@@ -55,7 +56,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs)
}
#endif
- mem_cgroup_handle_over_high();
+ mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current();
rseq_handle_notify_resume(NULL, regs);
diff --git a/include/linux/rethook.h b/include/linux/rethook.h
index 26b6f3c81a76..ba60962805f6 100644
--- a/include/linux/rethook.h
+++ b/include/linux/rethook.h
@@ -6,11 +6,10 @@
#define _LINUX_RETHOOK_H
#include <linux/compiler.h>
-#include <linux/freelist.h>
+#include <linux/objpool.h>
#include <linux/kallsyms.h>
#include <linux/llist.h>
#include <linux/rcupdate.h>
-#include <linux/refcount.h>
struct rethook_node;
@@ -29,15 +28,18 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long,
*/
struct rethook {
void *data;
- rethook_handler_t handler;
- struct freelist_head pool;
- refcount_t ref;
+ /*
+ * To avoid sparse warnings, this uses a raw function pointer with
+ * __rcu, instead of rethook_handler_t. But this must be same as
+ * rethook_handler_t.
+ */
+ void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *);
+ struct objpool_head pool;
struct rcu_head rcu;
};
/**
* struct rethook_node - The rethook shadow-stack entry node.
- * @freelist: The freelist, linked to struct rethook::pool.
* @rcu: The rcu_head for deferred freeing.
* @llist: The llist, linked to a struct task_struct::rethooks.
* @rethook: The pointer to the struct rethook.
@@ -48,20 +50,16 @@ struct rethook {
* on each entry of the shadow stack.
*/
struct rethook_node {
- union {
- struct freelist_node freelist;
- struct rcu_head rcu;
- };
+ struct rcu_head rcu;
struct llist_node llist;
struct rethook *rethook;
unsigned long ret_addr;
unsigned long frame;
};
-struct rethook *rethook_alloc(void *data, rethook_handler_t handler);
+struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num);
void rethook_stop(struct rethook *rh);
void rethook_free(struct rethook *rh);
-void rethook_add_node(struct rethook *rh, struct rethook_node *node);
struct rethook_node *rethook_try_get(struct rethook *rh);
void rethook_recycle(struct rethook_node *node);
void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount);
@@ -98,4 +96,3 @@ void rethook_flush_task(struct task_struct *tk);
#endif
#endif
-
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 57467cbf4c5b..b6f3797277ff 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -12,7 +12,7 @@
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
+#include <linux/workqueue_types.h>
struct rhash_head {
struct rhash_head __rcu *next;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 782e14f62201..dc5ae4e96aee 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -98,7 +98,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \
})
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
+typedef bool (*ring_buffer_cond_fn)(void *data);
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+ ring_buffer_cond_fn cond, void *data);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full);
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
@@ -141,6 +143,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
+unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer);
void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
@@ -191,15 +194,24 @@ bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer);
size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu);
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu);
-void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu);
-void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data);
-int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page,
+struct buffer_data_read_page;
+struct buffer_data_read_page *
+ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu);
+void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu,
+ struct buffer_data_read_page *page);
+int ring_buffer_read_page(struct trace_buffer *buffer,
+ struct buffer_data_read_page *data_page,
size_t len, int cpu, int full);
+void *ring_buffer_read_page_data(struct buffer_data_read_page *page);
struct trace_seq;
int ring_buffer_print_entry_header(struct trace_seq *s);
-int ring_buffer_print_page_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s);
+
+int ring_buffer_subbuf_order_get(struct trace_buffer *buffer);
+int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order);
+int ring_buffer_subbuf_size_get(struct trace_buffer *buffer);
enum ring_buffer_flags {
RB_FL_OVERWRITE = 1 << 0,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b87d01660412..b7944a833668 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -121,6 +121,11 @@ static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
down_write(&anon_vma->root->rwsem);
}
+static inline int anon_vma_trylock_write(struct anon_vma *anon_vma)
+{
+ return down_write_trylock(&anon_vma->root->rwsem);
+}
+
static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
up_write(&anon_vma->root->rwsem);
@@ -172,131 +177,323 @@ struct anon_vma *folio_get_anon_vma(struct folio *folio);
typedef int __bitwise rmap_t;
/*
- * No special request: if the page is a subpage of a compound page, it is
- * mapped via a PTE. The mapped (sub)page is possibly shared between processes.
+ * No special request: A mapped anonymous (sub)page is possibly shared between
+ * processes.
*/
#define RMAP_NONE ((__force rmap_t)0)
-/* The (sub)page is exclusive to a single process. */
+/* The anonymous (sub)page is exclusive to a single process. */
#define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0))
/*
- * The compound page is not mapped via PTEs, but instead via a single PMD and
- * should be accounted accordingly.
+ * Internally, we're using an enum to specify the granularity. We make the
+ * compiler emit specialized code for each granularity.
*/
-#define RMAP_COMPOUND ((__force rmap_t)BIT(1))
+enum rmap_level {
+ RMAP_LEVEL_PTE = 0,
+ RMAP_LEVEL_PMD,
+};
+
+static inline void __folio_rmap_sanity_checks(struct folio *folio,
+ struct page *page, int nr_pages, enum rmap_level level)
+{
+ /* hugetlb folios are handled separately. */
+ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+
+ /*
+ * TODO: we get driver-allocated folios that have nothing to do with
+ * the rmap using vm_insert_page(); therefore, we cannot assume that
+ * folio_test_large_rmappable() holds for large folios. We should
+ * handle any desired mapcount+stats accounting for these folios in
+ * VM_MIXEDMAP VMAs separately, and then sanity-check here that
+ * we really only get rmappable folios.
+ */
+
+ VM_WARN_ON_ONCE(nr_pages <= 0);
+ VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
+ VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);
+
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ break;
+ case RMAP_LEVEL_PMD:
+ /*
+ * We don't support folios larger than a single PMD yet. So
+ * when RMAP_LEVEL_PMD is set, we assume that we are creating
+ * a single "entire" mapping of the folio.
+ */
+ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
+ VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
+ break;
+ default:
+ VM_WARN_ON_ONCE(true);
+ }
+}
/*
* rmap interfaces called when adding or removing pte of page
*/
-void page_move_anon_rmap(struct page *, struct vm_area_struct *);
-void page_add_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long address, rmap_t flags);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
- unsigned long address);
+void folio_move_anon_rmap(struct folio *, struct vm_area_struct *);
+void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
+ struct vm_area_struct *, unsigned long address, rmap_t flags);
+#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \
+ folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags)
+void folio_add_anon_rmap_pmd(struct folio *, struct page *,
+ struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address);
-void page_add_file_rmap(struct page *, struct vm_area_struct *,
- bool compound);
-void page_remove_rmap(struct page *, struct vm_area_struct *,
- bool compound);
-
-void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
+void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
+ struct vm_area_struct *);
+#define folio_add_file_rmap_pte(folio, page, vma) \
+ folio_add_file_rmap_ptes(folio, page, 1, vma)
+void folio_add_file_rmap_pmd(struct folio *, struct page *,
+ struct vm_area_struct *);
+void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
+ struct vm_area_struct *);
+#define folio_remove_rmap_pte(folio, page, vma) \
+ folio_remove_rmap_ptes(folio, page, 1, vma)
+void folio_remove_rmap_pmd(struct folio *, struct page *,
+ struct vm_area_struct *);
+
+void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
-void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
+void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address);
-static inline void __page_dup_rmap(struct page *page, bool compound)
+/* See folio_try_dup_anon_rmap_*() */
+static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
+ struct vm_area_struct *vma)
{
- if (compound) {
- struct folio *folio = (struct folio *)page;
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
- VM_BUG_ON_PAGE(compound && !PageHead(page), page);
- atomic_inc(&folio->_entire_mapcount);
- } else {
- atomic_inc(&page->_mapcount);
+ if (PageAnonExclusive(&folio->page)) {
+ if (unlikely(folio_needs_cow_for_dma(vma, folio)))
+ return -EBUSY;
+ ClearPageAnonExclusive(&folio->page);
}
+ atomic_inc(&folio->_entire_mapcount);
+ return 0;
+}
+
+/* See folio_try_share_anon_rmap_*() */
+static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+ VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);
+
+ /* Paired with the memory barrier in try_grab_folio(). */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_mb();
+
+ if (unlikely(folio_maybe_dma_pinned(folio)))
+ return -EBUSY;
+ ClearPageAnonExclusive(&folio->page);
+
+ /*
+ * This is conceptually a smp_wmb() paired with the smp_rmb() in
+ * gup_must_unshare().
+ */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_mb__after_atomic();
+ return 0;
+}
+
+static inline void hugetlb_add_file_rmap(struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
+
+ atomic_inc(&folio->_entire_mapcount);
+}
+
+static inline void hugetlb_remove_rmap(struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+
+ atomic_dec(&folio->_entire_mapcount);
}
-static inline void page_dup_file_rmap(struct page *page, bool compound)
+static __always_inline void __folio_dup_file_rmap(struct folio *folio,
+ struct page *page, int nr_pages, enum rmap_level level)
{
- __page_dup_rmap(page, compound);
+ __folio_rmap_sanity_checks(folio, page, nr_pages, level);
+
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ do {
+ atomic_inc(&page->_mapcount);
+ } while (page++, --nr_pages > 0);
+ break;
+ case RMAP_LEVEL_PMD:
+ atomic_inc(&folio->_entire_mapcount);
+ break;
+ }
}
/**
- * page_try_dup_anon_rmap - try duplicating a mapping of an already mapped
- * anonymous page
- * @page: the page to duplicate the mapping for
- * @compound: the page is mapped as compound or as a small page
- * @vma: the source vma
+ * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio
+ * @folio: The folio to duplicate the mappings of
+ * @page: The first page to duplicate the mappings of
+ * @nr_pages: The number of pages of which the mapping will be duplicated
*
- * The caller needs to hold the PT lock and the vma->vma_mm->write_protect_seq.
+ * The page range of the folio is defined by [page, page + nr_pages)
*
- * Duplicating the mapping can only fail if the page may be pinned; device
- * private pages cannot get pinned and consequently this function cannot fail.
+ * The caller needs to hold the page table lock.
+ */
+static inline void folio_dup_file_rmap_ptes(struct folio *folio,
+ struct page *page, int nr_pages)
+{
+ __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
+}
+#define folio_dup_file_rmap_pte(folio, page) \
+ folio_dup_file_rmap_ptes(folio, page, 1)
+
+/**
+ * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
+ * @folio: The folio to duplicate the mapping of
+ * @page: The first page to duplicate the mapping of
*
- * If duplicating the mapping succeeds, the page has to be mapped R/O into
- * the parent and the child. It must *not* get mapped writable after this call.
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
*
- * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
+ * The caller needs to hold the page table lock.
*/
-static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
- struct vm_area_struct *vma)
+static inline void folio_dup_file_rmap_pmd(struct folio *folio,
+ struct page *page)
{
- VM_BUG_ON_PAGE(!PageAnon(page), page);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE);
+#else
+ WARN_ON_ONCE(true);
+#endif
+}
- /*
- * No need to check+clear for already shared pages, including KSM
- * pages.
- */
- if (!PageAnonExclusive(page))
- goto dup;
+static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
+ struct page *page, int nr_pages, struct vm_area_struct *src_vma,
+ enum rmap_level level)
+{
+ bool maybe_pinned;
+ int i;
+
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+ __folio_rmap_sanity_checks(folio, page, nr_pages, level);
/*
- * If this page may have been pinned by the parent process,
- * don't allow to duplicate the mapping but instead require to e.g.,
- * copy the page immediately for the child so that we'll always
- * guarantee the pinned page won't be randomly replaced in the
+ * If this folio may have been pinned by the parent process,
+ * don't allow to duplicate the mappings but instead require to e.g.,
+ * copy the subpage immediately for the child so that we'll always
+ * guarantee the pinned folio won't be randomly replaced in the
* future on write faults.
*/
- if (likely(!is_device_private_page(page) &&
- unlikely(page_needs_cow_for_dma(vma, page))))
- return -EBUSY;
+ maybe_pinned = likely(!folio_is_device_private(folio)) &&
+ unlikely(folio_needs_cow_for_dma(src_vma, folio));
- ClearPageAnonExclusive(page);
/*
- * It's okay to share the anon page between both processes, mapping
- * the page R/O into both processes.
+ * No need to check+clear for already shared PTEs/PMDs of the
+ * folio. But if any page is PageAnonExclusive, we must fallback to
+ * copying if the folio maybe pinned.
*/
-dup:
- __page_dup_rmap(page, compound);
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ if (unlikely(maybe_pinned)) {
+ for (i = 0; i < nr_pages; i++)
+ if (PageAnonExclusive(page + i))
+ return -EBUSY;
+ }
+ do {
+ if (PageAnonExclusive(page))
+ ClearPageAnonExclusive(page);
+ atomic_inc(&page->_mapcount);
+ } while (page++, --nr_pages > 0);
+ break;
+ case RMAP_LEVEL_PMD:
+ if (PageAnonExclusive(page)) {
+ if (unlikely(maybe_pinned))
+ return -EBUSY;
+ ClearPageAnonExclusive(page);
+ }
+ atomic_inc(&folio->_entire_mapcount);
+ break;
+ }
return 0;
}
/**
- * page_try_share_anon_rmap - try marking an exclusive anonymous page possibly
- * shared to prepare for KSM or temporary unmapping
- * @page: the exclusive anonymous page to try marking possibly shared
+ * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range
+ * of a folio
+ * @folio: The folio to duplicate the mappings of
+ * @page: The first page to duplicate the mappings of
+ * @nr_pages: The number of pages of which the mapping will be duplicated
+ * @src_vma: The vm area from which the mappings are duplicated
*
- * The caller needs to hold the PT lock and has to have the page table entry
- * cleared/invalidated.
+ * The page range of the folio is defined by [page, page + nr_pages)
*
- * This is similar to page_try_dup_anon_rmap(), however, not used during fork()
- * to duplicate a mapping, but instead to prepare for KSM or temporarily
- * unmapping a page (swap, migration) via page_remove_rmap().
+ * The caller needs to hold the page table lock and the
+ * vma->vma_mm->write_protect_seq.
*
- * Marking the page shared can only fail if the page may be pinned; device
- * private pages cannot get pinned and consequently this function cannot fail.
+ * Duplicating the mappings can only fail if the folio may be pinned; device
+ * private folios cannot get pinned and consequently this function cannot fail
+ * for them.
*
- * Returns 0 if marking the page possibly shared succeeded. Returns -EBUSY
- * otherwise.
+ * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in
+ * the parent and the child. They must *not* be writable after this call
+ * succeeded.
+ *
+ * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise.
+ */
+static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
+ struct page *page, int nr_pages, struct vm_area_struct *src_vma)
+{
+ return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
+ RMAP_LEVEL_PTE);
+}
+#define folio_try_dup_anon_rmap_pte(folio, page, vma) \
+ folio_try_dup_anon_rmap_ptes(folio, page, 1, vma)
+
+/**
+ * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
+ * of a folio
+ * @folio: The folio to duplicate the mapping of
+ * @page: The first page to duplicate the mapping of
+ * @src_vma: The vm area from which the mapping is duplicated
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock and the
+ * vma->vma_mm->write_protect_seq.
+ *
+ * Duplicating the mapping can only fail if the folio may be pinned; device
+ * private folios cannot get pinned and consequently this function cannot fail
+ * for them.
+ *
+ * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in
+ * the parent and the child. They must *not* be writable after this call
+ * succeeded.
+ *
+ * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
*/
-static inline int page_try_share_anon_rmap(struct page *page)
+static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
+ struct page *page, struct vm_area_struct *src_vma)
{
- VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma,
+ RMAP_LEVEL_PMD);
+#else
+ WARN_ON_ONCE(true);
+ return -EBUSY;
+#endif
+}
+
+static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
+ struct page *page, int nr_pages, enum rmap_level level)
+{
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+ VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio);
+ __folio_rmap_sanity_checks(folio, page, nr_pages, level);
- /* device private pages cannot get pinned via GUP. */
- if (unlikely(is_device_private_page(page))) {
+ /* device private folios cannot get pinned via GUP. */
+ if (unlikely(folio_is_device_private(folio))) {
ClearPageAnonExclusive(page);
return 0;
}
@@ -347,7 +544,7 @@ static inline int page_try_share_anon_rmap(struct page *page)
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
smp_mb();
- if (unlikely(page_maybe_dma_pinned(page)))
+ if (unlikely(folio_maybe_dma_pinned(folio)))
return -EBUSY;
ClearPageAnonExclusive(page);
@@ -360,6 +557,68 @@ static inline int page_try_share_anon_rmap(struct page *page)
return 0;
}
+/**
+ * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page
+ * mapped by a PTE possibly shared to prepare
+ * for KSM or temporary unmapping
+ * @folio: The folio to share a mapping of
+ * @page: The mapped exclusive page
+ *
+ * The caller needs to hold the page table lock and has to have the page table
+ * entries cleared/invalidated.
+ *
+ * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during
+ * fork() to duplicate mappings, but instead to prepare for KSM or temporarily
+ * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte().
+ *
+ * Marking the mapped page shared can only fail if the folio maybe pinned;
+ * device private folios cannot get pinned and consequently this function cannot
+ * fail.
+ *
+ * Returns 0 if marking the mapped page possibly shared succeeded. Returns
+ * -EBUSY otherwise.
+ */
+static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
+ struct page *page)
+{
+ return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE);
+}
+
+/**
+ * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page
+ * range mapped by a PMD possibly shared to
+ * prepare for temporary unmapping
+ * @folio: The folio to share the mapping of
+ * @page: The first page to share the mapping of
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock and has to have the page table
+ * entries cleared/invalidated.
+ *
+ * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during
+ * fork() to duplicate a mapping, but instead to prepare for temporarily
+ * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd().
+ *
+ * Marking the mapped pages shared can only fail if the folio maybe pinned;
+ * device private folios cannot get pinned and consequently this function cannot
+ * fail.
+ *
+ * Returns 0 if marking the mapped pages possibly shared succeeded. Returns
+ * -EBUSY otherwise.
+ */
+static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
+ struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR,
+ RMAP_LEVEL_PMD);
+#else
+ WARN_ON_ONCE(true);
+ return -EBUSY;
+#endif
+}
+
/*
* Called from mm/vmscan.c to handle paging out
*/
@@ -477,7 +736,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
#define anon_vma_init() do {} while (0)
#define anon_vma_prepare(vma) (0)
-#define anon_vma_link(vma) do {} while (0)
static inline int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg,
diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 523c98b96cb4..90d8e4475f80 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -64,12 +64,14 @@ struct rpmsg_device {
};
typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32);
+typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool);
/**
* struct rpmsg_endpoint - binds a local rpmsg address to its user
* @rpdev: rpmsg channel device
* @refcount: when this drops to zero, the ept is deallocated
* @cb: rx callback handler
+ * @flow_cb: remote flow control callback handler
* @cb_lock: must be taken before accessing/changing @cb
* @addr: local rpmsg address
* @priv: private data for the driver's use
@@ -92,6 +94,7 @@ struct rpmsg_endpoint {
struct rpmsg_device *rpdev;
struct kref refcount;
rpmsg_rx_cb_t cb;
+ rpmsg_flowcontrol_cb_t flow_cb;
struct mutex cb_lock;
u32 addr;
void *priv;
@@ -106,6 +109,7 @@ struct rpmsg_endpoint {
* @probe: invoked when a matching rpmsg channel (i.e. device) is found
* @remove: invoked when the rpmsg channel is removed
* @callback: invoked when an inbound message is received on the channel
+ * @flowcontrol: invoked when remote side flow control request is received
*/
struct rpmsg_driver {
struct device_driver drv;
@@ -113,6 +117,7 @@ struct rpmsg_driver {
int (*probe)(struct rpmsg_device *dev);
void (*remove)(struct rpmsg_device *dev);
int (*callback)(struct rpmsg_device *, void *, int, void *, u32);
+ int (*flowcontrol)(struct rpmsg_device *, void *, bool);
};
static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val)
@@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept);
+int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst);
+
#else
static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev,
@@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept)
return -ENXIO;
}
+static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst)
+{
+ /* This shouldn't be possible */
+ WARN_ON(1);
+
+ return -ENXIO;
+}
+
#endif /* IS_ENABLED(CONFIG_RPMSG) */
/* use a macro to avoid include chaining to get THIS_MODULE */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
new file mode 100644
index 000000000000..bc8af3eb5598
--- /dev/null
+++ b/include/linux/rseq.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _LINUX_RSEQ_H
+#define _LINUX_RSEQ_H
+
+#ifdef CONFIG_RSEQ
+
+#include <linux/preempt.h>
+#include <linux/sched.h>
+
+/*
+ * Map the event mask on the user-space ABI enum rseq_cs_flags
+ * for direct mask checks.
+ */
+enum rseq_event_mask_bits {
+ RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
+ RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
+ RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
+};
+
+enum rseq_event_mask {
+ RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
+ RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
+ RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
+};
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+ if (t->rseq)
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+ if (current->rseq)
+ __rseq_handle_notify_resume(ksig, regs);
+}
+
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+ preempt_disable();
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
+ preempt_enable();
+ rseq_handle_notify_resume(ksig, regs);
+}
+
+/* rseq_preempt() requires preemption to be disabled. */
+static inline void rseq_preempt(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/* rseq_migrate() requires preemption to be disabled. */
+static inline void rseq_migrate(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Unregister rseq for a clone with CLONE_VM set.
+ */
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+ if (clone_flags & CLONE_VM) {
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+ } else {
+ t->rseq = current->rseq;
+ t->rseq_len = current->rseq_len;
+ t->rseq_sig = current->rseq_sig;
+ t->rseq_event_mask = current->rseq_event_mask;
+ }
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+}
+
+#else
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+}
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+}
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+}
+static inline void rseq_preempt(struct task_struct *t)
+{
+}
+static inline void rseq_migrate(struct task_struct *t)
+{
+}
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+}
+static inline void rseq_execve(struct task_struct *t)
+{
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_RSEQ
+
+void rseq_syscall(struct pt_regs *regs);
+
+#else
+
+static inline void rseq_syscall(struct pt_regs *regs)
+{
+}
+
+#endif
+
+#endif /* _LINUX_RSEQ_H */
diff --git a/include/linux/rslib.h b/include/linux/rslib.h
index 238bb85243d3..a04dacbdc8ae 100644
--- a/include/linux/rslib.h
+++ b/include/linux/rslib.h
@@ -10,7 +10,6 @@
#ifndef _RSLIB_H_
#define _RSLIB_H_
-#include <linux/list.h>
#include <linux/types.h> /* for gfp_t */
#include <linux/gfp.h> /* for GFP_KERNEL */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 1fd9c6a21ebe..3f4d315aaec9 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -42,7 +42,7 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
#include <linux/timerqueue.h>
#include <linux/workqueue.h>
-extern struct class *rtc_class;
+extern const struct class rtc_class;
/*
* For these RTC methods the device parameter is the physical device
@@ -146,6 +146,7 @@ struct rtc_device {
time64_t range_min;
timeu64_t range_max;
+ timeu64_t alarm_offset_max;
time64_t start_secs;
time64_t offset_secs;
bool set_start_time;
@@ -224,6 +225,23 @@ static inline bool is_leap_year(unsigned int year)
return (!(year % 4) && (year % 100)) || !(year % 400);
}
+/**
+ * rtc_bound_alarmtime() - Return alarm time bound by rtc limit
+ * @rtc: Pointer to rtc device structure
+ * @requested: Requested alarm timeout
+ *
+ * Return: Alarm timeout bound by maximum alarm time supported by rtc.
+ */
+static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc,
+ ktime_t requested)
+{
+ if (rtc->alarm_offset_max &&
+ rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested))
+ return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC);
+
+ return requested;
+}
+
#define devm_rtc_register_device(device) \
__devm_rtc_register_device(THIS_MODULE, device)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3d6cf306cd55..cdfc897f1e3c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -10,6 +10,13 @@
#include <uapi/linux/rtnetlink.h>
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
+
+static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net,
+ u32 pid, u32 group, int echo)
+{
+ return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo);
+}
+
extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
u32 group, const struct nlmsghdr *nlh, gfp_t flags);
@@ -40,6 +47,7 @@ extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
extern wait_queue_head_t netdev_unregistering_wq;
+extern atomic_t dev_unreg_count;
extern struct rw_semaphore pernet_ops_rwsem;
extern struct rw_semaphore net_rwsem;
@@ -72,6 +80,18 @@ static inline bool lockdep_rtnl_is_held(void)
#define rtnl_dereference(p) \
rcu_dereference_protected(p, lockdep_rtnl_is_held())
+/**
+ * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning
+ * its old value
+ * @rp: RCU pointer, whose value is returned
+ * @p: regular pointer
+ *
+ * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated
+ * pointer. The old value of @rp is returned, and @rp is set to @p
+ */
+#define rcu_replace_pointer_rtnl(rp, p) \
+ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held())
+
static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
{
return rtnl_dereference(dev->ingress_queue);
@@ -130,4 +150,28 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
extern void rtnl_offload_xstats_notify(struct net_device *dev);
+static inline int rtnl_has_listeners(const struct net *net, u32 group)
+{
+ struct sock *rtnl = net->rtnl;
+
+ return netlink_has_listeners(rtnl, group);
+}
+
+/**
+ * rtnl_notify_needed - check if notification is needed
+ * @net: Pointer to the net namespace
+ * @nlflags: netlink ingress message flags
+ * @group: rtnl group
+ *
+ * Based on the ingress message flags and rtnl group, returns true
+ * if a notification is needed, false otherwise.
+ */
+static inline bool
+rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
+{
+ return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
+}
+
+void netdev_set_operstate(struct net_device *dev, int newstate);
+
#endif /* __LINUX_RTNETLINK_H */
diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h
index 534038d962e4..4612ef09a0c7 100644
--- a/include/linux/rtsx_pci.h
+++ b/include/linux/rtsx_pci.h
@@ -60,6 +60,7 @@
#define SD_EXIST (1 << 16)
#define DELINK_INT GPIO0_INT
#define MS_OC_INT (1 << 23)
+#define SD_OVP_INT (1 << 23)
#define SD_OC_INT (1 << 22)
#define CARD_INT (XD_INT | MS_INT | SD_INT)
@@ -80,6 +81,7 @@
#define OC_INT_EN (1 << 23)
#define DELINK_INT_EN GPIO0_INT_EN
#define MS_OC_INT_EN (1 << 23)
+#define SD_OVP_INT_EN (1 << 23)
#define SD_OC_INT_EN (1 << 22)
#define RTSX_DUM_REG 0x1C
@@ -583,6 +585,7 @@
#define OBFF_DISABLE 0x00
#define CDRESUMECTL 0xFE52
+#define CDGW 0xFE53
#define WAKE_SEL_CTL 0xFE54
#define PCLK_CTL 0xFE55
#define PCLK_MODE_SEL 0x20
@@ -764,6 +767,9 @@
#define SD_VIO_LDO_1V8 0x40
#define SD_VIO_LDO_3V3 0x70
+#define RTS5264_AUTOLOAD_CFG2 0xFF7D
+#define RTS5264_CHIP_RST_N_SEL (1 << 6)
+
#define RTS5260_AUTOLOAD_CFG4 0xFF7F
#define RTS5260_MIMO_DISABLE 0x8A
/*RTS5261*/
@@ -1261,6 +1267,7 @@ struct rtsx_pcr {
u8 dma_error_count;
u8 ocp_stat;
u8 ocp_stat2;
+ u8 ovp_stat;
u8 rtd3_en;
};
@@ -1271,6 +1278,7 @@ struct rtsx_pcr {
#define PID_5260 0x5260
#define PID_5261 0x5261
#define PID_5228 0x5228
+#define PID_5264 0x5264
#define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid))
#define PCI_VID(pcr) ((pcr)->pci->vendor)
diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h
new file mode 100644
index 000000000000..309ca72f2dfb
--- /dev/null
+++ b/include/linux/rw_hint.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RW_HINT_H
+#define _LINUX_RW_HINT_H
+
+#include <linux/build_bug.h>
+#include <linux/compiler_attributes.h>
+#include <uapi/linux/fcntl.h>
+
+/* Block storage write lifetime hint values. */
+enum rw_hint {
+ WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET,
+ WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
+ WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
+ WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
+ WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
+ WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+} __packed;
+
+/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */
+#ifndef __CHECKER__
+static_assert(sizeof(enum rw_hint) == 1);
+#endif
+
+#endif /* _LINUX_RW_HINT_H */
diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h
index 1d264dd08625..f2394a409c9d 100644
--- a/include/linux/rwbase_rt.h
+++ b/include/linux/rwbase_rt.h
@@ -26,12 +26,17 @@ struct rwbase_rt {
} while (0)
-static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
{
return atomic_read(&rwb->readers) != READER_BIAS;
}
-static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
+{
+ return atomic_read(&rwb->readers) == WRITER_BIAS;
+}
+
+static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
{
return atomic_read(&rwb->readers) > 0;
}
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 1dd530ce8b45..c8b543d428b0 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -66,14 +66,24 @@ struct rw_semaphore {
#endif
};
-/* In all implementations count != 0 means locked */
+#define RWSEM_UNLOCKED_VALUE 0UL
+#define RWSEM_WRITER_LOCKED (1UL << 0)
+#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
- return atomic_long_read(&sem->count) != 0;
+ return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE;
}
-#define RWSEM_UNLOCKED_VALUE 0L
-#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+{
+ WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE);
+}
+
+static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+{
+ WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED));
+}
/* Common initializer macros and functions */
@@ -152,11 +162,21 @@ do { \
__init_rwsem((sem), #sem, &__key); \
} while (0)
-static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
+static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
{
return rw_base_is_locked(&sem->rwbase);
}
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+{
+ WARN_ON(!rwsem_is_locked(sem));
+}
+
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+{
+ WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
+}
+
static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
{
return rw_base_is_contended(&sem->rwbase);
@@ -169,6 +189,22 @@ static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
* the RT specific variant.
*/
+static inline void rwsem_assert_held(const struct rw_semaphore *sem)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ lockdep_assert_held(sem);
+ else
+ rwsem_assert_held_nolockdep(sem);
+}
+
+static inline void rwsem_assert_held_write(const struct rw_semaphore *sem)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ lockdep_assert_held_write(sem);
+ else
+ rwsem_assert_held_write_nolockdep(sem);
+}
+
/*
* lock for reading
*/
@@ -203,11 +239,11 @@ extern void up_read(struct rw_semaphore *sem);
extern void up_write(struct rw_semaphore *sem);
DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T))
-DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T))
-
-DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T))
-DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T))
+DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T))
+DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0)
+DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T))
+DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T))
/*
* downgrade write lock to read lock
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..3c2abbc587b4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -10,33 +10,41 @@
#include <uapi/linux/sched.h>
#include <asm/current.h>
-
-#include <linux/pid.h>
-#include <linux/sem.h>
+#include <asm/processor.h>
+#include <linux/thread_info.h>
+#include <linux/preempt.h>
+#include <linux/cpumask.h>
+
+#include <linux/cache.h>
+#include <linux/irqflags_types.h>
+#include <linux/smp_types.h>
+#include <linux/pid_types.h>
+#include <linux/sem_types.h>
#include <linux/shm.h>
#include <linux/kmsan_types.h>
-#include <linux/mutex.h>
-#include <linux/plist.h>
-#include <linux/hrtimer.h>
-#include <linux/irqflags.h>
-#include <linux/seccomp.h>
-#include <linux/nodemask.h>
-#include <linux/rcupdate.h>
-#include <linux/refcount.h>
+#include <linux/mutex_types.h>
+#include <linux/plist_types.h>
+#include <linux/hrtimer_types.h>
+#include <linux/timer_types.h>
+#include <linux/seccomp_types.h>
+#include <linux/nodemask_types.h>
+#include <linux/refcount_types.h>
#include <linux/resource.h>
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
-#include <linux/syscall_user_dispatch.h>
+#include <linux/syscall_user_dispatch_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
-#include <linux/posix-timers.h>
-#include <linux/rseq.h>
-#include <linux/seqlock.h>
+#include <linux/posix-timers_types.h>
+#include <linux/restart_block.h>
+#include <uapi/linux/rseq.h>
+#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
#include <linux/livepatch_sched.h>
+#include <linux/uidgid_types.h>
#include <asm/kmap_size.h>
/* task_struct member predeclarations (sorted alphabetically): */
@@ -63,26 +71,27 @@ struct robust_list_head;
struct root_domain;
struct rq;
struct sched_attr;
-struct sched_param;
+struct sched_dl_entity;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
+struct task_struct;
struct user_event_mm;
/*
* Task state bitmask. NOTE! These bits are also
* encoded in fs/proc/array.c: get_task_state().
*
- * We have two separate sets of flags: task->state
+ * We have two separate sets of flags: task->__state
* is about runnability, while task->exit_state are
* about the task exiting. Confusing, but this way
* modifying one set can't modify the other one by
* mistake.
*/
-/* Used in tsk->state: */
+/* Used in tsk->__state: */
#define TASK_RUNNING 0x00000000
#define TASK_INTERRUPTIBLE 0x00000001
#define TASK_UNINTERRUPTIBLE 0x00000002
@@ -92,7 +101,7 @@ struct user_event_mm;
#define EXIT_DEAD 0x00000010
#define EXIT_ZOMBIE 0x00000020
#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
-/* Used in tsk->state again: */
+/* Used in tsk->__state again: */
#define TASK_PARKED 0x00000040
#define TASK_DEAD 0x00000080
#define TASK_WAKEKILL 0x00000100
@@ -173,7 +182,7 @@ struct user_event_mm;
#endif
/*
- * set_current_state() includes a barrier so that the write of current->state
+ * set_current_state() includes a barrier so that the write of current->__state
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
@@ -196,9 +205,9 @@ struct user_event_mm;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* where wake_up_state()/try_to_wake_up() executes a full memory barrier before
- * accessing p->state.
+ * accessing p->__state.
*
- * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
@@ -370,6 +379,10 @@ extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
#endif
+struct sched_param {
+ int sched_priority;
+};
+
struct sched_info {
#ifdef CONFIG_SCHED_INFO
/* Cumulative counters: */
@@ -410,42 +423,6 @@ struct load_weight {
u32 inv_weight;
};
-/**
- * struct util_est - Estimation utilization of FAIR tasks
- * @enqueued: instantaneous estimated utilization of a task/cpu
- * @ewma: the Exponential Weighted Moving Average (EWMA)
- * utilization of a task
- *
- * Support data structure to track an Exponential Weighted Moving Average
- * (EWMA) of a FAIR task's utilization. New samples are added to the moving
- * average each time a task completes an activation. Sample's weight is chosen
- * so that the EWMA will be relatively insensitive to transient changes to the
- * task's workload.
- *
- * The enqueued attribute has a slightly different meaning for tasks and cpus:
- * - task: the task's util_avg at last task dequeue time
- * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
- * Thus, the util_est.enqueued of a task represents the contribution on the
- * estimated utilization of the CPU where that task is currently enqueued.
- *
- * Only for tasks we track a moving average of the past instantaneous
- * estimated utilization. This allows to absorb sporadic drops in utilization
- * of an otherwise almost periodic task.
- *
- * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
- * updates. When a task is dequeued, its util_est should not be updated if its
- * util_avg has not been updated in the meantime.
- * This information is mapped into the MSB bit of util_est.enqueued at dequeue
- * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
- * for a task) it is safe to use MSB.
- */
-struct util_est {
- unsigned int enqueued;
- unsigned int ewma;
-#define UTIL_EST_WEIGHT_SHIFT 2
-#define UTIL_AVG_UNCHANGED 0x80000000
-} __attribute__((__aligned__(sizeof(u64))));
-
/*
* The load/runnable/util_avg accumulates an infinite geometric series
* (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
@@ -500,9 +477,20 @@ struct sched_avg {
unsigned long load_avg;
unsigned long runnable_avg;
unsigned long util_avg;
- struct util_est util_est;
+ unsigned int util_est;
} ____cacheline_aligned;
+/*
+ * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
+ * updates. When a task is dequeued, its util_est should not be updated if its
+ * util_avg has not been updated in the meantime.
+ * This information is mapped into the MSB bit of util_est at dequeue time.
+ * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
+ * it is safe to use MSB.
+ */
+#define UTIL_EST_WEIGHT_SHIFT 2
+#define UTIL_AVG_UNCHANGED 0x80000000
+
struct sched_statistics {
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
@@ -520,7 +508,7 @@ struct sched_statistics {
u64 block_max;
s64 sum_block_runtime;
- u64 exec_max;
+ s64 exec_max;
u64 slice_max;
u64 nr_migrations_cold;
@@ -549,13 +537,18 @@ struct sched_entity {
/* For load-balancing: */
struct load_weight load;
struct rb_node run_node;
+ u64 deadline;
+ u64 min_vruntime;
+
struct list_head group_node;
unsigned int on_rq;
u64 exec_start;
u64 sum_exec_runtime;
- u64 vruntime;
u64 prev_sum_exec_runtime;
+ u64 vruntime;
+ s64 vlag;
+ u64 slice;
u64 nr_migrations;
@@ -599,6 +592,9 @@ struct sched_rt_entity {
#endif
} __randomize_layout;
+typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
+typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
+
struct sched_dl_entity {
struct rb_node rb_node;
@@ -646,6 +642,7 @@ struct sched_dl_entity {
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
+ unsigned int dl_server : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -660,7 +657,20 @@ struct sched_dl_entity {
* timer is needed to decrease the active utilization at the correct
* time.
*/
- struct hrtimer inactive_timer;
+ struct hrtimer inactive_timer;
+
+ /*
+ * Bits for DL-server functionality. Also see the comment near
+ * dl_server_update().
+ *
+ * @rq the runqueue this server is for
+ *
+ * @server_has_tasks() returns true if @server_pick return a
+ * runnable task.
+ */
+ struct rq *rq;
+ dl_server_has_tasks_f server_has_tasks;
+ dl_server_pick_f server_pick;
#ifdef CONFIG_RT_MUTEXES
/*
@@ -745,10 +755,8 @@ struct task_struct {
#endif
unsigned int __state;
-#ifdef CONFIG_PREEMPT_RT
/* saved state for "spinlock sleepers" */
unsigned int saved_state;
-#endif
/*
* This begins the randomizable portion of task_struct. Only
@@ -789,6 +797,7 @@ struct task_struct {
struct sched_entity se;
struct sched_rt_entity rt;
struct sched_dl_entity dl;
+ struct sched_dl_entity *dl_server;
const struct sched_class *sched_class;
#ifdef CONFIG_SCHED_CORE
@@ -849,6 +858,8 @@ struct task_struct {
u8 rcu_tasks_idx;
int rcu_tasks_idle_cpu;
struct list_head rcu_tasks_holdout_list;
+ int rcu_tasks_exit_cpu;
+ struct list_head rcu_tasks_exit_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
@@ -870,6 +881,7 @@ struct task_struct {
struct mm_struct *mm;
struct mm_struct *active_mm;
+ struct address_space *faults_disabled_mapping;
int exit_state;
int exit_code;
@@ -906,8 +918,11 @@ struct task_struct {
* ->sched_remote_wakeup gets used, so it can be in this word.
*/
unsigned sched_remote_wakeup:1;
+#ifdef CONFIG_RT_MUTEXES
+ unsigned sched_rt_mutex:1;
+#endif
- /* Bit to tell LSMs we're in execve(): */
+ /* Bit to tell TOMOYO we're in execve(): */
unsigned in_execve:1;
unsigned in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
@@ -944,7 +959,7 @@ struct task_struct {
/* Recursion prevention for eventfd_signal() */
unsigned in_eventfd:1;
#endif
-#ifdef CONFIG_IOMMU_SVA
+#ifdef CONFIG_ARCH_HAS_CPU_PASID
unsigned pasid_activated:1;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
@@ -997,7 +1012,6 @@ struct task_struct {
/* PID/PID hash table linkage. */
struct pid *thread_pid;
struct hlist_node pid_links[PIDTYPE_MAX];
- struct list_head thread_group;
struct list_head thread_node;
struct completion *vfork_done;
@@ -1247,6 +1261,7 @@ struct task_struct {
/* Protected by alloc_lock: */
struct mempolicy *mempolicy;
short il_prev;
+ u8 il_weight;
short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
@@ -1438,6 +1453,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
@@ -1548,114 +1567,6 @@ struct task_struct {
*/
};
-static inline struct pid *task_pid(struct task_struct *task)
-{
- return task->thread_pid;
-}
-
-/*
- * the helpers to get the task's different pids as they are seen
- * from various namespaces
- *
- * task_xid_nr() : global id, i.e. the id seen from the init namespace;
- * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of
- * current.
- * task_xid_nr_ns() : id seen from the ns specified;
- *
- * see also pid_nr() etc in include/linux/pid.h
- */
-pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
-
-static inline pid_t task_pid_nr(struct task_struct *tsk)
-{
- return tsk->pid;
-}
-
-static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
-}
-
-static inline pid_t task_pid_vnr(struct task_struct *tsk)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
-}
-
-
-static inline pid_t task_tgid_nr(struct task_struct *tsk)
-{
- return tsk->tgid;
-}
-
-/**
- * pid_alive - check that a task structure is not stale
- * @p: Task structure to be checked.
- *
- * Test if a process is not yet dead (at most zombie state)
- * If pid_alive fails, then pointers within the task structure
- * can be stale and must not be dereferenced.
- *
- * Return: 1 if the process is alive. 0 otherwise.
- */
-static inline int pid_alive(const struct task_struct *p)
-{
- return p->thread_pid != NULL;
-}
-
-static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
-}
-
-static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
-}
-
-
-static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
-}
-
-static inline pid_t task_session_vnr(struct task_struct *tsk)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
-}
-
-static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
-}
-
-static inline pid_t task_tgid_vnr(struct task_struct *tsk)
-{
- return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
-}
-
-static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
-{
- pid_t pid = 0;
-
- rcu_read_lock();
- if (pid_alive(tsk))
- pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
- rcu_read_unlock();
-
- return pid;
-}
-
-static inline pid_t task_ppid_nr(const struct task_struct *tsk)
-{
- return task_ppid_nr_ns(tsk, &init_pid_ns);
-}
-
-/* Obsolete, do not use: */
-static inline pid_t task_pgrp_nr(struct task_struct *tsk)
-{
- return task_pgrp_nr_ns(tsk, &init_pid_ns);
-}
-
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
@@ -1666,7 +1577,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
- if (tsk_state == TASK_IDLE)
+ if ((tsk_state & TASK_IDLE) == TASK_IDLE)
state = TASK_REPORT_IDLE;
/*
@@ -1674,7 +1585,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
* to userspace, we can make this appear as if the task has gone through
* a regular rt_mutex_lock() call.
*/
- if (tsk_state == TASK_RTLOCK_WAIT)
+ if (tsk_state & TASK_RTLOCK_WAIT)
state = TASK_UNINTERRUPTIBLE;
return fls(state);
@@ -1699,20 +1610,6 @@ static inline char task_state_to_char(struct task_struct *tsk)
return task_index_to_char(task_state_index(tsk));
}
-/**
- * is_global_init - check if a task structure is init. Since init
- * is free to have sub-threads we need to check tgid.
- * @tsk: Task structure to be checked.
- *
- * Check if a task structure is the first user space task the kernel created.
- *
- * Return: 1 if the task structure is init. 0 otherwise.
- */
-static inline int is_global_init(struct task_struct *tsk)
-{
- return task_tgid_nr(tsk) == 1;
-}
-
extern struct pid *cad_pid;
/*
@@ -1729,26 +1626,27 @@ extern struct pid *cad_pid;
#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* Dumped core */
#define PF_SIGNALED 0x00000400 /* Killed by a signal */
-#define PF_MEMALLOC 0x00000800 /* Allocating memory */
+#define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */
#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
#define PF__HOLE__00010000 0x00010000
#define PF_KSWAPD 0x00020000 /* I am kswapd */
-#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */
-#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */
+#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
+#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
#define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to,
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
-#define PF__HOLE__00800000 0x00800000
-#define PF__HOLE__01000000 0x01000000
+#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */
+#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */
#define PF__HOLE__02000000 0x02000000
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
-#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
-#define PF__HOLE__20000000 0x20000000
+#define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning.
+ * See memalloc_pin_save() */
+#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */
#define PF__HOLE__40000000 0x40000000
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
@@ -1853,7 +1751,17 @@ extern int task_can_attach(struct task_struct *p);
extern int dl_bw_alloc(int cpu, u64 dl_bw);
extern void dl_bw_free(int cpu, u64 dl_bw);
#ifdef CONFIG_SMP
+
+/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+
+/**
+ * set_cpus_allowed_ptr - set CPU affinity mask of a task
+ * @p: the task
+ * @new_mask: CPU affinity mask
+ *
+ * Return: zero if successful, or a negative error code
+ */
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
extern void release_user_cpus_ptr(struct task_struct *p);
@@ -1932,9 +1840,7 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p);
void yield(void);
union thread_union {
-#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK
struct task_struct task;
-#endif
#ifndef CONFIG_THREAD_INFO_IN_TASK
struct thread_info thread_info;
#endif
@@ -2154,15 +2060,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
__cond_resched_rwlock_write(lock); \
})
-static inline void cond_resched_rcu(void)
-{
-#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
- rcu_read_unlock();
- cond_resched();
- rcu_read_lock();
-#endif
-}
-
#ifdef CONFIG_PREEMPT_DYNAMIC
extern bool preempt_model_none(void);
@@ -2204,37 +2101,6 @@ static inline bool preempt_model_preemptible(void)
return preempt_model_full() || preempt_model_rt();
}
-/*
- * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
- * but a general need for low latency)
- */
-static inline int spin_needbreak(spinlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
- return spin_is_contended(lock);
-#else
- return 0;
-#endif
-}
-
-/*
- * Check if a rwlock is contended.
- * Returns non-zero if there is another task waiting on the rwlock.
- * Returns zero if the lock is not contended or the system / underlying
- * rwlock implementation does not support contention detection.
- * Technically does not depend on CONFIG_PREEMPTION, but a general need
- * for low latency.
- */
-static inline int rwlock_needbreak(rwlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
- return rwlock_is_contended(lock);
-#else
- return 0;
-#endif
-}
-
static __always_inline bool need_resched(void)
{
return unlikely(tif_need_resched());
@@ -2269,6 +2135,8 @@ extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);
+#include <linux/spinlock.h>
+
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.
@@ -2305,137 +2173,16 @@ static inline bool owner_on_cpu(struct task_struct *owner)
unsigned long sched_cpu_util(int cpu);
#endif /* CONFIG_SMP */
-#ifdef CONFIG_RSEQ
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
- RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
- RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
- RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
- RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
- RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
- RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
- if (t->rseq)
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-}
-
-void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
-
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
-{
- if (current->rseq)
- __rseq_handle_notify_resume(ksig, regs);
-}
-
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
-{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- preempt_enable();
- rseq_handle_notify_resume(ksig, regs);
-}
-
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
-{
- __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
-}
-
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
-{
- __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
-}
-
-/*
- * If parent process has a registered restartable sequences area, the
- * child inherits. Unregister rseq for a clone with CLONE_VM set.
- */
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
-{
- if (clone_flags & CLONE_VM) {
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
- } else {
- t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
- t->rseq_sig = current->rseq_sig;
- t->rseq_event_mask = current->rseq_event_mask;
- }
-}
-
-static inline void rseq_execve(struct task_struct *t)
-{
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
-}
-
-#else
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-}
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
-{
-}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
-{
-}
-static inline void rseq_preempt(struct task_struct *t)
-{
-}
-static inline void rseq_migrate(struct task_struct *t)
-{
-}
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
-{
-}
-static inline void rseq_execve(struct task_struct *t)
-{
-}
-
-#endif
-
-#ifdef CONFIG_DEBUG_RSEQ
-
-void rseq_syscall(struct pt_regs *regs);
-
-#else
-
-static inline void rseq_syscall(struct pt_regs *regs)
-{
-}
-
-#endif
-
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
unsigned long uaddr);
+extern int sched_core_idle_cpu(int cpu);
#else
static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
+static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
#endif
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0ee96ea7a0e9..02f5090ffea2 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */
#define MMF_MULTIPROCESS 26 /* mm is shared between processes */
/*
@@ -85,10 +86,22 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HAS_MDWE 28
#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE)
-#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
+
+#define MMF_HAS_MDWE_NO_INHERIT 29
+
+#define MMF_VM_MERGE_ANY 30
+#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
- MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
+ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
+ MMF_VM_MERGE_ANY_MASK)
+
+static inline unsigned long mmf_init_flags(unsigned long flags)
+{
+ if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT))
+ flags &= ~((1UL << MMF_HAS_MDWE) |
+ (1UL << MMF_HAS_MDWE_NO_INHERIT));
+ return flags & MMF_INIT_MASK;
+}
-#define MMF_VM_MERGE_ANY 29
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 7c83d4d5a971..df3aca89d4f5 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_DEADLINE_H
+#define _LINUX_SCHED_DEADLINE_H
/*
* SCHED_DEADLINE tasks has negative priorities, reflecting
@@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);
#endif /* CONFIG_SMP */
+
+#endif /* _LINUX_SCHED_DEADLINE_H */
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index fe1a46f30d24..2b461129d1fa 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -2,6 +2,7 @@
#define _LINUX_SCHED_ISOLATION_H
#include <linux/cpumask.h>
+#include <linux/cpuset.h>
#include <linux/init.h>
#include <linux/tick.h>
@@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
static inline bool cpu_is_isolated(int cpu)
{
return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) ||
- !housekeeping_test_cpu(cpu, HK_TYPE_TICK);
+ !housekeeping_test_cpu(cpu, HK_TYPE_TICK) ||
+ cpuset_cpu_is_isolated(cpu);
}
#endif /* _LINUX_SCHED_ISOLATION_H */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 8d89c8c4fac1..b6543f9d78d6 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags)
{
unsigned int pflags = READ_ONCE(current->flags);
- if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
+ if (unlikely(pflags & (PF_MEMALLOC_NOIO |
+ PF_MEMALLOC_NOFS |
+ PF_MEMALLOC_NORECLAIM |
+ PF_MEMALLOC_NOWARN |
+ PF_MEMALLOC_PIN))) {
/*
- * NOIO implies both NOIO and NOFS and it is a weaker context
- * so always make sure it makes precedence
+ * Stronger flags before weaker flags:
+ * NORECLAIM implies NOIO, which in turn implies NOFS
*/
- if (pflags & PF_MEMALLOC_NOIO)
+ if (pflags & PF_MEMALLOC_NORECLAIM)
+ flags &= ~__GFP_DIRECT_RECLAIM;
+ else if (pflags & PF_MEMALLOC_NOIO)
flags &= ~(__GFP_IO | __GFP_FS);
else if (pflags & PF_MEMALLOC_NOFS)
flags &= ~__GFP_FS;
+ if (pflags & PF_MEMALLOC_NOWARN)
+ flags |= __GFP_NOWARN;
+
if (pflags & PF_MEMALLOC_PIN)
flags &= ~__GFP_MOVABLE;
}
@@ -307,6 +316,24 @@ static inline void might_alloc(gfp_t gfp_mask)
}
/**
+ * memalloc_flags_save - Add a PF_* flag to current->flags, save old value
+ *
+ * This allows PF_* flags to be conveniently added, irrespective of current
+ * value, and then the old version restored with memalloc_flags_restore().
+ */
+static inline unsigned memalloc_flags_save(unsigned flags)
+{
+ unsigned oldflags = ~current->flags & flags;
+ current->flags |= flags;
+ return oldflags;
+}
+
+static inline void memalloc_flags_restore(unsigned flags)
+{
+ current->flags &= ~flags;
+}
+
+/**
* memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
*
* This functions marks the beginning of the GFP_NOIO allocation scope.
@@ -315,13 +342,12 @@ static inline void might_alloc(gfp_t gfp_mask)
* point of view. Use memalloc_noio_restore to end the scope with flags
* returned by this function.
*
- * This function is safe to be used from any context.
+ * Context: This function is safe to be used from any context.
+ * Return: The saved flags to be passed to memalloc_noio_restore.
*/
static inline unsigned int memalloc_noio_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
- current->flags |= PF_MEMALLOC_NOIO;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_NOIO);
}
/**
@@ -334,7 +360,7 @@ static inline unsigned int memalloc_noio_save(void)
*/
static inline void memalloc_noio_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+ memalloc_flags_restore(flags);
}
/**
@@ -346,13 +372,12 @@ static inline void memalloc_noio_restore(unsigned int flags)
* point of view. Use memalloc_nofs_restore to end the scope with flags
* returned by this function.
*
- * This function is safe to be used from any context.
+ * Context: This function is safe to be used from any context.
+ * Return: The saved flags to be passed to memalloc_nofs_restore.
*/
static inline unsigned int memalloc_nofs_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
- current->flags |= PF_MEMALLOC_NOFS;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_NOFS);
}
/**
@@ -365,32 +390,76 @@ static inline unsigned int memalloc_nofs_save(void)
*/
static inline void memalloc_nofs_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+ memalloc_flags_restore(flags);
}
+/**
+ * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope.
+ *
+ * This function marks the beginning of the __GFP_MEMALLOC allocation scope.
+ * All further allocations will implicitly add the __GFP_MEMALLOC flag, which
+ * prevents entering reclaim and allows access to all memory reserves. This
+ * should only be used when the caller guarantees the allocation will allow more
+ * memory to be freed very shortly, i.e. it needs to allocate some memory in
+ * the process of freeing memory, and cannot reclaim due to potential recursion.
+ *
+ * Users of this scope have to be extremely careful to not deplete the reserves
+ * completely and implement a throttling mechanism which controls the
+ * consumption of the reserve based on the amount of freed memory. Usage of a
+ * pre-allocated pool (e.g. mempool) should be always considered before using
+ * this scope.
+ *
+ * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC
+ *
+ * Context: This function should not be used in an interrupt context as that one
+ * does not give PF_MEMALLOC access to reserves.
+ * See __gfp_pfmemalloc_flags().
+ * Return: The saved flags to be passed to memalloc_noreclaim_restore.
+ */
static inline unsigned int memalloc_noreclaim_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC);
}
+/**
+ * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save
+ * function. Always make sure that the given flags is the return value from the
+ * pairing memalloc_noreclaim_save call.
+ */
static inline void memalloc_noreclaim_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+ memalloc_flags_restore(flags);
}
+/**
+ * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope.
+ *
+ * This function marks the beginning of the ~__GFP_MOVABLE allocation scope.
+ * All further allocations will implicitly remove the __GFP_MOVABLE flag, which
+ * will constraint the allocations to zones that allow long term pinning, i.e.
+ * not ZONE_MOVABLE zones.
+ *
+ * Return: The saved flags to be passed to memalloc_pin_restore.
+ */
static inline unsigned int memalloc_pin_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_PIN;
-
- current->flags |= PF_MEMALLOC_PIN;
- return flags;
+ return memalloc_flags_save(PF_MEMALLOC_PIN);
}
+/**
+ * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function.
+ * Always make sure that the given flags is the return value from the pairing
+ * memalloc_pin_save call.
+ */
static inline void memalloc_pin_restore(unsigned int flags)
{
- current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
+ memalloc_flags_restore(flags);
}
#ifdef CONFIG_MEMCG
@@ -403,6 +472,10 @@ DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
* __GFP_ACCOUNT allocations till the end of the scope will be charged to the
* given memcg.
*
+ * Please, make sure that caller has a reference to the passed memcg structure,
+ * so its lifetime is guaranteed to exceed the scope between two
+ * set_active_memcg() calls.
+ *
* NOTE: This function can nest. Users must save the return value and
* reset the previous value after their own charging scope is over.
*/
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 3988762efe15..52b22c5c396d 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -15,13 +15,23 @@
#define TNF_FAULT_LOCAL 0x08
#define TNF_MIGRATE_FAIL 0x10
+enum numa_vmaskip_reason {
+ NUMAB_SKIP_UNSUITABLE,
+ NUMAB_SKIP_SHARED_RO,
+ NUMAB_SKIP_INACCESSIBLE,
+ NUMAB_SKIP_SCAN_DELAY,
+ NUMAB_SKIP_PID_INACTIVE,
+ NUMAB_SKIP_IGNORE_PID,
+ NUMAB_SKIP_SEQ_COMPLETED,
+};
+
#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p, bool final);
-extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
- int src_nid, int dst_cpu);
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
+ int src_nid, int dst_cpu);
#else
static inline void task_numa_fault(int last_node, int node, int pages,
int flags)
@@ -38,7 +48,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,
- struct page *page, int src_nid, int dst_cpu)
+ struct folio *folio, int src_nid, int dst_cpu)
{
return true;
}
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 994c25640e15..b2b9e6eb9683 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk)
}
#ifdef CONFIG_RT_MUTEXES
+extern void rt_mutex_pre_schedule(void);
+extern void rt_mutex_schedule(void);
+extern void rt_mutex_post_schedule(void);
+
/*
* Must hold either p->pi_lock or task_rq(p)->lock.
*/
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index fad77b5172e2..b04a5d04dee9 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -110,13 +110,20 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
/*
- * Domain members share CPU package resources (i.e. caches)
+ * Domain members share CPU cluster (LLC tags or L2 cache)
+ *
+ * NEEDS_GROUPS: Clusters are shared between groups.
+ */
+SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS)
+
+/*
+ * Domain members share CPU Last Level Caches
*
* SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share
* the same cache(s).
* NEEDS_GROUPS: Caches are shared between groups.
*/
-SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
/*
* Only a single load balancing instance
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 669e8cff40c7..0a0e23c45406 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -9,6 +9,7 @@
#include <linux/sched/task.h>
#include <linux/cred.h>
#include <linux/refcount.h>
+#include <linux/pid.h>
#include <linux/posix-timers.h>
#include <linux/mm_types.h>
#include <asm/ptrace.h>
@@ -303,20 +304,11 @@ static inline void kernel_signal_stop(void)
schedule();
}
-#ifdef __ia64__
-# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3
-#else
-# define ___ARCH_SI_IA64(_a1, _a2, _a3)
-#endif
-int force_sig_fault_to_task(int sig, int code, void __user *addr
- ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
- , struct task_struct *t);
-int force_sig_fault(int sig, int code, void __user *addr
- ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr));
-int send_sig_fault(int sig, int code, void __user *addr
- ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
- , struct task_struct *t);
+int force_sig_fault_to_task(int sig, int code, void __user *addr,
+ struct task_struct *t);
+int force_sig_fault(int sig, int code, void __user *addr);
+int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t);
int force_sig_mceerr(int code, void __user *, short);
int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
@@ -441,7 +433,6 @@ static inline bool fault_signal_pending(vm_fault_t fault_flags,
* This is required every time the blocked sigset_t changes.
* callers must hold sighand->siglock.
*/
-extern void recalc_sigpending_and_wake(struct task_struct *t);
extern void recalc_sigpending(void);
extern void calculate_sigpending(void);
@@ -649,17 +640,18 @@ extern void flush_itimer_signals(void);
extern bool current_is_single_threaded(void);
/*
- * Careful: do_each_thread/while_each_thread is a double loop so
- * 'break' will not work as expected - use goto instead.
+ * Without tasklist/siglock it is only rcu-safe if g can't exit/exec,
+ * otherwise next_thread(t) will never reach g after list_del_rcu(g).
*/
-#define do_each_thread(g, t) \
- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
-
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)
+#define for_other_threads(p, t) \
+ for (t = p; (t = next_thread(t)) != p; )
+
#define __for_each_thread(signal, t) \
- list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \
+ lockdep_is_held(&tasklist_lock))
#define for_each_thread(p, t) \
__for_each_thread((p)->signal, t)
@@ -718,22 +710,31 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
return p1->signal == p2->signal;
}
-static inline struct task_struct *next_thread(const struct task_struct *p)
+/*
+ * returns NULL if p is the last thread in the thread group
+ */
+static inline struct task_struct *__next_thread(struct task_struct *p)
+{
+ return list_next_or_null_rcu(&p->signal->thread_head,
+ &p->thread_node,
+ struct task_struct,
+ thread_node);
+}
+
+static inline struct task_struct *next_thread(struct task_struct *p)
{
- return list_entry_rcu(p->thread_group.next,
- struct task_struct, thread_group);
+ return __next_thread(p) ?: p->group_leader;
}
static inline int thread_group_empty(struct task_struct *p)
{
- return list_empty(&p->thread_group);
+ return thread_group_leader(p) &&
+ list_is_last(&p->thread_node, &p->signal->thread_head);
}
#define delay_group_leader(p) \
(thread_group_leader(p) && !thread_group_empty(p))
-extern bool thread_group_exited(struct pid *pid);
-
extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
unsigned long *flags);
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 59d3736c454c..fb1e295e7e63 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; }
void arch_smt_update(void);
-#endif
+#endif /* _LINUX_SCHED_SMT_H */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index dd35ce28bb90..d362aacf9f89 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -7,6 +7,8 @@
* functionality:
*/
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
@@ -118,11 +120,47 @@ static inline struct task_struct *get_task_struct(struct task_struct *t)
}
extern void __put_task_struct(struct task_struct *t);
+extern void __put_task_struct_rcu_cb(struct rcu_head *rhp);
static inline void put_task_struct(struct task_struct *t)
{
- if (refcount_dec_and_test(&t->usage))
+ if (!refcount_dec_and_test(&t->usage))
+ return;
+
+ /*
+ * In !RT, it is always safe to call __put_task_struct().
+ * Under RT, we can only call it in preemptible context.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+ static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP);
+
+ lock_map_acquire_try(&put_task_map);
__put_task_struct(t);
+ lock_map_release(&put_task_map);
+ return;
+ }
+
+ /*
+ * under PREEMPT_RT, we can't call put_task_struct
+ * in atomic context because it will indirectly
+ * acquire sleeping locks.
+ *
+ * call_rcu() will schedule delayed_put_task_struct_rcu()
+ * to be called in process context.
+ *
+ * __put_task_struct() is called when
+ * refcount_dec_and_test(&t->usage) succeeds.
+ *
+ * This means that it can't "conflict" with
+ * put_task_struct_rcu_user() which abuses ->rcu the same
+ * way; rcu_users has a reference so task->usage can't be
+ * zero after rcu_users 1 -> 0 transition.
+ *
+ * delayed_free_task() also uses ->rcu, but it is only called
+ * when it fails to fork a process. Therefore, there is no
+ * way it can conflict with put_task_struct().
+ */
+ call_rcu(&t->rcu, __put_task_struct_rcu_cb);
}
DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T))
@@ -190,4 +228,6 @@ static inline void task_unlock(struct task_struct *p)
spin_unlock(&p->alloc_lock);
}
+DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T))
+
#endif /* _LINUX_SCHED_TASK_H */
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index f158b025c175..ccd72b978e1f 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/magic.h>
+#include <linux/refcount.h>
#ifdef CONFIG_THREAD_INFO_IN_TASK
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 67b573d5bf28..18572c9ea724 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[];
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
}
#endif
#ifdef CONFIG_SCHED_CLUSTER
static inline int cpu_cluster_flags(void)
{
- return SD_SHARE_PKG_RESOURCES;
+ return SD_CLUSTER | SD_SHARE_LLC;
}
#endif
#ifdef CONFIG_SCHED_MC
static inline int cpu_core_flags(void)
{
- return SD_SHARE_PKG_RESOURCES;
+ return SD_SHARE_LLC;
}
#endif
@@ -109,8 +109,6 @@ struct sched_domain {
u64 max_newidle_lb_cost;
unsigned long last_decay_max_lb_cost;
- u64 avg_scan_cost; /* select_idle_sibling */
-
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -178,7 +176,9 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+bool cpus_equal_capacity(int this_cpu, int that_cpu);
bool cpus_share_cache(int this_cpu, int that_cpu);
+bool cpus_share_resources(int this_cpu, int that_cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
@@ -227,11 +227,21 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
{
}
+static inline bool cpus_equal_capacity(int this_cpu, int that_cpu)
+{
+ return true;
+}
+
static inline bool cpus_share_cache(int this_cpu, int that_cpu)
{
return true;
}
+static inline bool cpus_share_resources(int this_cpu, int that_cpu)
+{
+ return true;
+}
+
#endif /* !CONFIG_SMP */
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
@@ -275,6 +285,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus,
{ }
#endif
+#ifndef arch_scale_freq_ref
+static __always_inline
+unsigned int arch_scale_freq_ref(int cpu)
+{
+ return 0;
+}
+#endif
+
static inline int task_node(const struct task_struct *p)
{
return cpu_to_node(task_cpu(p));
diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h
index 3c3e049224ae..969aaf5ef9d6 100644
--- a/include/linux/sched/types.h
+++ b/include/linux/sched/types.h
@@ -20,4 +20,4 @@ struct task_cputime {
unsigned long long sum_exec_runtime;
};
-#endif
+#endif /* _LINUX_SCHED_TYPES_H */
diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h
index 837a23624a66..bc60243d43b3 100644
--- a/include/linux/sched/vhost_task.h
+++ b/include/linux/sched/vhost_task.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_VHOST_TASK_H
-#define _LINUX_VHOST_TASK_H
-
+#ifndef _LINUX_SCHED_VHOST_TASK_H
+#define _LINUX_SCHED_VHOST_TASK_H
struct vhost_task;
@@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk);
void vhost_task_stop(struct vhost_task *vtsk);
void vhost_task_wake(struct vhost_task *vtsk);
-#endif
+#endif /* _LINUX_SCHED_VHOST_TASK_H */
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index e6fe4f73ffe6..b807141acc14 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -47,6 +47,10 @@ struct scmi_clock_info {
bool rate_discrete;
bool rate_changed_notifications;
bool rate_change_requested_notifications;
+ bool state_ctrl_forbidden;
+ bool rate_ctrl_forbidden;
+ bool parent_ctrl_forbidden;
+ bool extended_config;
union {
struct {
int num_rates;
@@ -58,6 +62,8 @@ struct scmi_clock_info {
u64 step_size;
} range;
};
+ int num_parents;
+ u32 *parents;
};
enum scmi_power_scale {
@@ -70,6 +76,13 @@ struct scmi_handle;
struct scmi_device;
struct scmi_protocol_handle;
+enum scmi_clock_oem_config {
+ SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1,
+ SCMI_CLOCK_CFG_PHASE,
+ SCMI_CLOCK_CFG_OEM_START = 0x80,
+ SCMI_CLOCK_CFG_OEM_END = 0xFF,
+};
+
/**
* struct scmi_clk_proto_ops - represents the various operations provided
* by SCMI Clock Protocol
@@ -80,6 +93,11 @@ struct scmi_protocol_handle;
* @rate_set: set the clock rate of a clock
* @enable: enables the specified clock
* @disable: disables the specified clock
+ * @state_get: get the status of the specified clock
+ * @config_oem_get: get the value of an OEM specific clock config
+ * @config_oem_set: set the value of an OEM specific clock config
+ * @parent_get: get the parent id of a clk
+ * @parent_set: set the parent of a clock
*/
struct scmi_clk_proto_ops {
int (*count_get)(const struct scmi_protocol_handle *ph);
@@ -90,23 +108,40 @@ struct scmi_clk_proto_ops {
u64 *rate);
int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id,
u64 rate);
- int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id);
- int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id);
- int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id);
- int (*disable_atomic)(const struct scmi_protocol_handle *ph,
- u32 clk_id);
+ int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ bool atomic);
+ int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ bool atomic);
+ int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ bool *enabled, bool atomic);
+ int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ enum scmi_clock_oem_config oem_type,
+ u32 *oem_val, u32 *attributes, bool atomic);
+ int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id,
+ enum scmi_clock_oem_config oem_type,
+ u32 oem_val, bool atomic);
+ int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id);
+ int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id);
+};
+
+struct scmi_perf_domain_info {
+ char name[SCMI_MAX_STR_SIZE];
+ bool set_perf;
};
/**
* struct scmi_perf_proto_ops - represents the various operations provided
* by SCMI Performance Protocol
*
+ * @num_domains_get: gets the number of supported performance domains
+ * @info_get: get the information of a performance domain
* @limits_set: sets limits on the performance level of a domain
* @limits_get: gets limits on the performance level of a domain
* @level_set: sets the performance level of a domain
* @level_get: gets the performance level of a domain
- * @device_domain_id: gets the scmi domain id for a given device
* @transition_latency_get: gets the DVFS transition latency for a given device
+ * @rate_limit_get: gets the minimum time (us) required between successive
+ * requests
* @device_opps_add: adds all the OPPs for a given device
* @freq_set: sets the frequency for a given device using sustained frequency
* to sustained performance level mapping
@@ -116,10 +151,15 @@ struct scmi_clk_proto_ops {
* at a given frequency
* @fast_switch_possible: indicates if fast DVFS switching is possible or not
* for a given device
+ * @fast_switch_rate_limit: gets the minimum time (us) required between
+ * successive fast_switching requests
* @power_scale_mw_get: indicates if the power values provided are in milliWatts
* or in some other (abstract) scale
*/
struct scmi_perf_proto_ops {
+ int (*num_domains_get)(const struct scmi_protocol_handle *ph);
+ const struct scmi_perf_domain_info __must_check *(*info_get)
+ (const struct scmi_protocol_handle *ph, u32 domain);
int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain,
u32 max_perf, u32 min_perf);
int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain,
@@ -128,11 +168,12 @@ struct scmi_perf_proto_ops {
u32 level, bool poll);
int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain,
u32 *level, bool poll);
- int (*device_domain_id)(struct device *dev);
int (*transition_latency_get)(const struct scmi_protocol_handle *ph,
- struct device *dev);
+ u32 domain);
+ int (*rate_limit_get)(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit);
int (*device_opps_add)(const struct scmi_protocol_handle *ph,
- struct device *dev);
+ struct device *dev, u32 domain);
int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain,
unsigned long rate, bool poll);
int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain,
@@ -140,7 +181,9 @@ struct scmi_perf_proto_ops {
int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain,
unsigned long *rate, unsigned long *power);
bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph,
- struct device *dev);
+ u32 domain);
+ int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit);
enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph);
};
@@ -930,6 +973,8 @@ struct scmi_perf_limits_report {
unsigned int domain_id;
unsigned int range_max;
unsigned int range_min;
+ unsigned long range_max_freq;
+ unsigned long range_min_freq;
};
struct scmi_perf_level_report {
@@ -937,6 +982,7 @@ struct scmi_perf_level_report {
unsigned int agent_id;
unsigned int domain_id;
unsigned int performance_level;
+ unsigned long performance_level_freq;
};
struct scmi_sensor_trip_point_report {
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index eab7081392d5..75303c126285 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -4,6 +4,132 @@
#include <uapi/linux/screen_info.h>
+#include <linux/bits.h>
+
+/**
+ * SCREEN_INFO_MAX_RESOURCES - maximum number of resources per screen_info
+ */
+#define SCREEN_INFO_MAX_RESOURCES 3
+
+struct pci_dev;
+struct resource;
+
+static inline bool __screen_info_has_lfb(unsigned int type)
+{
+ return (type == VIDEO_TYPE_VLFB) || (type == VIDEO_TYPE_EFI);
+}
+
+static inline u64 __screen_info_lfb_base(const struct screen_info *si)
+{
+ u64 lfb_base = si->lfb_base;
+
+ if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+ lfb_base |= (u64)si->ext_lfb_base << 32;
+
+ return lfb_base;
+}
+
+static inline void __screen_info_set_lfb_base(struct screen_info *si, u64 lfb_base)
+{
+ si->lfb_base = lfb_base & GENMASK_ULL(31, 0);
+ si->ext_lfb_base = (lfb_base & GENMASK_ULL(63, 32)) >> 32;
+
+ if (si->ext_lfb_base)
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ else
+ si->capabilities &= ~VIDEO_CAPABILITY_64BIT_BASE;
+}
+
+static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned int type)
+{
+ u64 lfb_size = si->lfb_size;
+
+ if (type == VIDEO_TYPE_VLFB)
+ lfb_size <<= 16;
+ return lfb_size;
+}
+
+static inline unsigned int __screen_info_video_type(unsigned int type)
+{
+ switch (type) {
+ case VIDEO_TYPE_MDA:
+ case VIDEO_TYPE_CGA:
+ case VIDEO_TYPE_EGAM:
+ case VIDEO_TYPE_EGAC:
+ case VIDEO_TYPE_VGAC:
+ case VIDEO_TYPE_VLFB:
+ case VIDEO_TYPE_PICA_S3:
+ case VIDEO_TYPE_MIPS_G364:
+ case VIDEO_TYPE_SGI:
+ case VIDEO_TYPE_TGAC:
+ case VIDEO_TYPE_SUN:
+ case VIDEO_TYPE_SUNPCI:
+ case VIDEO_TYPE_PMAC:
+ case VIDEO_TYPE_EFI:
+ return type;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * screen_info_video_type() - Decodes the video type from struct screen_info
+ * @si: an instance of struct screen_info
+ *
+ * Returns:
+ * A VIDEO_TYPE_ constant representing si's type of video display, or 0 otherwise.
+ */
+static inline unsigned int screen_info_video_type(const struct screen_info *si)
+{
+ unsigned int type;
+
+ // check if display output is on
+ if (!si->orig_video_isVGA)
+ return 0;
+
+ // check for a known VIDEO_TYPE_ constant
+ type = __screen_info_video_type(si->orig_video_isVGA);
+ if (type)
+ return si->orig_video_isVGA;
+
+ // check if text mode has been initialized
+ if (!si->orig_video_lines || !si->orig_video_cols)
+ return 0;
+
+ // 80x25 text, mono
+ if (si->orig_video_mode == 0x07) {
+ if ((si->orig_video_ega_bx & 0xff) != 0x10)
+ return VIDEO_TYPE_EGAM;
+ else
+ return VIDEO_TYPE_MDA;
+ }
+
+ // EGA/VGA, 16 colors
+ if ((si->orig_video_ega_bx & 0xff) != 0x10) {
+ if (si->orig_video_isVGA)
+ return VIDEO_TYPE_VGAC;
+ else
+ return VIDEO_TYPE_EGAC;
+ }
+
+ // the rest...
+ return VIDEO_TYPE_CGA;
+}
+
+ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num);
+
+#if defined(CONFIG_PCI)
+void screen_info_apply_fixups(void);
+struct pci_dev *screen_info_pci_dev(const struct screen_info *si);
+#else
+static inline void screen_info_apply_fixups(void)
+{ }
+static inline struct pci_dev *screen_info_pci_dev(const struct screen_info *si)
+{
+ return NULL;
+}
+#endif
+
extern struct screen_info screen_info;
#endif /* _SCREEN_INFO_H */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 175079552f68..709ad84809e1 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,7 @@
#define _LINUX_SECCOMP_H
#include <uapi/linux/seccomp.h>
+#include <linux/seccomp_types.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_LOG | \
@@ -21,25 +22,6 @@
#include <linux/atomic.h>
#include <asm/seccomp.h>
-struct seccomp_filter;
-/**
- * struct seccomp - the state of a seccomp'ed process
- *
- * @mode: indicates one of the valid values above for controlled
- * system calls available to a process.
- * @filter_count: number of seccomp filters
- * @filter: must always point to a valid seccomp-filter or NULL as it is
- * accessed without locking during system call entry.
- *
- * @filter must only be accessed from the context of current as there
- * is no read locking.
- */
-struct seccomp {
- int mode;
- atomic_t filter_count;
- struct seccomp_filter *filter;
-};
-
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(void)
@@ -64,8 +46,6 @@ static inline int seccomp_mode(struct seccomp *s)
#include <linux/errno.h>
-struct seccomp { };
-struct seccomp_filter { };
struct seccomp_data;
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
@@ -126,6 +106,8 @@ static inline long seccomp_get_metadata(struct task_struct *task,
#ifdef CONFIG_SECCOMP_CACHE_DEBUG
struct seq_file;
+struct pid_namespace;
+struct pid;
int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
diff --git a/include/linux/seccomp_types.h b/include/linux/seccomp_types.h
new file mode 100644
index 000000000000..cf0a0355024f
--- /dev/null
+++ b/include/linux/seccomp_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SECCOMP_TYPES_H
+#define _LINUX_SECCOMP_TYPES_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_SECCOMP
+
+struct seccomp_filter;
+/**
+ * struct seccomp - the state of a seccomp'ed process
+ *
+ * @mode: indicates one of the valid values above for controlled
+ * system calls available to a process.
+ * @filter_count: number of seccomp filters
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ * accessed without locking during system call entry.
+ *
+ * @filter must only be accessed from the context of current as there
+ * is no read locking.
+ */
+struct seccomp {
+ int mode;
+ atomic_t filter_count;
+ struct seccomp_filter *filter;
+};
+
+#else
+
+struct seccomp { };
+struct seccomp_filter { };
+
+#endif
+
+#endif /* _LINUX_SECCOMP_TYPES_H */
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 988528b5da43..acf7e1a3f3de 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -6,24 +6,23 @@
extern const struct address_space_operations secretmem_aops;
-static inline bool page_is_secretmem(struct page *page)
+static inline bool folio_is_secretmem(struct folio *folio)
{
struct address_space *mapping;
/*
- * Using page_mapping() is quite slow because of the actual call
- * instruction and repeated compound_head(page) inside the
- * page_mapping() function.
- * We know that secretmem pages are not compound and LRU so we can
+ * Using folio_mapping() is quite slow because of the actual call
+ * instruction.
+ * We know that secretmem pages are not compound, so we can
* save a couple of cycles here.
*/
- if (PageCompound(page) || !PageLRU(page))
+ if (folio_test_large(folio))
return false;
mapping = (struct address_space *)
- ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
+ ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
- if (!mapping || mapping != page->mapping)
+ if (!mapping || mapping != folio->mapping)
return false;
return mapping->a_ops == &secretmem_aops;
@@ -39,7 +38,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma)
return false;
}
-static inline bool page_is_secretmem(struct page *page)
+static inline bool folio_is_secretmem(struct folio *folio)
{
return false;
}
diff --git a/include/linux/security.h b/include/linux/security.h
index 32828502f09e..41a8f667bdfa 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -32,6 +32,8 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
+#include <linux/bpf.h>
+#include <uapi/linux/lsm.h>
struct linux_binprm;
struct cred;
@@ -60,6 +62,7 @@ struct fs_parameter;
enum fs_value_type;
struct watch;
struct watch_notification;
+struct lsm_ctx;
/* Default (no) options for the capable function */
#define CAP_OPT_NONE 0x0
@@ -138,6 +141,8 @@ enum lockdown_reason {
};
extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1];
+extern u32 lsm_active_cnt;
+extern const struct lsm_id *lsm_idlist[];
/* These functions are in security/commoncap.c */
extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
@@ -145,12 +150,13 @@ extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz);
extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
extern int cap_ptrace_traceme(struct task_struct *parent);
-extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
+extern int cap_capget(const struct task_struct *target, kernel_cap_t *effective,
+ kernel_cap_t *inheritable, kernel_cap_t *permitted);
extern int cap_capset(struct cred *new, const struct cred *old,
const kernel_cap_t *effective,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted);
-extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
+extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file);
int cap_inode_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int cap_inode_removexattr(struct mnt_idmap *idmap,
@@ -260,6 +266,7 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb);
/* prototypes */
extern int security_init(void);
extern int early_security_init(void);
+extern u64 lsm_name_to_attr(const char *name);
/* Security operations */
int security_binder_set_context_mgr(const struct cred *mgr);
@@ -268,10 +275,10 @@ int security_binder_transaction(const struct cred *from,
int security_binder_transfer_binder(const struct cred *from,
const struct cred *to);
int security_binder_transfer_file(const struct cred *from,
- const struct cred *to, struct file *file);
+ const struct cred *to, const struct file *file);
int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
int security_ptrace_traceme(struct task_struct *parent);
-int security_capget(struct task_struct *target,
+int security_capget(const struct task_struct *target,
kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted);
@@ -283,16 +290,17 @@ int security_capable(const struct cred *cred,
struct user_namespace *ns,
int cap,
unsigned int opts);
-int security_quotactl(int cmds, int type, int id, struct super_block *sb);
+int security_quotactl(int cmds, int type, int id, const struct super_block *sb);
int security_quota_on(struct dentry *dentry);
int security_syslog(int type);
int security_settime64(const struct timespec64 *ts, const struct timezone *tz);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
int security_bprm_creds_for_exec(struct linux_binprm *bprm);
-int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
+int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file);
int security_bprm_check(struct linux_binprm *bprm);
-void security_bprm_committing_creds(struct linux_binprm *bprm);
-void security_bprm_committed_creds(struct linux_binprm *bprm);
+void security_bprm_committing_creds(const struct linux_binprm *bprm);
+void security_bprm_committed_creds(const struct linux_binprm *bprm);
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference);
int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
int security_sb_alloc(struct super_block *sb);
@@ -302,7 +310,7 @@ void security_free_mnt_opts(void **mnt_opts);
int security_sb_eat_lsm_opts(char *options, void **mnt_opts);
int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts);
int security_sb_remount(struct super_block *sb, void *mnt_opts);
-int security_sb_kern_mount(struct super_block *sb);
+int security_sb_kern_mount(const struct super_block *sb);
int security_sb_show_options(struct seq_file *m, struct super_block *sb);
int security_sb_statfs(struct dentry *dentry);
int security_sb_mount(const char *dev_name, const struct path *path,
@@ -337,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode,
const struct qstr *name,
const struct inode *context_inode);
int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode);
+void security_inode_post_create_tmpfile(struct mnt_idmap *idmap,
+ struct inode *inode);
int security_inode_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry);
int security_inode_unlink(struct inode *dir, struct dentry *dentry);
@@ -354,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
int security_inode_permission(struct inode *inode, int mask);
int security_inode_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
+void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ int ia_valid);
int security_inode_getattr(const struct path *path);
int security_inode_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name,
@@ -361,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap,
int security_inode_set_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name,
struct posix_acl *kacl);
+void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name,
+ struct posix_acl *kacl);
int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
int security_inode_remove_acl(struct mnt_idmap *idmap,
struct dentry *dentry, const char *acl_name);
+void security_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ const char *acl_name);
void security_inode_post_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
int security_inode_getxattr(struct dentry *dentry, const char *name);
int security_inode_listxattr(struct dentry *dentry);
int security_inode_removexattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *name);
+void security_inode_post_removexattr(struct dentry *dentry, const char *name);
int security_inode_need_killpriv(struct dentry *dentry);
int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry);
int security_inode_getsecurity(struct mnt_idmap *idmap,
@@ -385,8 +403,11 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
struct kernfs_node *kn);
int security_file_permission(struct file *file, int mask);
int security_file_alloc(struct file *file);
+void security_file_release(struct file *file);
void security_file_free(struct file *file);
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg);
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags);
int security_mmap_addr(unsigned long addr);
@@ -399,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk,
struct fown_struct *fown, int sig);
int security_file_receive(struct file *file);
int security_file_open(struct file *file);
+int security_file_post_open(struct file *file, int mask);
int security_file_truncate(struct file *file);
int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
void security_task_free(struct task_struct *task);
@@ -468,10 +490,13 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd);
int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops,
unsigned nsops, int alter);
void security_d_instantiate(struct dentry *dentry, struct inode *inode);
-int security_getprocattr(struct task_struct *p, const char *lsm, const char *name,
+int security_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx,
+ u32 __user *size, u32 flags);
+int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx,
+ u32 size, u32 flags);
+int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
char **value);
-int security_setprocattr(const char *lsm, const char *name, void *value,
- size_t size);
+int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_ismaclabel(const char *name);
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
@@ -482,6 +507,8 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
int security_locked_down(enum lockdown_reason what);
+int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len,
+ void *val, size_t val_len, u64 id, u64 flags);
#else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -499,6 +526,11 @@ static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb)
return 0;
}
+static inline u64 lsm_name_to_attr(const char *name)
+{
+ return LSM_ATTR_UNDEF;
+}
+
static inline void security_free_mnt_opts(void **mnt_opts)
{
}
@@ -537,7 +569,7 @@ static inline int security_binder_transfer_binder(const struct cred *from,
static inline int security_binder_transfer_file(const struct cred *from,
const struct cred *to,
- struct file *file)
+ const struct file *file)
{
return 0;
}
@@ -553,7 +585,7 @@ static inline int security_ptrace_traceme(struct task_struct *parent)
return cap_ptrace_traceme(parent);
}
-static inline int security_capget(struct task_struct *target,
+static inline int security_capget(const struct task_struct *target,
kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
@@ -579,7 +611,7 @@ static inline int security_capable(const struct cred *cred,
}
static inline int security_quotactl(int cmds, int type, int id,
- struct super_block *sb)
+ const struct super_block *sb)
{
return 0;
}
@@ -611,7 +643,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm)
}
static inline int security_bprm_creds_from_file(struct linux_binprm *bprm,
- struct file *file)
+ const struct file *file)
{
return cap_bprm_creds_from_file(bprm, file);
}
@@ -621,14 +653,19 @@ static inline int security_bprm_check(struct linux_binprm *bprm)
return 0;
}
-static inline void security_bprm_committing_creds(struct linux_binprm *bprm)
+static inline void security_bprm_committing_creds(const struct linux_binprm *bprm)
{
}
-static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
+static inline void security_bprm_committed_creds(const struct linux_binprm *bprm)
{
}
+static inline int security_fs_context_submount(struct fs_context *fc,
+ struct super_block *reference)
+{
+ return 0;
+}
static inline int security_fs_context_dup(struct fs_context *fc,
struct fs_context *src_fc)
{
@@ -782,6 +819,10 @@ static inline int security_inode_create(struct inode *dir,
return 0;
}
+static inline void
+security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode)
+{ }
+
static inline int security_inode_link(struct dentry *old_dentry,
struct inode *dir,
struct dentry *new_dentry)
@@ -855,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap,
return 0;
}
+static inline void
+security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ int ia_valid)
+{ }
+
static inline int security_inode_getattr(const struct path *path)
{
return 0;
@@ -875,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap,
return 0;
}
+static inline void security_inode_post_set_acl(struct dentry *dentry,
+ const char *acl_name,
+ struct posix_acl *kacl)
+{ }
+
static inline int security_inode_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry,
const char *acl_name)
@@ -889,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap,
return 0;
}
+static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap,
+ struct dentry *dentry,
+ const char *acl_name)
+{ }
+
static inline void security_inode_post_setxattr(struct dentry *dentry,
const char *name, const void *value, size_t size, int flags)
{ }
@@ -911,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap,
return cap_inode_removexattr(idmap, dentry, name);
}
+static inline void security_inode_post_removexattr(struct dentry *dentry,
+ const char *name)
+{ }
+
static inline int security_inode_need_killpriv(struct dentry *dentry)
{
return cap_inode_need_killpriv(dentry);
@@ -971,6 +1031,9 @@ static inline int security_file_alloc(struct file *file)
return 0;
}
+static inline void security_file_release(struct file *file)
+{ }
+
static inline void security_file_free(struct file *file)
{ }
@@ -980,6 +1043,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
return 0;
}
+static inline int security_file_ioctl_compat(struct file *file,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return 0;
+}
+
static inline int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags)
{
@@ -1031,6 +1101,11 @@ static inline int security_file_open(struct file *file)
return 0;
}
+static inline int security_file_post_open(struct file *file, int mask)
+{
+ return 0;
+}
+
static inline int security_file_truncate(struct file *file)
{
return 0;
@@ -1330,14 +1405,28 @@ static inline void security_d_instantiate(struct dentry *dentry,
struct inode *inode)
{ }
-static inline int security_getprocattr(struct task_struct *p, const char *lsm,
+static inline int security_getselfattr(unsigned int attr,
+ struct lsm_ctx __user *ctx,
+ size_t __user *size, u32 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int security_setselfattr(unsigned int attr,
+ struct lsm_ctx __user *ctx,
+ size_t size, u32 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int security_getprocattr(struct task_struct *p, int lsmid,
const char *name, char **value)
{
return -EINVAL;
}
-static inline int security_setprocattr(const char *lsm, char *name,
- void *value, size_t size)
+static inline int security_setprocattr(int lsmid, char *name, void *value,
+ size_t size)
{
return -EINVAL;
}
@@ -1388,6 +1477,12 @@ static inline int security_locked_down(enum lockdown_reason what)
{
return 0;
}
+static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx,
+ u32 *uctx_len, void *val, size_t val_len,
+ u64 id, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_SECURITY */
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
@@ -1439,7 +1534,8 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
void security_sk_free(struct sock *sk);
void security_sk_clone(const struct sock *sk, struct sock *newsk);
-void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic);
+void security_sk_classify_flow(const struct sock *sk,
+ struct flowi_common *flic);
void security_req_classify_flow(const struct request_sock *req,
struct flowi_common *flic);
void security_sock_graft(struct sock*sk, struct socket *parent);
@@ -1597,7 +1693,7 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
{
}
-static inline void security_sk_classify_flow(struct sock *sk,
+static inline void security_sk_classify_flow(const struct sock *sk,
struct flowi_common *flic)
{
}
@@ -1819,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m
int security_path_rmdir(const struct path *dir, struct dentry *dentry);
int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode,
unsigned int dev);
+void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry);
int security_path_truncate(const struct path *path);
int security_path_symlink(const struct path *dir, struct dentry *dentry,
const char *old_name);
@@ -1853,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den
return 0;
}
+static inline void security_path_post_mknod(struct mnt_idmap *idmap,
+ struct dentry *dentry)
+{ }
+
static inline int security_path_truncate(const struct path *path)
{
return 0;
@@ -1904,6 +2005,9 @@ void security_key_free(struct key *key);
int security_key_permission(key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm);
int security_key_getsecurity(struct key *key, char **_buffer);
+void security_key_post_create_or_update(struct key *keyring, struct key *key,
+ const void *payload, size_t payload_len,
+ unsigned long flags, bool create);
#else
@@ -1931,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
return 0;
}
+static inline void security_key_post_create_or_update(struct key *keyring,
+ struct key *key,
+ const void *payload,
+ size_t payload_len,
+ unsigned long flags,
+ bool create)
+{ }
+
#endif
#endif /* CONFIG_KEYS */
@@ -2012,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry)
union bpf_attr;
struct bpf_map;
struct bpf_prog;
-struct bpf_prog_aux;
+struct bpf_token;
#ifdef CONFIG_SECURITY
extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size);
extern int security_bpf_map(struct bpf_map *map, fmode_t fmode);
extern int security_bpf_prog(struct bpf_prog *prog);
-extern int security_bpf_map_alloc(struct bpf_map *map);
+extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token);
extern void security_bpf_map_free(struct bpf_map *map);
-extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux);
-extern void security_bpf_prog_free(struct bpf_prog_aux *aux);
+extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token);
+extern void security_bpf_prog_free(struct bpf_prog *prog);
+extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path);
+extern void security_bpf_token_free(struct bpf_token *token);
+extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
#else
static inline int security_bpf(int cmd, union bpf_attr *attr,
unsigned int size)
@@ -2038,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog)
return 0;
}
-static inline int security_bpf_map_alloc(struct bpf_map *map)
+static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
return 0;
}
@@ -2046,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map)
static inline void security_bpf_map_free(struct bpf_map *map)
{ }
-static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
{
return 0;
}
-static inline void security_bpf_prog_free(struct bpf_prog_aux *aux)
+static inline void security_bpf_prog_free(struct bpf_prog *prog)
{ }
+
+static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+{
+ return 0;
+}
+
+static inline void security_bpf_token_free(struct bpf_token *token)
+{ }
+
+static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ return 0;
+}
+
+static inline int security_bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/linux/sed-opal-key.h b/include/linux/sed-opal-key.h
new file mode 100644
index 000000000000..0ca03054e8f6
--- /dev/null
+++ b/include/linux/sed-opal-key.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SED key operations.
+ *
+ * Copyright (C) 2023 IBM Corporation
+ *
+ * These are the accessor functions (read/write) for SED Opal
+ * keys. Specific keystores can provide overrides.
+ *
+ */
+
+#include <linux/kernel.h>
+
+#ifdef CONFIG_PSERIES_PLPKS_SED
+int sed_read_key(char *keyname, char *key, u_int *keylen);
+int sed_write_key(char *keyname, char *key, u_int keylen);
+#else
+static inline
+int sed_read_key(char *keyname, char *key, u_int *keylen) {
+ return -EOPNOTSUPP;
+}
+static inline
+int sed_write_key(char *keyname, char *key, u_int keylen) {
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index bbae1e52ab4f..2ac50822554e 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -25,6 +25,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev);
struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
+#define OPAL_AUTH_KEY "opal-boot-pin"
+#define OPAL_AUTH_KEY_PREV "opal-boot-pin-prev"
+
static inline bool is_sed_ioctl(unsigned int cmd)
{
switch (cmd) {
@@ -47,6 +50,8 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_GET_STATUS:
case IOC_OPAL_GET_LR_STATUS:
case IOC_OPAL_GET_GEOMETRY:
+ case IOC_OPAL_DISCOVERY:
+ case IOC_OPAL_REVERT_LSP:
return true;
}
return false;
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 170ef28ff26b..bab7d30d3446 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -14,17 +14,16 @@
struct tty_struct;
struct vc_data;
-extern void clear_selection(void);
-extern int set_selection_user(const struct tiocl_selection __user *sel,
- struct tty_struct *tty);
-extern int set_selection_kernel(struct tiocl_selection *v,
- struct tty_struct *tty);
-extern int paste_selection(struct tty_struct *tty);
-extern int sel_loadlut(char __user *p);
-extern int mouse_reporting(void);
-extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry);
-
-bool vc_is_sel(struct vc_data *vc);
+void clear_selection(void);
+int set_selection_user(const struct tiocl_selection __user *sel,
+ struct tty_struct *tty);
+int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty);
+int paste_selection(struct tty_struct *tty);
+int sel_loadlut(u32 __user *lut);
+int mouse_reporting(void);
+void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry);
+
+bool vc_is_sel(const struct vc_data *vc);
extern int console_blanked;
@@ -33,24 +32,21 @@ extern unsigned char default_red[];
extern unsigned char default_grn[];
extern unsigned char default_blu[];
-extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset,
- bool viewed);
-extern u16 screen_glyph(const struct vc_data *vc, int offset);
-extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset);
-extern void complement_pos(struct vc_data *vc, int offset);
-extern void invert_screen(struct vc_data *vc, int offset, int count, bool viewed);
-
-extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]);
-extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]);
-
-extern u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org);
-extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
-extern void vcs_scr_updated(struct vc_data *vc);
-
-extern int vc_uniscr_check(struct vc_data *vc);
-extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest,
- bool viewed,
- unsigned int row, unsigned int col,
- unsigned int nr);
+unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed);
+u16 screen_glyph(const struct vc_data *vc, int offset);
+u32 screen_glyph_unicode(const struct vc_data *vc, int offset);
+void complement_pos(struct vc_data *vc, int offset);
+void invert_screen(struct vc_data *vc, int offset, int count, bool viewed);
+
+void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]);
+void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]);
+
+u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org);
+void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
+void vcs_scr_updated(struct vc_data *vc);
+
+int vc_uniscr_check(struct vc_data *vc);
+void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed,
+ unsigned int row, unsigned int col, unsigned int nr);
#endif
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 5608a500c43e..c4deefe42aeb 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -3,25 +3,17 @@
#define _LINUX_SEM_H
#include <uapi/linux/sem.h>
+#include <linux/sem_types.h>
struct task_struct;
-struct sem_undo_list;
#ifdef CONFIG_SYSVIPC
-struct sysv_sem {
- struct sem_undo_list *undo_list;
-};
-
extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
extern void exit_sem(struct task_struct *tsk);
#else
-struct sysv_sem {
- /* empty */
-};
-
static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
return 0;
diff --git a/include/linux/sem_types.h b/include/linux/sem_types.h
new file mode 100644
index 000000000000..73df1971a7ae
--- /dev/null
+++ b/include/linux/sem_types.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SEM_TYPES_H
+#define _LINUX_SEM_TYPES_H
+
+struct sem_undo_list;
+
+struct sysv_sem {
+#ifdef CONFIG_SYSVIPC
+ struct sem_undo_list *undo_list;
+#endif
+};
+
+#endif /* _LINUX_SEM_TYPES_H */
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h
index 515d7fcb9634..fe41da005970 100644
--- a/include/linux/seq_buf.h
+++ b/include/linux/seq_buf.h
@@ -2,7 +2,10 @@
#ifndef _LINUX_SEQ_BUF_H
#define _LINUX_SEQ_BUF_H
-#include <linux/fs.h>
+#include <linux/bug.h>
+#include <linux/minmax.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
/*
* Trace sequences are used to allow a function to call several other functions
@@ -10,23 +13,28 @@
*/
/**
- * seq_buf - seq buffer structure
+ * struct seq_buf - seq buffer structure
* @buffer: pointer to the buffer
* @size: size of the buffer
* @len: the amount of data inside the buffer
- * @readpos: The next position to read in the buffer.
*/
struct seq_buf {
char *buffer;
size_t size;
size_t len;
- loff_t readpos;
};
+#define DECLARE_SEQ_BUF(NAME, SIZE) \
+ struct seq_buf NAME = { \
+ .buffer = (char[SIZE]) { 0 }, \
+ .size = SIZE, \
+ }
+
static inline void seq_buf_clear(struct seq_buf *s)
{
s->len = 0;
- s->readpos = 0;
+ if (s->size)
+ s->buffer[0] = '\0';
}
static inline void
@@ -39,7 +47,7 @@ seq_buf_init(struct seq_buf *s, char *buf, unsigned int size)
/*
* seq_buf have a buffer that might overflow. When this happens
- * the len and size are set to be equal.
+ * len is set to be greater than size.
*/
static inline bool
seq_buf_has_overflowed(struct seq_buf *s)
@@ -72,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
}
/**
- * seq_buf_terminate - Make sure buffer is nul terminated
- * @s: the seq_buf descriptor to terminate.
+ * seq_buf_str - get NUL-terminated C string from seq_buf
+ * @s: the seq_buf handle
*
- * This makes sure that the buffer in @s is nul terminated and
+ * This makes sure that the buffer in @s is NUL-terminated and
* safe to read as a string.
*
* Note, if this is called when the buffer has overflowed, then
@@ -84,16 +92,20 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
*
* After this function is called, s->buffer is safe to use
* in string operations.
+ *
+ * Returns: @s->buf after making sure it is terminated.
*/
-static inline void seq_buf_terminate(struct seq_buf *s)
+static inline const char *seq_buf_str(struct seq_buf *s)
{
if (WARN_ON(s->size == 0))
- return;
+ return "";
if (seq_buf_buffer_left(s))
s->buffer[s->len] = 0;
else
s->buffer[s->size - 1] = 0;
+
+ return s->buffer;
}
/**
@@ -101,7 +113,7 @@ static inline void seq_buf_terminate(struct seq_buf *s)
* @s: the seq_buf handle
* @bufp: the beginning of the buffer is stored here
*
- * Return the number of bytes available in the buffer, or zero if
+ * Returns: the number of bytes available in the buffer, or zero if
* there's no space.
*/
static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
@@ -123,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
* @num: the number of bytes to commit
*
* Commit @num bytes of data written to a buffer previously acquired
- * by seq_buf_get. To signal an error condition, or that the data
+ * by seq_buf_get_buf(). To signal an error condition, or that the data
* didn't fit in the available space, pass a negative @num value.
*/
static inline void seq_buf_commit(struct seq_buf *s, int num)
@@ -143,7 +155,7 @@ extern __printf(2, 0)
int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args);
extern int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s);
extern int seq_buf_to_user(struct seq_buf *s, char __user *ubuf,
- int cnt);
+ size_t start, int cnt);
extern int seq_buf_puts(struct seq_buf *s, const char *str);
extern int seq_buf_putc(struct seq_buf *s, unsigned char c);
extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index bd023dd38ae6..234bcdb1fba4 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \
.release = single_release, \
}
+#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \
+static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+ \
+static const struct file_operations __name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = __name ## _open, \
+ .read = seq_read, \
+ .write = __name ## _write, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+}
+
#define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
@@ -249,18 +264,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name,
/**
* seq_show_option_n - display mount options with appropriate escapes
- * where @value must be a specific length.
+ * where @value must be a specific length (i.e.
+ * not NUL-terminated).
* @m: the seq_file handle
* @name: the mount option name
* @value: the mount option name's value, cannot be NULL
- * @length: the length of @value to display
+ * @length: the exact length of @value to display, must be constant expression
*
* This is a macro since this uses "length" to define the size of the
* stack buffer.
*/
#define seq_show_option_n(m, name, value, length) { \
char val_buf[length + 1]; \
- strncpy(val_buf, value, length); \
+ memcpy(val_buf, value, length); \
val_buf[length] = '\0'; \
seq_show_option(m, name, val_buf); \
}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 987a59d977c5..d90d8ee29d81 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -18,6 +18,7 @@
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
+#include <linux/seqlock_types.h>
#include <linux/spinlock.h>
#include <asm/processor.h>
@@ -37,37 +38,6 @@
*/
#define KCSAN_SEQLOCK_REGION_MAX 1000
-/*
- * Sequence counters (seqcount_t)
- *
- * This is the raw counting mechanism, without any writer protection.
- *
- * Write side critical sections must be serialized and non-preemptible.
- *
- * If readers can be invoked from hardirq or softirq contexts,
- * interrupts or bottom halves must also be respectively disabled before
- * entering the write section.
- *
- * This mechanism can't be used if the protected data contains pointers,
- * as the writer can invalidate a pointer that a reader is following.
- *
- * If the write serialization mechanism is one of the common kernel
- * locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKNAME_t) instead.
- *
- * If it's desired to automatically handle the sequence counter writer
- * serialization and non-preemptibility requirements, use a sequential
- * lock (seqlock_t) instead.
- *
- * See Documentation/locking/seqlock.rst
- */
-typedef struct seqcount {
- unsigned sequence;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} seqcount_t;
-
static inline void __seqcount_init(seqcount_t *s, const char *name,
struct lock_class_key *key)
{
@@ -132,28 +102,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
*/
/*
- * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
- * disable preemption. It can lead to higher latencies, and the write side
- * sections will not be able to acquire locks which become sleeping locks
- * (e.g. spinlock_t).
- *
- * To remain preemptible while avoiding a possible livelock caused by the
- * reader preempting the writer, use a different technique: let the reader
- * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
- * case, acquire then release the associated LOCKNAME writer serialization
- * lock. This will allow any possibly-preempted writer to make progress
- * until the end of its writer serialization lock critical section.
- *
- * This lock-unlock technique must be implemented for all of PREEMPT_RT
- * sleeping locks. See Documentation/locking/locktypes.rst
- */
-#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
-#define __SEQ_LOCK(expr) expr
-#else
-#define __SEQ_LOCK(expr)
-#endif
-
-/*
* typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
* @seqcount: The real sequence counter
* @lock: Pointer to the associated lock
@@ -191,22 +139,21 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
* @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t
* @locktype: LOCKNAME canonical C data type
* @preemptible: preemptibility of above locktype
- * @lockmember: argument for lockdep_assert_held()
- * @lockbase: associated lock release function (prefix only)
- * @lock_acquire: associated lock acquisition function (full call)
+ * @lockbase: prefix for associated lock/unlock
*/
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
-typedef struct seqcount_##lockname { \
- seqcount_t seqcount; \
- __SEQ_LOCK(locktype *lock); \
-} seqcount_##lockname##_t; \
- \
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
static __always_inline seqcount_t * \
__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
{ \
return &s->seqcount; \
} \
\
+static __always_inline const seqcount_t * \
+__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \
+{ \
+ return &s->seqcount; \
+} \
+ \
static __always_inline unsigned \
__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \
{ \
@@ -216,7 +163,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \
return seq; \
\
if (preemptible && unlikely(seq & 1)) { \
- __SEQ_LOCK(lock_acquire); \
+ __SEQ_LOCK(lockbase##_lock(s->lock)); \
__SEQ_LOCK(lockbase##_unlock(s->lock)); \
\
/* \
@@ -242,7 +189,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \
static __always_inline void \
__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \
{ \
- __SEQ_LOCK(lockdep_assert_held(lockmember)); \
+ __SEQ_LOCK(lockdep_assert_held(s->lock)); \
}
/*
@@ -254,6 +201,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
return s;
}
+static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s)
+{
+ return s;
+}
+
static inline unsigned __seqprop_sequence(const seqcount_t *s)
{
return READ_ONCE(s->sequence);
@@ -271,10 +223,11 @@ static inline void __seqprop_assert(const seqcount_t *s)
#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT)
-SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
-SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
-SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
-SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
+#undef SEQCOUNT_LOCKNAME
/*
* SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -294,19 +247,20 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex
#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define __seqprop_case(s, lockname, prop) \
- seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s))
+ seqcount_##lockname##_t: __seqprop_##lockname##_##prop
#define __seqprop(s, prop) _Generic(*(s), \
- seqcount_t: __seqprop_##prop((void *)(s)), \
+ seqcount_t: __seqprop_##prop, \
__seqprop_case((s), raw_spinlock, prop), \
__seqprop_case((s), spinlock, prop), \
__seqprop_case((s), rwlock, prop), \
__seqprop_case((s), mutex, prop))
-#define seqprop_ptr(s) __seqprop(s, ptr)
-#define seqprop_sequence(s) __seqprop(s, sequence)
-#define seqprop_preemptible(s) __seqprop(s, preemptible)
-#define seqprop_assert(s) __seqprop(s, assert)
+#define seqprop_ptr(s) __seqprop(s, ptr)(s)
+#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s)
+#define seqprop_sequence(s) __seqprop(s, sequence)(s)
+#define seqprop_preemptible(s) __seqprop(s, preemptible)(s)
+#define seqprop_assert(s) __seqprop(s, assert)(s)
/**
* __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
@@ -355,7 +309,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex
*/
#define read_seqcount_begin(s) \
({ \
- seqcount_lockdep_reader_access(seqprop_ptr(s)); \
+ seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \
raw_read_seqcount_begin(s); \
})
@@ -421,7 +375,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex
* Return: true if a read section retry is required, else false
*/
#define __read_seqcount_retry(s, start) \
- do___read_seqcount_retry(seqprop_ptr(s), start)
+ do___read_seqcount_retry(seqprop_const_ptr(s), start)
static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -441,7 +395,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
* Return: true if a read section retry is required, else false
*/
#define read_seqcount_retry(s, start) \
- do_read_seqcount_retry(seqprop_ptr(s), start)
+ do_read_seqcount_retry(seqprop_const_ptr(s), start)
static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -512,8 +466,8 @@ do { \
static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
- do_raw_write_seqcount_begin(s);
seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+ do_raw_write_seqcount_begin(s);
}
/**
@@ -574,7 +528,7 @@ static inline void do_write_seqcount_end(seqcount_t *s)
* via WRITE_ONCE): a) to ensure the writes become visible to other threads
* atomically, avoiding compiler optimizations; b) to document which writes are
* meant to propagate to the reader critical section. This is necessary because
- * neither writes before and after the barrier are enclosed in a seq-writer
+ * neither writes before nor after the barrier are enclosed in a seq-writer
* critical section that would ensure readers are aware of ongoing writes::
*
* seqcount_t seq;
@@ -784,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
smp_wmb(); /* increment "sequence" before following stores */
}
-/*
- * Sequential locks (seqlock_t)
- *
- * Sequence counters with an embedded spinlock for writer serialization
- * and non-preemptibility.
- *
- * For more info, see:
- * - Comments on top of seqcount_t
- * - Documentation/locking/seqlock.rst
- */
-typedef struct {
- /*
- * Make sure that readers don't starve writers on PREEMPT_RT: use
- * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
- */
- seqcount_spinlock_t seqcount;
- spinlock_t lock;
-} seqlock_t;
-
#define __SEQLOCK_UNLOCKED(lockname) \
{ \
.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
@@ -864,7 +799,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
}
/*
- * For all seqlock_t write side functions, use the the internal
+ * For all seqlock_t write side functions, use the internal
* do_write_seqcount_begin() instead of generic write_seqcount_begin().
* This way, no redundant lockdep_assert_held() checks are added.
*/
diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h
new file mode 100644
index 000000000000..dfdf43e3fa3d
--- /dev/null
+++ b/include/linux/seqlock_types.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_SEQLOCK_TYPES_H
+#define __LINUX_SEQLOCK_TYPES_H
+
+#include <linux/lockdep_types.h>
+#include <linux/mutex_types.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKNAME_t) instead.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+typedef struct seqcount {
+ unsigned sequence;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} seqcount_t;
+
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks. See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
+#define __SEQ_LOCK(expr) expr
+#else
+#define __SEQ_LOCK(expr)
+#endif
+
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
+typedef struct seqcount_##lockname { \
+ seqcount_t seqcount; \
+ __SEQ_LOCK(locktype *lock); \
+} seqcount_##lockname##_t;
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
+#undef SEQCOUNT_LOCKNAME
+
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ * - Comments on top of seqcount_t
+ * - Documentation/locking/seqlock.rst
+ */
+typedef struct {
+ /*
+ * Make sure that readers don't starve writers on PREEMPT_RT: use
+ * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+ */
+ seqcount_spinlock_t seqcount;
+ spinlock_t lock;
+} seqlock_t;
+
+#endif /* __LINUX_SEQLOCK_TYPES_H */
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index f5f97fa25e8a..ff78efc1f60d 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -27,7 +27,7 @@ struct serdev_device;
* not sleep.
*/
struct serdev_device_ops {
- int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t);
+ size_t (*receive_buf)(struct serdev_device *, const u8 *, size_t);
void (*write_wakeup)(struct serdev_device *);
};
@@ -82,7 +82,7 @@ enum serdev_parity {
* serdev controller structures
*/
struct serdev_controller_ops {
- int (*write_buf)(struct serdev_controller *, const unsigned char *, size_t);
+ ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t);
void (*write_flush)(struct serdev_controller *);
int (*write_room)(struct serdev_controller *);
int (*open)(struct serdev_controller *);
@@ -99,12 +99,14 @@ struct serdev_controller_ops {
/**
* struct serdev_controller - interface to the serdev controller
* @dev: Driver model representation of the device.
+ * @host: Serial port hardware controller device
* @nr: number identifier for this controller/bus.
* @serdev: Pointer to slave device for this controller.
* @ops: Controller operations.
*/
struct serdev_controller {
struct device dev;
+ struct device *host;
unsigned int nr;
struct serdev_device *serdev;
const struct serdev_controller_ops *ops;
@@ -167,7 +169,9 @@ struct serdev_device *serdev_device_alloc(struct serdev_controller *);
int serdev_device_add(struct serdev_device *);
void serdev_device_remove(struct serdev_device *);
-struct serdev_controller *serdev_controller_alloc(struct device *, size_t);
+struct serdev_controller *serdev_controller_alloc(struct device *host,
+ struct device *parent,
+ size_t size);
int serdev_controller_add(struct serdev_controller *);
void serdev_controller_remove(struct serdev_controller *);
@@ -181,9 +185,9 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl
serdev->ops->write_wakeup(serdev);
}
-static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
- const unsigned char *data,
- size_t count)
+static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl,
+ const u8 *data,
+ size_t count)
{
struct serdev_device *serdev = ctrl->serdev;
@@ -200,13 +204,13 @@ void serdev_device_close(struct serdev_device *);
int devm_serdev_device_open(struct device *, struct serdev_device *);
unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
void serdev_device_set_flow_control(struct serdev_device *, bool);
-int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
+int serdev_device_write_buf(struct serdev_device *, const u8 *, size_t);
void serdev_device_wait_until_sent(struct serdev_device *, long);
int serdev_device_get_tiocm(struct serdev_device *);
int serdev_device_set_tiocm(struct serdev_device *, int, int);
int serdev_device_break_ctl(struct serdev_device *serdev, int break_state);
void serdev_device_write_wakeup(struct serdev_device *);
-int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, long);
+ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long);
void serdev_device_write_flush(struct serdev_device *);
int serdev_device_write_room(struct serdev_device *);
@@ -244,7 +248,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
}
static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
static inline int serdev_device_write_buf(struct serdev_device *serdev,
- const unsigned char *buf,
+ const u8 *buf,
size_t count)
{
return -ENODEV;
@@ -262,8 +266,9 @@ static inline int serdev_device_break_ctl(struct serdev_device *serdev, int brea
{
return -EOPNOTSUPP;
}
-static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf,
- size_t count, unsigned long timeout)
+static inline ssize_t serdev_device_write(struct serdev_device *sdev,
+ const u8 *buf, size_t count,
+ unsigned long timeout)
{
return -ENODEV;
}
@@ -311,11 +316,13 @@ struct tty_driver;
#ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT
struct device *serdev_tty_port_register(struct tty_port *port,
+ struct device *host,
struct device *parent,
struct tty_driver *drv, int idx);
int serdev_tty_port_unregister(struct tty_port *port);
#else
static inline struct device *serdev_tty_port_register(struct tty_port *port,
+ struct device *host,
struct device *parent,
struct tty_driver *drv, int idx)
{
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index be65de65fe61..fd59ed2cca53 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -210,6 +210,12 @@ int serial8250_console_exit(struct uart_port *port);
void serial8250_set_isa_configurator(void (*v)(int port, struct uart_port *up,
u32 *capabilities));
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+extern int hp300_setup_serial_console(void) __init;
+#else
+static inline int hp300_setup_serial_console(void) { return 0; }
+#endif
+
#ifdef CONFIG_SERIAL_8250_RT288X
int rt288x_setup(struct uart_port *p);
int au_platform_setup(struct plat_serial8250_port *p);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index a156d2ed8d9e..0a0f6e21d40e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -467,9 +467,10 @@ struct uart_port {
unsigned int fifosize; /* tx fifo size */
unsigned char x_char; /* xon/xoff char */
unsigned char regshift; /* reg offset shift */
+
unsigned char iotype; /* io access style */
- unsigned char quirks; /* internal quirks */
+#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */
#define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */
#define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */
#define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */
@@ -479,7 +480,9 @@ struct uart_port {
#define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */
#define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */
- /* quirks must be updated while holding port mutex */
+ unsigned char quirks; /* internal quirks */
+
+ /* internal quirks must be updated while holding port mutex */
#define UPQ_NO_TXEN_TEST BIT(0)
unsigned int read_status_mask; /* driver specific */
@@ -570,7 +573,7 @@ struct uart_port {
struct serial_port_device *port_dev; /* serial core port device */
unsigned long sysrq; /* sysrq timeout */
- unsigned int sysrq_ch; /* char for sysrq */
+ u8 sysrq_ch; /* char for sysrq */
unsigned char has_sysrq;
unsigned char sysrq_seq; /* index in sysrq_toggle_seq */
@@ -588,6 +591,85 @@ struct uart_port {
void *private_data; /* generic platform data pointer */
};
+/**
+ * uart_port_lock - Lock the UART port
+ * @up: Pointer to UART port structure
+ */
+static inline void uart_port_lock(struct uart_port *up)
+{
+ spin_lock(&up->lock);
+}
+
+/**
+ * uart_port_lock_irq - Lock the UART port and disable interrupts
+ * @up: Pointer to UART port structure
+ */
+static inline void uart_port_lock_irq(struct uart_port *up)
+{
+ spin_lock_irq(&up->lock);
+}
+
+/**
+ * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts
+ * @up: Pointer to UART port structure
+ * @flags: Pointer to interrupt flags storage
+ */
+static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
+{
+ spin_lock_irqsave(&up->lock, *flags);
+}
+
+/**
+ * uart_port_trylock - Try to lock the UART port
+ * @up: Pointer to UART port structure
+ *
+ * Returns: True if lock was acquired, false otherwise
+ */
+static inline bool uart_port_trylock(struct uart_port *up)
+{
+ return spin_trylock(&up->lock);
+}
+
+/**
+ * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts
+ * @up: Pointer to UART port structure
+ * @flags: Pointer to interrupt flags storage
+ *
+ * Returns: True if lock was acquired, false otherwise
+ */
+static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
+{
+ return spin_trylock_irqsave(&up->lock, *flags);
+}
+
+/**
+ * uart_port_unlock - Unlock the UART port
+ * @up: Pointer to UART port structure
+ */
+static inline void uart_port_unlock(struct uart_port *up)
+{
+ spin_unlock(&up->lock);
+}
+
+/**
+ * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts
+ * @up: Pointer to UART port structure
+ */
+static inline void uart_port_unlock_irq(struct uart_port *up)
+{
+ spin_unlock_irq(&up->lock);
+}
+
+/**
+ * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts
+ * @up: Pointer to UART port structure
+ * @flags: The saved interrupt flags for restore
+ */
+static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
+{
+ spin_unlock_irqrestore(&up->lock, flags);
+}
+
static inline int serial_port_in(struct uart_port *up, int offset)
{
return up->serial_in(up, offset);
@@ -669,8 +751,17 @@ struct uart_driver {
void uart_write_wakeup(struct uart_port *port);
-#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \
- for_post) \
+/**
+ * enum UART_TX_FLAGS -- flags for uart_port_tx_flags()
+ *
+ * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer
+ */
+enum UART_TX_FLAGS {
+ UART_TX_NOSTOP = BIT(0),
+};
+
+#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \
+ for_test, for_post) \
({ \
struct uart_port *__port = (uport); \
struct circ_buf *xmit = &__port->state->xmit; \
@@ -698,7 +789,8 @@ void uart_write_wakeup(struct uart_port *port);
if (pending < WAKEUP_CHARS) { \
uart_write_wakeup(__port); \
\
- if (pending == 0) \
+ if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \
+ __port->ops->tx_empty(__port)) \
__port->ops->stop_tx(__port); \
} \
\
@@ -733,7 +825,7 @@ void uart_write_wakeup(struct uart_port *port);
*/
#define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \
unsigned int __count = (count); \
- __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \
__count--); \
})
@@ -747,8 +839,21 @@ void uart_write_wakeup(struct uart_port *port);
* See uart_port_tx_limited() for more details.
*/
#define uart_port_tx(port, ch, tx_ready, put_char) \
- __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({}))
+ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({}))
+
+/**
+ * uart_port_tx_flags -- transmit helper for uart_port with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \
+ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({}))
/*
* Baud rate helpers.
*/
@@ -773,9 +878,9 @@ static inline unsigned long uart_fifo_timeout(struct uart_port *port)
}
/* Base timer interval for polling */
-static inline int uart_poll_timeout(struct uart_port *port)
+static inline unsigned long uart_poll_timeout(struct uart_port *port)
{
- int timeout = uart_fifo_timeout(port);
+ unsigned long timeout = uart_fifo_timeout(port);
return timeout > 6 ? (timeout / 2 - 2) : 1;
}
@@ -858,6 +963,8 @@ int uart_register_driver(struct uart_driver *uart);
void uart_unregister_driver(struct uart_driver *uart);
int uart_add_one_port(struct uart_driver *reg, struct uart_port *port);
void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port);
+int uart_read_port_properties(struct uart_port *port);
+int uart_read_and_validate_port_properties(struct uart_port *port);
bool uart_match_port(const struct uart_port *port1,
const struct uart_port *port2);
@@ -904,16 +1011,16 @@ void uart_handle_dcd_change(struct uart_port *uport, bool active);
void uart_handle_cts_change(struct uart_port *uport, bool active);
void uart_insert_char(struct uart_port *port, unsigned int status,
- unsigned int overrun, unsigned int ch, unsigned int flag);
+ unsigned int overrun, u8 ch, u8 flag);
void uart_xchar_out(struct uart_port *uport, int offset);
#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
#define SYSRQ_TIMEOUT (HZ * 5)
-bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch);
+bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch);
-static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
+static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch)
{
if (!port->sysrq)
return 0;
@@ -932,7 +1039,7 @@ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch
return 0;
}
-static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch)
+static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch)
{
if (!port->sysrq)
return 0;
@@ -953,17 +1060,17 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int c
static inline void uart_unlock_and_check_sysrq(struct uart_port *port)
{
- int sysrq_ch;
+ u8 sysrq_ch;
if (!port->has_sysrq) {
- spin_unlock(&port->lock);
+ uart_port_unlock(port);
return;
}
sysrq_ch = port->sysrq_ch;
port->sysrq_ch = 0;
- spin_unlock(&port->lock);
+ uart_port_unlock(port);
if (sysrq_ch)
handle_sysrq(sysrq_ch);
@@ -972,38 +1079,38 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port)
static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port,
unsigned long flags)
{
- int sysrq_ch;
+ u8 sysrq_ch;
if (!port->has_sysrq) {
- spin_unlock_irqrestore(&port->lock, flags);
+ uart_port_unlock_irqrestore(port, flags);
return;
}
sysrq_ch = port->sysrq_ch;
port->sysrq_ch = 0;
- spin_unlock_irqrestore(&port->lock, flags);
+ uart_port_unlock_irqrestore(port, flags);
if (sysrq_ch)
handle_sysrq(sysrq_ch);
}
#else /* CONFIG_MAGIC_SYSRQ_SERIAL */
-static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
+static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch)
{
return 0;
}
-static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch)
+static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch)
{
return 0;
}
static inline void uart_unlock_and_check_sysrq(struct uart_port *port)
{
- spin_unlock(&port->lock);
+ uart_port_unlock(port);
}
static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port,
unsigned long flags)
{
- spin_unlock_irqrestore(&port->lock, flags);
+ uart_port_unlock_irqrestore(port, flags);
}
#endif /* CONFIG_MAGIC_SYSRQ_SERIAL */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index d8e69aed3d32..c55bef0538e5 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -2,12 +2,12 @@
#ifndef _LINUX_SHM_H_
#define _LINUX_SHM_H_
-#include <linux/list.h>
+#include <linux/types.h>
#include <asm/page.h>
-#include <uapi/linux/shm.h>
#include <asm/shmparam.h>
struct file;
+struct task_struct;
#ifdef CONFIG_SYSVIPC
struct sysv_shm {
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..3fb18f7eb73e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -13,20 +13,32 @@
/* inode in-kernel data */
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
unsigned long swapped; /* subtotal assigned to swap */
- pgoff_t fallocend; /* highest fallocate endindex */
- struct list_head shrinklist; /* shrinkable hpage inodes */
- struct list_head swaplist; /* chain of maybes on swap */
+ union {
+ struct offset_ctx dir_offsets; /* stable directory offsets */
+ struct {
+ struct list_head shrinklist; /* shrinkable hpage inodes */
+ struct list_head swaplist; /* chain of maybes on swap */
+ };
+ };
+ struct timespec64 i_crtime; /* file creation time */
struct shared_policy policy; /* NUMA memory alloc policy */
struct simple_xattrs xattrs; /* list of xattrs */
+ pgoff_t fallocend; /* highest fallocate endindex */
+ unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
atomic_t stop_eviction; /* hold when working on inode */
- struct timespec64 i_crtime; /* file creation time */
- unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+ struct dquot __rcu *i_dquot[MAXQUOTAS];
+#endif
struct inode vfs_inode;
};
@@ -35,11 +47,18 @@ struct shmem_inode_info {
(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+struct shmem_quota_limits {
+ qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+ qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+ qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+ qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
unsigned long max_inodes; /* How many inodes are allowed */
- unsigned long free_inodes; /* How many are left for allocation */
+ unsigned long free_ispace; /* How much ispace left for allocation */
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
umode_t mode; /* Mount mode for root directory */
unsigned char huge; /* Whether to try for hugepages */
@@ -53,6 +72,7 @@ struct shmem_sb_info {
spinlock_t shrinklist_lock; /* Protects shrinklist */
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
+ struct shmem_quota_limits qlimits; /* Default quota limits */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
@@ -77,11 +97,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
#ifdef CONFIG_SHMEM
-extern const struct address_space_operations shmem_aops;
-static inline bool shmem_mapping(struct address_space *mapping)
-{
- return mapping->a_ops == &shmem_aops;
-}
+bool shmem_mapping(struct address_space *mapping);
#else
static inline bool shmem_mapping(struct address_space *mapping)
{
@@ -94,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+ struct mm_struct *mm, unsigned long vm_flags)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SHMEM
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else
@@ -172,4 +197,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */
+/*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
#endif
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 224293b2dd06..1a00be90d93a 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,6 +4,25 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+
+#define SHRINKER_UNIT_BITS BITS_PER_LONG
+
+/*
+ * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
+ * shrinkers, which have elements charged to the memcg.
+ */
+struct shrinker_info_unit {
+ atomic_long_t nr_deferred[SHRINKER_UNIT_BITS];
+ DECLARE_BITMAP(map, SHRINKER_UNIT_BITS);
+};
+
+struct shrinker_info {
+ struct rcu_head rcu;
+ int map_nr_max;
+ struct shrinker_info_unit *unit[];
+};
/*
* This struct is used to pass information from page reclaim to the shrinkers.
@@ -70,6 +89,19 @@ struct shrinker {
int seeks; /* seeks to recreate an obj */
unsigned flags;
+ /*
+ * The reference count of this shrinker. Registered shrinker have an
+ * initial refcount of 1, then the lookup operations are now allowed
+ * to use it via shrinker_try_get(). Later in the unregistration step,
+ * the initial refcount will be discarded, and will free the shrinker
+ * asynchronously via RCU after its refcount reaches 0.
+ */
+ refcount_t refcount;
+ struct completion done; /* use to wait for refcount to reach 0 */
+ struct rcu_head rcu;
+
+ void *private_data;
+
/* These are for internal use */
struct list_head list;
#ifdef CONFIG_MEMCG
@@ -86,48 +118,39 @@ struct shrinker {
};
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
-/* Flags */
-#define SHRINKER_REGISTERED (1 << 0)
-#define SHRINKER_NUMA_AWARE (1 << 1)
-#define SHRINKER_MEMCG_AWARE (1 << 2)
+/* Internal flags */
+#define SHRINKER_REGISTERED BIT(0)
+#define SHRINKER_ALLOCATED BIT(1)
+
+/* Flags for users to use */
+#define SHRINKER_NUMA_AWARE BIT(2)
+#define SHRINKER_MEMCG_AWARE BIT(3)
/*
* It just makes sense when the shrinker is also MEMCG_AWARE for now,
* non-MEMCG_AWARE shrinker should not have this flag set.
*/
-#define SHRINKER_NONSLAB (1 << 3)
+#define SHRINKER_NONSLAB BIT(4)
-extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker,
- const char *fmt, ...);
-extern void register_shrinker_prepared(struct shrinker *shrinker);
-extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker,
- const char *fmt, ...);
-extern void unregister_shrinker(struct shrinker *shrinker);
-extern void free_prealloced_shrinker(struct shrinker *shrinker);
-extern void synchronize_shrinkers(void);
+__printf(2, 3)
+struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
+void shrinker_register(struct shrinker *shrinker);
+void shrinker_free(struct shrinker *shrinker);
-#ifdef CONFIG_SHRINKER_DEBUG
-extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
- int *debugfs_id);
-extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
- int debugfs_id);
-extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
- const char *fmt, ...);
-#else /* CONFIG_SHRINKER_DEBUG */
-static inline int shrinker_debugfs_add(struct shrinker *shrinker)
-{
- return 0;
-}
-static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
- int *debugfs_id)
+static inline bool shrinker_try_get(struct shrinker *shrinker)
{
- *debugfs_id = -1;
- return NULL;
+ return refcount_inc_not_zero(&shrinker->refcount);
}
-static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
- int debugfs_id)
+
+static inline void shrinker_put(struct shrinker *shrinker)
{
+ if (refcount_dec_and_test(&shrinker->refcount))
+ complete(&shrinker->done);
}
+
+#ifdef CONFIG_SHRINKER_DEBUG
+extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
+ const char *fmt, ...);
+#else /* CONFIG_SHRINKER_DEBUG */
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3b98e7a28538..f19816832f05 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -3,6 +3,7 @@
#define _LINUX_SIGNAL_H
#include <linux/bug.h>
+#include <linux/list.h>
#include <linux/signal_types.h>
#include <linux/string.h>
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index a70b2bdbf4d9..caf4f7a59ab9 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -6,7 +6,7 @@
* Basic signal handling related data type definitions:
*/
-#include <linux/list.h>
+#include <linux/types.h>
#include <uapi/linux/signal.h>
typedef struct kernel_siginfo {
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 84aa448d8bb3..c3a00b967d18 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -47,8 +47,17 @@
#define SZ_8G _AC(0x200000000, ULL)
#define SZ_16G _AC(0x400000000, ULL)
#define SZ_32G _AC(0x800000000, ULL)
+#define SZ_64G _AC(0x1000000000, ULL)
+#define SZ_128G _AC(0x2000000000, ULL)
+#define SZ_256G _AC(0x4000000000, ULL)
+#define SZ_512G _AC(0x8000000000, ULL)
#define SZ_1T _AC(0x10000000000, ULL)
+#define SZ_2T _AC(0x20000000000, ULL)
+#define SZ_4T _AC(0x40000000000, ULL)
+#define SZ_8T _AC(0x80000000000, ULL)
+#define SZ_16T _AC(0x100000000000, ULL)
+#define SZ_32T _AC(0x200000000000, ULL)
#define SZ_64T _AC(0x400000000000, ULL)
#endif /* __LINUX_SIZES_H__ */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 91ed66952580..4ff48eda3f64 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -32,12 +32,12 @@
#include <linux/if_packet.h>
#include <linux/llist.h>
#include <net/flow.h>
-#include <net/page_pool.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_common.h>
#endif
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/netmem.h>
/**
* DOC: skb checksums
@@ -296,7 +296,7 @@ struct nf_bridge_info {
u8 bridged_dnat:1;
u8 sabotage_in_done:1;
__u16 frag_max_size;
- struct net_device *physindev;
+ int physinif;
/* always valid & non-NULL from FORWARD on, for physdev match */
struct net_device *physoutdev;
@@ -360,7 +360,11 @@ extern int sysctl_max_skb_frags;
*/
#define GSO_BY_FRAGS 0xFFFF
-typedef struct bio_vec skb_frag_t;
+typedef struct skb_frag {
+ netmem_ref netmem;
+ unsigned int len;
+ unsigned int offset;
+} skb_frag_t;
/**
* skb_frag_size() - Returns the size of a skb fragment
@@ -368,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
*/
static inline unsigned int skb_frag_size(const skb_frag_t *frag)
{
- return frag->bv_len;
+ return frag->len;
}
/**
@@ -378,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
*/
static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
{
- frag->bv_len = size;
+ frag->len = size;
}
/**
@@ -388,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
*/
static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
{
- frag->bv_len += delta;
+ frag->len += delta;
}
/**
@@ -398,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
*/
static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
{
- frag->bv_len -= delta;
+ frag->len -= delta;
}
/**
@@ -418,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
* skb_frag_foreach_page - loop over pages in a fragment
*
* @f: skb frag to operate on
- * @f_off: offset from start of f->bv_page
+ * @f_off: offset from start of f->netmem
* @f_len: length from f_off to loop over
* @p: (temp var) current page
* @p_off: (temp var) offset from start of current page,
@@ -441,8 +445,6 @@ static inline bool skb_frag_must_loop(struct page *p)
copied += p_len, p++, p_off = 0, \
p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \
-#define HAVE_HW_TIME_STAMP
-
/**
* struct skb_shared_hwtstamps - hardware time stamps
* @hwtstamp: hardware time stamp transformed into duration
@@ -569,6 +571,15 @@ struct ubuf_info_msgzc {
int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);
+/* Preserve some data across TX submission and completion.
+ *
+ * Note, this state is stored in the driver. Extending the layout
+ * might need some special care.
+ */
+struct xsk_tx_metadata_compl {
+ __u64 *tx_timestamp;
+};
+
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@@ -581,7 +592,10 @@ struct skb_shared_info {
/* Warning: this field is not always filled in (UFO)! */
unsigned short gso_segs;
struct sk_buff *frag_list;
- struct skb_shared_hwtstamps hwtstamps;
+ union {
+ struct skb_shared_hwtstamps hwtstamps;
+ struct xsk_tx_metadata_compl xsk_meta;
+ };
unsigned int gso_type;
u32 tskey;
@@ -739,13 +753,10 @@ typedef unsigned char *sk_buff_data_t;
* @list: queue head
* @ll_node: anchor in an llist (eg socket defer_list)
* @sk: Socket we are owned by
- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
- * fragmentation management
* @dev: Device we arrived on/are leaving by
* @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @_skb_refdst: destination entry (with norefcount bit)
- * @sp: the security path, used for xfrm
* @len: Length of actual data
* @data_len: Data length
* @mac_len: Length of link layer header
@@ -779,7 +790,6 @@ typedef unsigned char *sk_buff_data_t;
* @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
* @_sk_redir: socket redirection information for skmsg
* @_nfct: Associated connection, if any (with nfctinfo bits)
- * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
* @tc_index: Traffic control index
* @hash: the packet hash
@@ -863,10 +873,7 @@ struct sk_buff {
struct llist_node ll_node;
};
- union {
- struct sock *sk;
- int ip_defrag_offset;
- };
+ struct sock *sk;
union {
ktime_t tstamp;
@@ -944,7 +951,7 @@ struct sk_buff {
__u8 __mono_tc_offset[0];
/* public: */
__u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1;
#endif
@@ -993,7 +1000,7 @@ struct sk_buff {
__u8 csum_not_inet:1;
#endif
-#ifdef CONFIG_NET_SCHED
+#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
__u16 tc_index; /* traffic control index */
#endif
@@ -1060,7 +1067,7 @@ struct sk_buff {
refcount_t users;
#ifdef CONFIG_SKB_EXTENSIONS
- /* only useable after checking ->active_extensions != 0 */
+ /* only usable after checking ->active_extensions != 0 */
struct skb_ext *extensions;
#endif
};
@@ -1225,6 +1232,24 @@ static inline bool skb_unref(struct sk_buff *skb)
return true;
}
+static inline bool skb_data_unref(const struct sk_buff *skb,
+ struct skb_shared_info *shinfo)
+{
+ int bias;
+
+ if (!skb->cloned)
+ return true;
+
+ bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+ if (atomic_read(&shinfo->dataref) == bias)
+ smp_rmb();
+ else if (atomic_sub_return(bias, &shinfo->dataref))
+ return false;
+
+ return true;
+}
+
void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
@@ -1259,7 +1284,6 @@ static inline void consume_skb(struct sk_buff *skb)
void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
-extern struct kmem_cache *skbuff_cache;
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
@@ -1312,7 +1336,7 @@ struct sk_buff_fclones {
*
* Returns true if skb is a fast clone, and its clone is not freed.
* Some drivers call skb_orphan() in their ndo_start_xmit(),
- * so we also check that this didnt happen.
+ * so we also check that didn't happen.
*/
static inline bool skb_fclone_busy(const struct sock *sk,
const struct sk_buff *skb)
@@ -2019,7 +2043,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
* Copy shared buffers into a new sk_buff. We effectively do COW on
* packets to handle cases where we have a local reader and forward
* and a couple of other messy ones. The normal one is tcpdumping
- * a packet thats being forwarded.
+ * a packet that's being forwarded.
*/
/**
@@ -2422,22 +2446,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
return skb_headlen(skb) + __skb_pagelen(skb);
}
+static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
+ netmem_ref netmem, int off,
+ int size)
+{
+ frag->netmem = netmem;
+ frag->offset = off;
+ skb_frag_size_set(frag, size);
+}
+
static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
struct page *page,
int off, int size)
{
- frag->bv_page = page;
- frag->bv_offset = off;
- skb_frag_size_set(frag, size);
+ skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
+}
+
+static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
+ int i, netmem_ref netmem,
+ int off, int size)
+{
+ skb_frag_t *frag = &shinfo->frags[i];
+
+ skb_frag_fill_netmem_desc(frag, netmem, off, size);
}
static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
int i, struct page *page,
int off, int size)
{
- skb_frag_t *frag = &shinfo->frags[i];
-
- skb_frag_fill_page_desc(frag, page, off, size);
+ __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
+ size);
}
/**
@@ -2453,10 +2492,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
}
/**
- * __skb_fill_page_desc - initialise a paged fragment in an skb
+ * __skb_fill_netmem_desc - initialise a fragment in an skb
* @skb: buffer containing fragment to be initialised
- * @i: paged fragment index to initialise
- * @page: the page to use for this fragment
+ * @i: fragment index to initialise
+ * @netmem: the netmem to use for this fragment
* @off: the offset to the data with @page
* @size: the length of the data
*
@@ -2465,10 +2504,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
*
* Does not take any additional reference on the fragment.
*/
-static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
- struct page *page, int off, int size)
+static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
+ netmem_ref netmem, int off, int size)
{
- __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
+ struct page *page = netmem_to_page(netmem);
+
+ __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
/* Propagate page pfmemalloc to the skb if we can. The problem is
* that not all callers have unique ownership of the page but rely
@@ -2476,7 +2517,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
*/
page = compound_head(page);
if (page_is_pfmemalloc(page))
- skb->pfmemalloc = true;
+ skb->pfmemalloc = true;
+}
+
+static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
+{
+ __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
+}
+
+static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
+ netmem_ref netmem, int off, int size)
+{
+ __skb_fill_netmem_desc(skb, i, netmem, off, size);
+ skb_shinfo(skb)->nr_frags = i + 1;
}
/**
@@ -2496,8 +2550,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size)
{
- __skb_fill_page_desc(skb, i, page, off, size);
- skb_shinfo(skb)->nr_frags = i + 1;
+ skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
}
/**
@@ -2521,8 +2574,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
shinfo->nr_frags = i + 1;
}
-void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
- int size, unsigned int truesize);
+void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
+ int off, int size, unsigned int truesize);
+
+static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
+ struct page *page, int off, int size,
+ unsigned int truesize)
+{
+ skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
+ truesize);
+}
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize);
@@ -2635,6 +2696,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val)
void *skb_push(struct sk_buff *skb, unsigned int len);
static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
skb->data -= len;
skb->len += len;
return skb->data;
@@ -2643,6 +2706,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
void *skb_pull(struct sk_buff *skb, unsigned int len);
static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
skb->len -= len;
if (unlikely(skb->len < skb->data_len)) {
#if defined(CONFIG_DEBUG_NET)
@@ -2667,6 +2732,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta);
static inline enum skb_drop_reason
pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+
if (likely(len <= skb_headlen(skb)))
return SKB_NOT_DROPPED_YET;
@@ -2839,6 +2906,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
skb->inner_network_header += offset;
}
+static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb)
+{
+ return skb->inner_network_header > 0;
+}
+
static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
{
return skb->head + skb->inner_mac_header;
@@ -2959,6 +3031,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb)
}
}
+/* Move the full mac header up to current network_header.
+ * Leaves skb->data pointing at offset skb->mac_len into the mac_header.
+ * Must be provided the complete mac header length.
+ */
+static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len)
+{
+ if (skb_mac_header_was_set(skb)) {
+ const unsigned char *old_mac = skb_mac_header(skb);
+
+ skb_set_mac_header(skb, -full_mac_len);
+ memmove(skb_mac_header(skb), old_mac, full_mac_len);
+ __skb_push(skb, full_mac_len - skb->mac_len);
+ }
+}
+
static inline int skb_checksum_start_offset(const struct sk_buff *skb)
{
return skb->csum_start - skb_headroom(skb);
@@ -2976,6 +3063,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb)
static inline u32 skb_network_header_len(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb));
return skb->transport_header - skb->network_header;
}
@@ -3152,22 +3240,38 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
}
/**
- * __skb_queue_purge - empty a list
+ * __skb_queue_purge_reason - empty a list
* @list: list to empty
+ * @reason: drop reason
*
* Delete all buffers on an &sk_buff list. Each buffer is removed from
* the list and one reference dropped. This function does not take the
* list lock and the caller must hold the relevant locks to use it.
*/
-static inline void __skb_queue_purge(struct sk_buff_head *list)
+static inline void __skb_queue_purge_reason(struct sk_buff_head *list,
+ enum skb_drop_reason reason)
{
struct sk_buff *skb;
+
while ((skb = __skb_dequeue(list)) != NULL)
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
+}
+
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+ __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE);
+}
+
+void skb_queue_purge_reason(struct sk_buff_head *list,
+ enum skb_drop_reason reason);
+
+static inline void skb_queue_purge(struct sk_buff_head *list)
+{
+ skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE);
}
-void skb_queue_purge(struct sk_buff_head *list);
unsigned int skb_rbtree_purge(struct rb_root *root);
+void skb_errqueue_purge(struct sk_buff_head *list);
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
@@ -3286,7 +3390,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
unsigned int order)
{
/* This piece of code contains several assumptions.
- * 1. This is for device Rx, therefor a cold page is preferred.
+ * 1. This is for device Rx, therefore a cold page is preferred.
* 2. The expectation is the user wants a compound page.
* 3. If requesting a order 0 page it will not be compound
* due to the check to see if order has a value in prep_new_page
@@ -3355,7 +3459,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
*/
static inline unsigned int skb_frag_off(const skb_frag_t *frag)
{
- return frag->bv_offset;
+ return frag->offset;
}
/**
@@ -3365,7 +3469,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
*/
static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
{
- frag->bv_offset += delta;
+ frag->offset += delta;
}
/**
@@ -3375,7 +3479,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
*/
static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
{
- frag->bv_offset = offset;
+ frag->offset = offset;
}
/**
@@ -3386,7 +3490,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
static inline void skb_frag_off_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom)
{
- fragto->bv_offset = fragfrom->bv_offset;
+ fragto->offset = fragfrom->offset;
}
/**
@@ -3397,7 +3501,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
*/
static inline struct page *skb_frag_page(const skb_frag_t *frag)
{
- return frag->bv_page;
+ return netmem_to_page(frag->netmem);
}
/**
@@ -3423,13 +3527,29 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
+int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom);
+int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
+ struct bpf_prog *prog);
+bool napi_pp_put_page(struct page *page, bool napi_safe);
+
+static inline void
+skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
+{
+#ifdef CONFIG_PAGE_POOL
+ if (skb->pp_recycle && napi_pp_put_page(page, napi_safe))
+ return;
+#endif
+ put_page(page);
+}
+
static inline void
napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
{
struct page *page = skb_frag_page(frag);
#ifdef CONFIG_PAGE_POOL
- if (recycle && page_pool_return_skb_page(page, napi_safe))
+ if (recycle && napi_pp_put_page(page, napi_safe))
return;
#endif
put_page(page);
@@ -3499,7 +3619,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
static inline void skb_frag_page_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom)
{
- fragto->bv_page = fragfrom->bv_page;
+ fragto->netmem = fragfrom->netmem;
}
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
@@ -3664,6 +3784,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l
return __skb_put_padto(skb, len, true);
}
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
+ __must_check;
+
static inline int skb_add_data(struct sk_buff *skb,
struct iov_iter *from, int copy)
{
@@ -3979,6 +4102,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features
unsigned int offset);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len);
+int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
@@ -4023,7 +4147,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
if (likely(hlen - offset >= len))
return (void *)data + offset;
- if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
+ if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
return NULL;
return buffer;
@@ -4036,6 +4160,14 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer)
skb_headlen(skb), buffer);
}
+static inline void * __must_check
+skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len)
+{
+ if (likely(skb_headlen(skb) - offset >= len))
+ return skb->data + offset;
+ return NULL;
+}
+
/**
* skb_needs_linearize - check if we need to linearize a given skb
* depending on the given device features.
@@ -4209,10 +4341,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
{
const void *a = skb_metadata_end(skb_a);
const void *b = skb_metadata_end(skb_b);
- /* Using more efficient varaiant than plain call to memcmp(). */
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
u64 diffs = 0;
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+ BITS_PER_LONG != 64)
+ goto slow;
+
+ /* Using more efficient variant than plain call to memcmp(). */
switch (meta_len) {
#define __it(x, op) (x -= sizeof(u##op))
#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
@@ -4232,11 +4367,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
fallthrough;
case 4: diffs |= __it_diff(a, b, 32);
break;
+ default:
+slow:
+ return memcmp(a - meta_len, b - meta_len, meta_len);
}
return diffs;
-#else
- return memcmp(a - meta_len, b - meta_len, meta_len);
-#endif
}
static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c1637515a8a4..a509caf823d6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -100,12 +100,18 @@ struct sk_psock {
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
+ /* psock_update_sk_prot may be called with restore=false many times
+ * so the handler must be safe for this case. It will be called
+ * exactly once with restore=true when the psock is being destroyed
+ * and psock refcnt is zero, but before an RCU grace period.
+ */
int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock,
bool restore);
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct delayed_work work;
+ struct sock *sk_pair;
struct rcu_work rwork;
};
@@ -455,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
+ read_lock_bh(&sk->sk_callback_lock);
if (psock->saved_data_ready)
psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static inline void psock_set_prog(struct bpf_prog **pprog,
@@ -499,12 +507,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
return !!psock->saved_data_ready;
}
-static inline bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
#define BPF_F_STRPARSER (1UL << 1)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 848c7c82ad5a..739b21262507 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -19,30 +19,71 @@
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
+#include <linux/hash.h>
+
+enum _slab_flag_bits {
+ _SLAB_CONSISTENCY_CHECKS,
+ _SLAB_RED_ZONE,
+ _SLAB_POISON,
+ _SLAB_KMALLOC,
+ _SLAB_HWCACHE_ALIGN,
+ _SLAB_CACHE_DMA,
+ _SLAB_CACHE_DMA32,
+ _SLAB_STORE_USER,
+ _SLAB_PANIC,
+ _SLAB_TYPESAFE_BY_RCU,
+ _SLAB_TRACE,
+#ifdef CONFIG_DEBUG_OBJECTS
+ _SLAB_DEBUG_OBJECTS,
+#endif
+ _SLAB_NOLEAKTRACE,
+ _SLAB_NO_MERGE,
+#ifdef CONFIG_FAILSLAB
+ _SLAB_FAILSLAB,
+#endif
+#ifdef CONFIG_MEMCG_KMEM
+ _SLAB_ACCOUNT,
+#endif
+#ifdef CONFIG_KASAN_GENERIC
+ _SLAB_KASAN,
+#endif
+ _SLAB_NO_USER_FLAGS,
+#ifdef CONFIG_KFENCE
+ _SLAB_SKIP_KFENCE,
+#endif
+#ifndef CONFIG_SLUB_TINY
+ _SLAB_RECLAIM_ACCOUNT,
+#endif
+ _SLAB_OBJECT_POISON,
+ _SLAB_CMPXCHG_DOUBLE,
+ _SLAB_FLAGS_LAST_BIT
+};
+#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U))
/*
* Flags to pass to kmem_cache_create().
- * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
+ * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
*/
/* DEBUG: Perform (expensive) checks on alloc/free */
-#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
+#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
/* DEBUG: Red zone objs in a cache */
-#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U)
+#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE)
/* DEBUG: Poison objects */
-#define SLAB_POISON ((slab_flags_t __force)0x00000800U)
+#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
-#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U)
+#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC)
/* Align objs on cache lines */
-#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
+#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
-#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U)
+#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
/* Use GFP_DMA32 memory */
-#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U)
+#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
/* DEBUG: Store the last owner for bug hunting */
-#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U)
+#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
-#define SLAB_PANIC ((slab_flags_t __force)0x00040000U)
+#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
/*
* SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
*
@@ -94,21 +135,19 @@
* Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
/* Defer freeing slabs to RCU */
-#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U)
-/* Spread some memory over cpuset */
-#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U)
+#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
/* Trace allocations and frees */
-#define SLAB_TRACE ((slab_flags_t __force)0x00200000U)
+#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE)
/* Flag to prevent checks on free */
#ifdef CONFIG_DEBUG_OBJECTS
-# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U)
+# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
#else
-# define SLAB_DEBUG_OBJECTS 0
+# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED
#endif
/* Avoid kmemleak tracing */
-#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U)
+#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)
/*
* Prevent merging with compatible kmem caches. This flag should be used
@@ -120,25 +159,25 @@
* - performance critical caches, should be very rare and consulted with slab
* maintainers, and not used together with CONFIG_SLUB_TINY
*/
-#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U)
+#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE)
/* Fault injection mark */
#ifdef CONFIG_FAILSLAB
-# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U)
+# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB)
#else
-# define SLAB_FAILSLAB 0
+# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
#ifdef CONFIG_MEMCG_KMEM
-# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U)
+# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
-# define SLAB_ACCOUNT 0
+# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
#endif
#ifdef CONFIG_KASAN_GENERIC
-#define SLAB_KASAN ((slab_flags_t __force)0x08000000U)
+#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN)
#else
-#define SLAB_KASAN 0
+#define SLAB_KASAN __SLAB_FLAG_UNUSED
#endif
/*
@@ -146,20 +185,20 @@
* Intended for caches created for self-tests so they have only flags
* specified in the code and other flags are ignored.
*/
-#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)
+#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)
#ifdef CONFIG_KFENCE
-#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U)
+#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
#else
-#define SLAB_SKIP_KFENCE 0
+#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED
#endif
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
-#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
+#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
-#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
+#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
@@ -227,7 +266,7 @@ void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
-DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
+DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
/**
* ksize - Report actual allocation size of associated object
@@ -244,8 +283,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
size_t ksize(const void *objp);
#ifdef CONFIG_PRINTK
-bool kmem_valid_obj(void *object);
-void kmem_dump_obj(void *object);
+bool kmem_dump_obj(void *object);
+#else
+static inline bool kmem_dump_obj(void *object) { return false; }
#endif
/*
@@ -300,25 +340,15 @@ static inline unsigned int arch_slab_minalign(void)
* Kmalloc array related definitions
*/
-#ifdef CONFIG_SLAB
/*
- * SLAB and SLUB directly allocates requests fitting in to an order-1 page
+ * SLUB directly allocates requests fitting in to an order-1 page
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
-#ifndef KMALLOC_SHIFT_LOW
-#define KMALLOC_SHIFT_LOW 5
-#endif
-#endif
-
-#ifdef CONFIG_SLUB
-#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
+#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW 3
#endif
-#endif
/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
@@ -345,6 +375,12 @@ static inline unsigned int arch_slab_minalign(void)
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies
+#else
+#define RANDOM_KMALLOC_CACHES_NR 0
+#endif
+
/*
* Whenever changing this, take care of that kmalloc_type() and
* create_kmalloc_caches() still work as intended.
@@ -361,6 +397,8 @@ enum kmalloc_cache_type {
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
+ KMALLOC_RANDOM_START = KMALLOC_NORMAL,
+ KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
@@ -386,14 +424,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
(IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
-static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+extern unsigned long random_kmalloc_seed;
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
/*
* The most common case is KMALLOC_NORMAL, so test for it
* with a single branch for all the relevant flags.
*/
if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+ /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+ return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+ ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
return KMALLOC_NORMAL;
+#endif
/*
* At least one of the flags has to be set. Their priorities in
@@ -580,7 +626,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
index = kmalloc_index(size);
return kmalloc_trace(
- kmalloc_caches[kmalloc_type(flags)][index],
+ kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
return __kmalloc(size, flags);
@@ -596,7 +642,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
index = kmalloc_index(size);
return kmalloc_node_trace(
- kmalloc_caches[kmalloc_type(flags)][index],
+ kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
return __kmalloc_node(size, flags, node);
@@ -746,6 +792,8 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
__realloc_size(3);
extern void kvfree(const void *addr);
+DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
+
extern void kvfree_sensitive(const void *addr, size_t len);
unsigned int kmem_cache_size(struct kmem_cache *s);
@@ -768,12 +816,4 @@ size_t kmalloc_size_roundup(size_t size);
void __init kmem_cache_init_late(void);
-#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
-int slab_prepare_cpu(unsigned int cpu);
-int slab_dead_cpu(unsigned int cpu);
-#else
-#define slab_prepare_cpu NULL
-#define slab_dead_cpu NULL
-#endif
-
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
deleted file mode 100644
index a61e7d55d0d3..000000000000
--- a/include/linux/slab_def.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SLAB_DEF_H
-#define _LINUX_SLAB_DEF_H
-
-#include <linux/kfence.h>
-#include <linux/reciprocal_div.h>
-
-/*
- * Definitions unique to the original Linux SLAB allocator.
- */
-
-struct kmem_cache {
- struct array_cache __percpu *cpu_cache;
-
-/* 1) Cache tunables. Protected by slab_mutex */
- unsigned int batchcount;
- unsigned int limit;
- unsigned int shared;
-
- unsigned int size;
- struct reciprocal_value reciprocal_buffer_size;
-/* 2) touched by every alloc & free from the backend */
-
- slab_flags_t flags; /* constant flags */
- unsigned int num; /* # of objs per slab */
-
-/* 3) cache_grow/shrink */
- /* order of pgs per slab (2^n) */
- unsigned int gfporder;
-
- /* force GFP flags, e.g. GFP_DMA */
- gfp_t allocflags;
-
- size_t colour; /* cache colouring range */
- unsigned int colour_off; /* colour offset */
- unsigned int freelist_size;
-
- /* constructor func */
- void (*ctor)(void *obj);
-
-/* 4) cache creation/removal */
- const char *name;
- struct list_head list;
- int refcount;
- int object_size;
- int align;
-
-/* 5) statistics */
-#ifdef CONFIG_DEBUG_SLAB
- unsigned long num_active;
- unsigned long num_allocations;
- unsigned long high_mark;
- unsigned long grown;
- unsigned long reaped;
- unsigned long errors;
- unsigned long max_freeable;
- unsigned long node_allocs;
- unsigned long node_frees;
- unsigned long node_overflow;
- atomic_t allochit;
- atomic_t allocmiss;
- atomic_t freehit;
- atomic_t freemiss;
-
- /*
- * If debugging is enabled, then the allocator can add additional
- * fields and/or padding to every object. 'size' contains the total
- * object size including these internal fields, while 'obj_offset'
- * and 'object_size' contain the offset to the user object and its
- * size.
- */
- int obj_offset;
-#endif /* CONFIG_DEBUG_SLAB */
-
-#ifdef CONFIG_KASAN_GENERIC
- struct kasan_cache kasan_info;
-#endif
-
-#ifdef CONFIG_SLAB_FREELIST_RANDOM
- unsigned int *random_seq;
-#endif
-
-#ifdef CONFIG_HARDENED_USERCOPY
- unsigned int useroffset; /* Usercopy region offset */
- unsigned int usersize; /* Usercopy region size */
-#endif
-
- struct kmem_cache_node *node[MAX_NUMNODES];
-};
-
-static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
- void *x)
-{
- void *object = x - (x - slab->s_mem) % cache->size;
- void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
-
- if (unlikely(object > last_object))
- return last_object;
- else
- return object;
-}
-
-/*
- * We want to avoid an expensive divide : (offset / cache->size)
- * Using the fact that size is a constant for a particular cache,
- * we can replace (offset / cache->size) by
- * reciprocal_divide(offset, cache->reciprocal_buffer_size)
- */
-static inline unsigned int obj_to_index(const struct kmem_cache *cache,
- const struct slab *slab, void *obj)
-{
- u32 offset = (obj - slab->s_mem);
- return reciprocal_divide(offset, cache->reciprocal_buffer_size);
-}
-
-static inline int objs_per_slab(const struct kmem_cache *cache,
- const struct slab *slab)
-{
- if (is_kfence_address(slab_address(slab)))
- return 1;
- return cache->num;
-}
-
-#endif /* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index 12c9719b2a55..3042385b7b40 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -10,7 +10,7 @@
#include <linux/completion.h>
#include <linux/mod_devicetable.h>
-extern struct bus_type slimbus_bus;
+extern const struct bus_type slimbus_bus;
/**
* struct slim_eaddr - Enumeration address for a SLIMbus device
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
deleted file mode 100644
index deb90cf4bffb..000000000000
--- a/include/linux/slub_def.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SLUB_DEF_H
-#define _LINUX_SLUB_DEF_H
-
-/*
- * SLUB : A Slab allocator without object queues.
- *
- * (C) 2007 SGI, Christoph Lameter
- */
-#include <linux/kfence.h>
-#include <linux/kobject.h>
-#include <linux/reciprocal_div.h>
-#include <linux/local_lock.h>
-
-enum stat_item {
- ALLOC_FASTPATH, /* Allocation from cpu slab */
- ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
- FREE_FASTPATH, /* Free to cpu slab */
- FREE_SLOWPATH, /* Freeing not to cpu slab */
- FREE_FROZEN, /* Freeing to frozen slab */
- FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
- FREE_REMOVE_PARTIAL, /* Freeing removes last object */
- ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
- ALLOC_SLAB, /* Cpu slab acquired from page allocator */
- ALLOC_REFILL, /* Refill cpu slab from slab freelist */
- ALLOC_NODE_MISMATCH, /* Switching cpu slab */
- FREE_SLAB, /* Slab freed to the page allocator */
- CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
- DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
- DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
- DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
- DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
- DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
- DEACTIVATE_BYPASS, /* Implicit deactivation */
- ORDER_FALLBACK, /* Number of times fallback was necessary */
- CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
- CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
- CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
- CPU_PARTIAL_FREE, /* Refill cpu partial on free */
- CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
- CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
- NR_SLUB_STAT_ITEMS
-};
-
-#ifndef CONFIG_SLUB_TINY
-/*
- * When changing the layout, make sure freelist and tid are still compatible
- * with this_cpu_cmpxchg_double() alignment requirements.
- */
-struct kmem_cache_cpu {
- union {
- struct {
- void **freelist; /* Pointer to next available object */
- unsigned long tid; /* Globally unique transaction id */
- };
- freelist_aba_t freelist_tid;
- };
- struct slab *slab; /* The slab from which we are allocating */
-#ifdef CONFIG_SLUB_CPU_PARTIAL
- struct slab *partial; /* Partially allocated frozen slabs */
-#endif
- local_lock_t lock; /* Protects the fields above */
-#ifdef CONFIG_SLUB_STATS
- unsigned stat[NR_SLUB_STAT_ITEMS];
-#endif
-};
-#endif /* CONFIG_SLUB_TINY */
-
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-#define slub_percpu_partial(c) ((c)->partial)
-
-#define slub_set_percpu_partial(c, p) \
-({ \
- slub_percpu_partial(c) = (p)->next; \
-})
-
-#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
-#else
-#define slub_percpu_partial(c) NULL
-
-#define slub_set_percpu_partial(c, p)
-
-#define slub_percpu_partial_read_once(c) NULL
-#endif // CONFIG_SLUB_CPU_PARTIAL
-
-/*
- * Word size structure that can be atomically updated or read and that
- * contains both the order and the number of objects that a slab of the
- * given order would contain.
- */
-struct kmem_cache_order_objects {
- unsigned int x;
-};
-
-/*
- * Slab cache management.
- */
-struct kmem_cache {
-#ifndef CONFIG_SLUB_TINY
- struct kmem_cache_cpu __percpu *cpu_slab;
-#endif
- /* Used for retrieving partial slabs, etc. */
- slab_flags_t flags;
- unsigned long min_partial;
- unsigned int size; /* The size of an object including metadata */
- unsigned int object_size;/* The size of an object without metadata */
- struct reciprocal_value reciprocal_size;
- unsigned int offset; /* Free pointer offset */
-#ifdef CONFIG_SLUB_CPU_PARTIAL
- /* Number of per cpu partial objects to keep around */
- unsigned int cpu_partial;
- /* Number of per cpu partial slabs to keep around */
- unsigned int cpu_partial_slabs;
-#endif
- struct kmem_cache_order_objects oo;
-
- /* Allocation and freeing of slabs */
- struct kmem_cache_order_objects min;
- gfp_t allocflags; /* gfp flags to use on each alloc */
- int refcount; /* Refcount for slab cache destroy */
- void (*ctor)(void *);
- unsigned int inuse; /* Offset to metadata */
- unsigned int align; /* Alignment */
- unsigned int red_left_pad; /* Left redzone padding size */
- const char *name; /* Name (only for display!) */
- struct list_head list; /* List of slab caches */
-#ifdef CONFIG_SYSFS
- struct kobject kobj; /* For sysfs */
-#endif
-#ifdef CONFIG_SLAB_FREELIST_HARDENED
- unsigned long random;
-#endif
-
-#ifdef CONFIG_NUMA
- /*
- * Defragmentation by allocating from a remote node.
- */
- unsigned int remote_node_defrag_ratio;
-#endif
-
-#ifdef CONFIG_SLAB_FREELIST_RANDOM
- unsigned int *random_seq;
-#endif
-
-#ifdef CONFIG_KASAN_GENERIC
- struct kasan_cache kasan_info;
-#endif
-
-#ifdef CONFIG_HARDENED_USERCOPY
- unsigned int useroffset; /* Usercopy region offset */
- unsigned int usersize; /* Usercopy region size */
-#endif
-
- struct kmem_cache_node *node[MAX_NUMNODES];
-};
-
-#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
-#define SLAB_SUPPORTS_SYSFS
-void sysfs_slab_unlink(struct kmem_cache *);
-void sysfs_slab_release(struct kmem_cache *);
-#else
-static inline void sysfs_slab_unlink(struct kmem_cache *s)
-{
-}
-static inline void sysfs_slab_release(struct kmem_cache *s)
-{
-}
-#endif
-
-void *fixup_red_left(struct kmem_cache *s, void *p);
-
-static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
- void *x) {
- void *object = x - (x - slab_address(slab)) % cache->size;
- void *last_object = slab_address(slab) +
- (slab->objects - 1) * cache->size;
- void *result = (unlikely(object > last_object)) ? last_object : object;
-
- result = fixup_red_left(cache, result);
- return result;
-}
-
-/* Determine object index from a given position */
-static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
- void *addr, void *obj)
-{
- return reciprocal_divide(kasan_reset_tag(obj) - addr,
- cache->reciprocal_size);
-}
-
-static inline unsigned int obj_to_index(const struct kmem_cache *cache,
- const struct slab *slab, void *obj)
-{
- if (is_kfence_address(obj))
- return 0;
- return __obj_to_index(cache, slab_address(slab), obj);
-}
-
-static inline int objs_per_slab(const struct kmem_cache *cache,
- const struct slab *slab)
-{
- return slab->objects;
-}
-#endif /* _LINUX_SLUB_DEF_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 91ea4a67f8ca..fcd61dfe2af3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask);
-int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
+int smp_call_function_single_async(int cpu, call_single_data_t *csd);
/*
* Cpus stopping functions in panic. All have default weak definitions.
@@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
}
+/*
+ * Architecture specific boot CPU setup. Defined as empty weak function in
+ * init/main.c. Architectures can override it.
+ */
+void smp_prepare_boot_cpu(void);
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
@@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void);
#define generic_smp_call_function_interrupt \
generic_smp_call_function_single_interrupt
-/*
- * Mark the boot cpu "online" so that it can call console drivers in
- * printk() and can access its per-cpu storage.
- */
-void smp_prepare_boot_cpu(void);
-
extern unsigned int setup_max_cpus;
extern void __init setup_nr_cpu_ids(void);
extern void __init smp_init(void);
@@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info)
(up_smp_call_function(func, info))
static inline void smp_send_reschedule(int cpu) { }
-#define smp_prepare_boot_cpu() do {} while (0)
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void call_function_init(void) { }
@@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
static inline void kick_all_cpus_sync(void) { }
static inline void wake_up_all_idle_cpus(void) { }
+#define setup_max_cpus 0
+
#ifdef CONFIG_UP_LATE_INIT
extern void __init up_late_init(void);
static inline void smp_init(void) { up_late_init(); }
@@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void)
* regular asm read for the stable.
*/
#ifndef __smp_processor_id
-#define __smp_processor_id(x) raw_smp_processor_id(x)
+#define __smp_processor_id() raw_smp_processor_id()
#endif
#ifdef CONFIG_DEBUG_PREEMPT
diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h
index e1c88627755a..1a6a851d2cf8 100644
--- a/include/linux/smscphy.h
+++ b/include/linux/smscphy.h
@@ -38,4 +38,38 @@ int smsc_phy_set_tunable(struct phy_device *phydev,
struct ethtool_tunable *tuna, const void *data);
int smsc_phy_probe(struct phy_device *phydev);
+#define MII_LAN874X_PHY_MMD_WOL_WUCSR 0x8010
+#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGA 0x8011
+#define MII_LAN874X_PHY_MMD_WOL_WUF_CFGB 0x8012
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK0 0x8021
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK1 0x8022
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK2 0x8023
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK3 0x8024
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK4 0x8025
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK5 0x8026
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK6 0x8027
+#define MII_LAN874X_PHY_MMD_WOL_WUF_MASK7 0x8028
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRA 0x8061
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRB 0x8062
+#define MII_LAN874X_PHY_MMD_WOL_RX_ADDRC 0x8063
+#define MII_LAN874X_PHY_MMD_MCFGR 0x8064
+
+#define MII_LAN874X_PHY_PME1_SET (2 << 13)
+#define MII_LAN874X_PHY_PME2_SET (2 << 11)
+#define MII_LAN874X_PHY_PME_SELF_CLEAR BIT(9)
+#define MII_LAN874X_PHY_WOL_PFDA_FR BIT(7)
+#define MII_LAN874X_PHY_WOL_WUFR BIT(6)
+#define MII_LAN874X_PHY_WOL_MPR BIT(5)
+#define MII_LAN874X_PHY_WOL_BCAST_FR BIT(4)
+#define MII_LAN874X_PHY_WOL_PFDAEN BIT(3)
+#define MII_LAN874X_PHY_WOL_WUEN BIT(2)
+#define MII_LAN874X_PHY_WOL_MPEN BIT(1)
+#define MII_LAN874X_PHY_WOL_BCSTEN BIT(0)
+
+#define MII_LAN874X_PHY_WOL_FILTER_EN BIT(15)
+#define MII_LAN874X_PHY_WOL_FILTER_MCASTTEN BIT(9)
+#define MII_LAN874X_PHY_WOL_FILTER_BCSTEN BIT(8)
+
+#define MII_LAN874X_PHY_PME_SELF_CLEAR_DELAY 0x1000 /* 81 milliseconds */
+
#endif /* __LINUX_SMSCPHY_H__ */
diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h
new file mode 100644
index 000000000000..edc3182d6e66
--- /dev/null
+++ b/include/linux/soc/andes/irq.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Andes Technology Corporation
+ */
+#ifndef __ANDES_IRQ_H
+#define __ANDES_IRQ_H
+
+/* Andes PMU irq number */
+#define ANDES_RV_IRQ_PMOVI 18
+#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI
+#define ANDES_SLI_CAUSE_BASE 256
+
+/* Andes PMU related registers */
+#define ANDES_CSR_SLIE 0x9c4
+#define ANDES_CSR_SLIP 0x9c5
+#define ANDES_CSR_SCOUNTEROF 0x9d4
+
+#endif /* __ANDES_IRQ_H */
diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h
index fc456f75c131..8c9ca857ccf6 100644
--- a/include/linux/soc/apple/rtkit.h
+++ b/include/linux/soc/apple/rtkit.h
@@ -161,24 +161,6 @@ int apple_rtkit_send_message(struct apple_rtkit *rtk, u8 ep, u64 message,
struct completion *completion, bool atomic);
/*
- * Send a message to the given endpoint and wait until it has been submitted
- * to the hardware FIFO.
- * Will return zero on success and a negative error code on failure
- * (e.g. -ETIME when the message couldn't be written within the given
- * timeout)
- *
- * @rtk: RTKit reference
- * @ep: target endpoint
- * @message: message to be sent
- * @timeout: timeout in milliseconds to allow the message transmission
- * to be completed
- * @atomic: if set to true this function can be called from atomic
- * context.
- */
-int apple_rtkit_send_message_wait(struct apple_rtkit *rtk, u8 ep, u64 message,
- unsigned long timeout, bool atomic);
-
-/*
* Process incoming messages in atomic context.
* This only guarantees that messages arrive as far as the recv_message_early
* callback; drivers expecting to handle incoming messages synchronously
diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h
index 07f67b3d8e97..6c6cccc848f4 100644
--- a/include/linux/soc/mediatek/infracfg.h
+++ b/include/linux/soc/mediatek/infracfg.h
@@ -2,6 +2,47 @@
#ifndef __SOC_MEDIATEK_INFRACFG_H
#define __SOC_MEDIATEK_INFRACFG_H
+#define MT8365_INFRA_TOPAXI_PROTECTEN_STA1 0x228
+#define MT8365_INFRA_TOPAXI_PROTECTEN_SET 0x2a0
+#define MT8365_INFRA_TOPAXI_PROTECTEN_CLR 0x2a4
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MM_M0 BIT(1)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MDMCU_M1 BIT(2)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MMAPB_S BIT(6)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_0 BIT(10)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MM2INFRA_AXI_GALS_SLV_1 BIT(11)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_AP2CONN_AHB BIT(13)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_CONN2INFRA_AHB BIT(14)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_MFG_M0 BIT(21)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_INFRA2MFG BIT(22)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_STA1 0x258
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_SET 0x2a8
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CLR 0x2ac
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU2AP BIT(2)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_0 BIT(16)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_MM2INFRA_AXI_GALS_MST_1 BIT(17)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CONN2INFRA_AXI_GALS_MST BIT(18)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_CAM2MM_AXI_GALS_MST BIT(19)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_APU_CBIP_GALS_MST BIT(20)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_INFRA2CONN_AHB_GALS_SLV BIT(21)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_INFRA_GALS_ADB BIT(24)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_PWRDNREQ_MP1_L2C_AFIFO BIT(27)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_AUDIO_M BIT(28)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_M BIT(30)
+#define MT8365_INFRA_TOPAXI_PROTECTEN_1_AUDIO_BUS_DSP_S BIT(31)
+
+#define MT8365_INFRA_NAO_TOPAXI_SI0_STA 0x0
+#define MT8365_INFRA_NAO_TOPAXI_SI0_CTRL_UPDATED BIT(24)
+#define MT8365_INFRA_NAO_TOPAXI_SI2_STA 0x28
+#define MT8365_INFRA_NAO_TOPAXI_SI2_CTRL_UPDATED BIT(14)
+#define MT8365_INFRA_TOPAXI_SI0_CTL 0x200
+#define MT8365_INFRA_TOPAXI_SI0_WAY_EN_MMAPB_S BIT(6)
+#define MT8365_INFRA_TOPAXI_SI2_CTL 0x234
+#define MT8365_INFRA_TOPAXI_SI2_WAY_EN_PERI_M1 BIT(5)
+
+#define MT8365_SMI_COMMON_CLAMP_EN 0x3c0
+#define MT8365_SMI_COMMON_CLAMP_EN_SET 0x3c4
+#define MT8365_SMI_COMMON_CLAMP_EN_CLR 0x3c8
+
#define MT8195_TOP_AXI_PROT_EN_STA1 0x228
#define MT8195_TOP_AXI_PROT_EN_1_STA1 0x258
#define MT8195_TOP_AXI_PROT_EN_SET 0x2a0
diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h
index 2475ef914746..4885b065b849 100644
--- a/include/linux/soc/mediatek/mtk-mmsys.h
+++ b/include/linux/soc/mediatek/mtk-mmsys.h
@@ -62,6 +62,14 @@ enum mtk_ddp_comp_id {
DDP_COMPONENT_OVL_2L1,
DDP_COMPONENT_OVL_2L2,
DDP_COMPONENT_OVL1,
+ DDP_COMPONENT_PADDING0,
+ DDP_COMPONENT_PADDING1,
+ DDP_COMPONENT_PADDING2,
+ DDP_COMPONENT_PADDING3,
+ DDP_COMPONENT_PADDING4,
+ DDP_COMPONENT_PADDING5,
+ DDP_COMPONENT_PADDING6,
+ DDP_COMPONENT_PADDING7,
DDP_COMPONENT_POSTMASK0,
DDP_COMPONENT_PWM0,
DDP_COMPONENT_PWM1,
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index b2b28180dff7..a476648858a6 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -10,6 +10,7 @@
#define MTK_WED_TX_QUEUES 2
#define MTK_WED_RX_QUEUES 2
+#define MTK_WED_RX_PAGE_QUEUES 3
#define WED_WO_STA_REC 0x6
@@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd {
MTK_WED_WO_CMD_WED_END
};
-struct mtk_rxbm_desc {
+struct mtk_wed_bm_desc {
__le32 buf0;
__le32 token;
} __packed __aligned(4);
@@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats {
__le32 rx_drop_cnt;
};
+struct mtk_wed_buf {
+ void *p;
+ dma_addr_t phy_addr;
+};
+
struct mtk_wed_device {
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
const struct mtk_wed_ops *ops;
@@ -94,17 +100,20 @@ struct mtk_wed_device {
struct mtk_wed_ring txfree_ring;
struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES];
+ struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES];
+ struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES];
+ struct mtk_wed_ring ind_cmd_ring;
struct {
int size;
- void **pages;
+ struct mtk_wed_buf *pages;
struct mtk_wdma_desc *desc;
dma_addr_t desc_phys;
} tx_buf_ring;
struct {
int size;
- struct mtk_rxbm_desc *desc;
+ struct mtk_wed_bm_desc *desc;
dma_addr_t desc_phys;
} rx_buf_ring;
@@ -114,6 +123,13 @@ struct mtk_wed_device {
dma_addr_t fdbk_phys;
} rro;
+ struct {
+ int size;
+ struct mtk_wed_buf *pages;
+ struct mtk_wed_bm_desc *desc;
+ dma_addr_t desc_phys;
+ } hw_rro;
+
/* filled by driver: */
struct {
union {
@@ -123,6 +139,7 @@ struct mtk_wed_device {
enum mtk_wed_bus_tye bus_type;
void __iomem *base;
u32 phy_base;
+ u32 id;
u32 wpdma_phys;
u32 wpdma_int;
@@ -131,18 +148,35 @@ struct mtk_wed_device {
u32 wpdma_txfree;
u32 wpdma_rx_glo;
u32 wpdma_rx;
+ u32 wpdma_rx_rro[MTK_WED_RX_QUEUES];
+ u32 wpdma_rx_pg;
bool wcid_512;
+ bool hw_rro;
+ bool msi;
u16 token_start;
unsigned int nbuf;
unsigned int rx_nbuf;
unsigned int rx_npkt;
unsigned int rx_size;
+ unsigned int amsdu_max_len;
u8 tx_tbit[MTK_WED_TX_QUEUES];
u8 rx_tbit[MTK_WED_RX_QUEUES];
+ u8 rro_rx_tbit[MTK_WED_RX_QUEUES];
+ u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES];
u8 txfree_tbit;
+ u8 amsdu_max_subframes;
+
+ struct {
+ u8 se_group_nums;
+ u16 win_size;
+ u16 particular_sid;
+ u32 ack_sn_addr;
+ dma_addr_t particular_se_phys;
+ dma_addr_t addr_elem_phys[1024];
+ } ind_cmd;
u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
int (*offload_enable)(struct mtk_wed_device *wed);
@@ -182,6 +216,14 @@ struct mtk_wed_ops {
void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev,
enum tc_setup_type type, void *type_data);
+ void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask,
+ bool reset);
+ void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs);
+ void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs);
+ int (*ind_rx_ring_setup)(struct mtk_wed_device *dev,
+ void __iomem *regs);
};
extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
@@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
return ret;
}
-static inline bool
-mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
+static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
{
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ if (dev->version == 3)
+ return dev->wlan.hw_rro;
+
return dev->version != 1;
#else
return false;
#endif
}
+static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev)
+{
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ return dev->version == 3;
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
#define mtk_wed_device_active(_dev) !!(_dev)->ops
#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
@@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev)
#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \
(_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data)
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \
+ (_dev)->ops->start_hw_rro(_dev, _mask, _reset)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \
+ (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \
+ (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \
+ (_dev)->ops->ind_rx_ring_setup(_dev, _regs)
+
#else
static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
{
@@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
#define mtk_wed_device_stop(_dev) do {} while (0)
#define mtk_wed_device_dma_reset(_dev) do {} while (0)
#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV
#endif
#endif
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index be98aebcb3e1..7161a3183eda 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -9,7 +9,7 @@
#include <dt-bindings/soc/qcom,apr.h>
#include <dt-bindings/soc/qcom,gpr.h>
-extern struct bus_type aprbus;
+extern const struct bus_type aprbus;
#define APR_HDR_LEN(hdr_len) ((hdr_len)/4)
diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h
index 821a19135bb6..0f038a1a0330 100644
--- a/include/linux/soc/qcom/geni-se.h
+++ b/include/linux/soc/qcom/geni-se.h
@@ -35,6 +35,7 @@ enum geni_se_protocol_type {
GENI_SE_UART,
GENI_SE_I2C,
GENI_SE_I3C,
+ GENI_SE_SPI_SLAVE,
};
struct geni_wrapper;
@@ -73,12 +74,14 @@ struct geni_se {
/* Common SE registers */
#define GENI_FORCE_DEFAULT_REG 0x20
+#define GENI_OUTPUT_CTRL 0x24
#define SE_GENI_STATUS 0x40
#define GENI_SER_M_CLK_CFG 0x48
#define GENI_SER_S_CLK_CFG 0x4c
#define GENI_IF_DISABLE_RO 0x64
#define GENI_FW_REVISION_RO 0x68
#define SE_GENI_CLK_SEL 0x7c
+#define SE_GENI_CFG_SEQ_START 0x84
#define SE_GENI_DMA_MODE_EN 0x258
#define SE_GENI_M_CMD0 0x600
#define SE_GENI_M_CMD_CTRL_REG 0x604
@@ -111,6 +114,9 @@ struct geni_se {
/* GENI_FORCE_DEFAULT_REG fields */
#define FORCE_DEFAULT BIT(0)
+/* GENI_OUTPUT_CTRL fields */
+#define GENI_IO_MUX_0_EN BIT(0)
+
/* GENI_STATUS fields */
#define M_GENI_CMD_ACTIVE BIT(0)
#define S_GENI_CMD_ACTIVE BIT(12)
@@ -130,6 +136,9 @@ struct geni_se {
/* GENI_CLK_SEL fields */
#define CLK_SEL_MSK GENMASK(2, 0)
+/* SE_GENI_CFG_SEQ_START fields */
+#define START_TRIGGER BIT(0)
+
/* SE_GENI_DMA_MODE_EN */
#define GENI_DMA_MODE_EN BIT(0)
@@ -169,6 +178,7 @@ struct geni_se {
#define M_GP_IRQ_3_EN BIT(12)
#define M_GP_IRQ_4_EN BIT(13)
#define M_GP_IRQ_5_EN BIT(14)
+#define M_TX_FIFO_NOT_EMPTY_EN BIT(21)
#define M_IO_DATA_DEASSERT_EN BIT(22)
#define M_IO_DATA_ASSERT_EN BIT(23)
#define M_RX_FIFO_RD_ERR_EN BIT(24)
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 93417ba1ead4..1a886666bbb6 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -30,7 +30,7 @@
#define LLCC_NPU 23
#define LLCC_WLHW 24
#define LLCC_PIMEM 25
-#define LLCC_DRE 26
+#define LLCC_ECC 26
#define LLCC_CVP 28
#define LLCC_MODPE 29
#define LLCC_APTCM 30
diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h
new file mode 100644
index 000000000000..8a46209ccf13
--- /dev/null
+++ b/include/linux/soc/qcom/qcom-pbs.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _QCOM_PBS_H
+#define _QCOM_PBS_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct device_node;
+struct pbs_dev;
+
+#if IS_ENABLED(CONFIG_QCOM_PBS)
+int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap);
+struct pbs_dev *get_pbs_client_device(struct device *client_dev);
+#else
+static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap)
+{
+ return -ENODEV;
+}
+
+static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev)
+{
+ return ERR_PTR(-ENODEV);
+}
+#endif
+
+#endif
diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h
index 3c2a82e606f8..7361ca028752 100644
--- a/include/linux/soc/qcom/qcom_aoss.h
+++ b/include/linux/soc/qcom/qcom_aoss.h
@@ -13,13 +13,13 @@ struct qmp;
#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP)
-int qmp_send(struct qmp *qmp, const void *data, size_t len);
+int qmp_send(struct qmp *qmp, const char *fmt, ...);
struct qmp *qmp_get(struct device *dev);
void qmp_put(struct qmp *qmp);
#else
-static inline int qmp_send(struct qmp *qmp, const void *data, size_t len)
+static inline int qmp_send(struct qmp *qmp, const char *fmt, ...)
{
return -ENODEV;
}
diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h
index 2990f425fdef..8190878645f9 100644
--- a/include/linux/soc/qcom/smd-rpm.h
+++ b/include/linux/soc/qcom/smd-rpm.h
@@ -2,10 +2,13 @@
#ifndef __QCOM_SMD_RPM_H__
#define __QCOM_SMD_RPM_H__
+#include <linux/types.h>
+
struct qcom_smd_rpm;
-#define QCOM_SMD_RPM_ACTIVE_STATE 0
-#define QCOM_SMD_RPM_SLEEP_STATE 1
+#define QCOM_SMD_RPM_ACTIVE_STATE 0
+#define QCOM_SMD_RPM_SLEEP_STATE 1
+#define QCOM_SMD_RPM_STATE_NUM 2
/*
* Constants used for addressing resources in the RPM.
@@ -44,6 +47,19 @@ struct qcom_smd_rpm;
#define QCOM_SMD_RPM_PKA_CLK 0x616b70
#define QCOM_SMD_RPM_MCFG_CLK 0x6766636d
+#define QCOM_RPM_KEY_SOFTWARE_ENABLE 0x6e657773
+#define QCOM_RPM_KEY_PIN_CTRL_CLK_BUFFER_ENABLE_KEY 0x62636370
+#define QCOM_RPM_SMD_KEY_RATE 0x007a484b
+#define QCOM_RPM_SMD_KEY_ENABLE 0x62616e45
+#define QCOM_RPM_SMD_KEY_STATE 0x54415453
+#define QCOM_RPM_SCALING_ENABLE_ID 0x2
+
+struct clk_smd_rpm_req {
+ __le32 key;
+ __le32 nbytes;
+ __le32 value;
+};
+
int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
int state,
u32 resource_type, u32 resource_id,
diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h
index 223db6a9c733..a36a3b9d4929 100644
--- a/include/linux/soc/qcom/smem.h
+++ b/include/linux/soc/qcom/smem.h
@@ -4,6 +4,7 @@
#define QCOM_SMEM_HOST_ANY -1
+bool qcom_smem_is_available(void);
int qcom_smem_alloc(unsigned host, unsigned item, size_t size);
void *qcom_smem_get(unsigned host, unsigned item, size_t *size);
diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h
index a4f5516cc956..2bd9d12d9a52 100644
--- a/include/linux/soc/samsung/exynos-pmu.h
+++ b/include/linux/soc/samsung/exynos-pmu.h
@@ -10,6 +10,7 @@
#define __LINUX_SOC_EXYNOS_PMU_H
struct regmap;
+struct device_node;
enum sys_powerdown {
SYS_AFTR,
@@ -20,12 +21,20 @@ enum sys_powerdown {
extern void exynos_sys_powerdown_conf(enum sys_powerdown mode);
#ifdef CONFIG_EXYNOS_PMU
-extern struct regmap *exynos_get_pmu_regmap(void);
+struct regmap *exynos_get_pmu_regmap(void);
+struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
+ const char *propname);
#else
static inline struct regmap *exynos_get_pmu_regmap(void)
{
return ERR_PTR(-ENODEV);
}
+
+static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np,
+ const char *propname)
+{
+ return ERR_PTR(-ENODEV);
+}
#endif
#endif /* __LINUX_SOC_EXYNOS_PMU_H */
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 0b9ecd8cf979..110978dc9af1 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -13,6 +13,7 @@ struct nlmsghdr;
struct sock;
struct sock_diag_handler {
+ struct module *owner;
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
@@ -22,8 +23,13 @@ struct sock_diag_handler {
int sock_diag_register(const struct sock_diag_handler *h);
void sock_diag_unregister(const struct sock_diag_handler *h);
-void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
-void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+struct sock_diag_inet_compat {
+ struct module *owner;
+ int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh);
+};
+
+void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr);
+void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr);
u64 __sock_gen_cookie(struct sock *sk);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 39b74d83c7c4..139c330ccf2c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -383,6 +383,7 @@ struct ucred {
#define SOL_MPTCP 284
#define SOL_MCTP 285
#define SOL_SMC 286
+#define SOL_VSOCK 287
/* IPX options */
#define IPX_TYPE 1
@@ -421,13 +422,6 @@ extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr,
unsigned int flags);
-extern int sendmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct iovec **iov);
-extern int recvmsg_copy_msghdr(struct msghdr *msg,
- struct user_msghdr __user *umsg, unsigned flags,
- struct sockaddr __user **uaddr,
- struct iovec **iov);
extern int __copy_msghdr(struct msghdr *kmsg,
struct user_msghdr *umsg,
struct sockaddr __user **save_addr);
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index bae5e2369b4f..317200cd3a60 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -50,11 +50,59 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
return 0;
}
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
{
return copy_from_sockptr_offset(dst, src, 0, size);
}
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst: Destination address, in kernel space. This buffer must be @ksize
+ * bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ * * -EINVAL: @optlen < @ksize
+ * * -EFAULT: access to userspace failed.
+ * * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+ sockptr_t optval, unsigned int optlen)
+{
+ if (optlen < ksize)
+ return -EINVAL;
+ return copy_from_sockptr(dst, optval, ksize);
+}
+
+static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
+ sockptr_t src, size_t usize)
+{
+ size_t size = min(ksize, usize);
+ size_t rest = max(ksize, usize) - size;
+
+ if (!sockptr_is_kernel(src))
+ return copy_struct_from_user(dst, ksize, src.user, size);
+
+ if (usize < ksize) {
+ memset(dst + size, 0, rest);
+ } else if (usize > ksize) {
+ char *p = src.kernel;
+
+ while (rest--) {
+ if (*p++)
+ return -E2BIG;
+ }
+ }
+ memcpy(dst, src.kernel, size);
+ return 0;
+}
+
static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
const void *src, size_t size)
{
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index f523ceabd059..66f814b63a43 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -6,6 +6,8 @@
#include <linux/bug.h>
#include <linux/lockdep_types.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/mod_devicetable.h>
#include <linux/bitfield.h>
@@ -370,6 +372,7 @@ struct sdw_dpn_prop {
* @clock_reg_supported: the Peripheral implements the clock base and scale
* registers introduced with the SoundWire 1.2 specification. SDCA devices
* do not need to set this boolean property as the registers are required.
+ * @use_domain_irq: call actual IRQ handler on slave, as well as callback
*/
struct sdw_slave_prop {
u32 mipi_revision;
@@ -394,6 +397,7 @@ struct sdw_slave_prop {
u8 scp_int1_mask;
u32 quirks;
bool clock_reg_supported;
+ bool use_domain_irq;
};
#define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0)
@@ -482,6 +486,11 @@ struct sdw_slave_id {
__u8 sdw_version:4;
};
+struct sdw_extended_slave_id {
+ int link_id;
+ struct sdw_slave_id id;
+};
+
/*
* Helper macros to extract the MIPI-defined IDs
*
@@ -641,6 +650,7 @@ struct sdw_slave_ops {
* struct sdw_slave - SoundWire Slave
* @id: MIPI device ID
* @dev: Linux device
+ * @irq: IRQ number
* @status: Status reported by the Slave
* @bus: Bus handle
* @prop: Slave properties
@@ -670,6 +680,7 @@ struct sdw_slave_ops {
struct sdw_slave {
struct sdw_slave_id id;
struct device dev;
+ int irq;
enum sdw_slave_status status;
struct sdw_bus *bus;
struct sdw_slave_prop prop;
@@ -847,6 +858,8 @@ struct sdw_defer {
* @post_bank_switch: Callback for post bank switch
* @read_ping_status: Read status from PING frames, reported with two bits per Device.
* Bits 31:24 are reserved.
+ * @get_device_num: Callback for vendor-specific device_number allocation
+ * @put_device_num: Callback for vendor-specific device_number release
* @new_peripheral_assigned: Callback to handle enumeration of new peripheral.
*/
struct sdw_master_ops {
@@ -862,14 +875,19 @@ struct sdw_master_ops {
int (*pre_bank_switch)(struct sdw_bus *bus);
int (*post_bank_switch)(struct sdw_bus *bus);
u32 (*read_ping_status)(struct sdw_bus *bus);
- void (*new_peripheral_assigned)(struct sdw_bus *bus, int dev_num);
+ int (*get_device_num)(struct sdw_bus *bus, struct sdw_slave *slave);
+ void (*put_device_num)(struct sdw_bus *bus, struct sdw_slave *slave);
+ void (*new_peripheral_assigned)(struct sdw_bus *bus,
+ struct sdw_slave *slave,
+ int dev_num);
};
/**
* struct sdw_bus - SoundWire bus
* @dev: Shortcut to &bus->md->dev to avoid changing the entire code.
* @md: Master device
- * @link_id: Link id number, can be 0 to N, unique for each Master
+ * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used.
+ * @link_id: Link id number, can be 0 to N, unique for each Controller
* @id: bus system-wide unique id
* @slaves: list of Slaves on this bus
* @assigned: Bitmap for Slave device numbers.
@@ -885,6 +903,7 @@ struct sdw_master_ops {
* is used to compute and program bus bandwidth, clock, frame shape,
* transport and port parameters
* @debugfs: Bus debugfs
+ * @domain: IRQ domain
* @defer_msg: Defer message
* @clk_stop_timeout: Clock stop timeout computed
* @bank_switch_timeout: Bank switch timeout computed
@@ -896,13 +915,11 @@ struct sdw_master_ops {
* meaningful if multi_link is set. If set to 1, hardware-based
* synchronization will be used even if a stream only uses a single
* SoundWire segment.
- * @dev_num_ida_min: if set, defines the minimum values for the IDA
- * used to allocate system-unique device numbers. This value needs to be
- * identical across all SoundWire bus in the system.
*/
struct sdw_bus {
struct device *dev;
struct sdw_master_device *md;
+ int controller_id;
unsigned int link_id;
int id;
struct list_head slaves;
@@ -920,12 +937,13 @@ struct sdw_bus {
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs;
#endif
+ struct irq_chip irq_chip;
+ struct irq_domain *domain;
struct sdw_defer defer_msg;
unsigned int clk_stop_timeout;
u32 bank_switch_timeout;
bool multi_link;
int hw_sync_min_links;
- int dev_num_ida_min;
};
int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
@@ -1024,7 +1042,7 @@ int sdw_compute_params(struct sdw_bus *bus);
int sdw_stream_add_master(struct sdw_bus *bus,
struct sdw_stream_config *stream_config,
- struct sdw_port_config *port_config,
+ const struct sdw_port_config *port_config,
unsigned int num_ports,
struct sdw_stream_runtime *stream);
int sdw_stream_remove_master(struct sdw_bus *bus,
@@ -1046,7 +1064,7 @@ void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id
int sdw_stream_add_slave(struct sdw_slave *slave,
struct sdw_stream_config *stream_config,
- struct sdw_port_config *port_config,
+ const struct sdw_port_config *port_config,
unsigned int num_ports,
struct sdw_stream_runtime *stream);
int sdw_stream_remove_slave(struct sdw_slave *slave,
@@ -1068,7 +1086,7 @@ int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val);
static inline int sdw_stream_add_slave(struct sdw_slave *slave,
struct sdw_stream_config *stream_config,
- struct sdw_port_config *port_config,
+ const struct sdw_port_config *port_config,
unsigned int num_ports,
struct sdw_stream_runtime *stream)
{
diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h
index ceecad74aef9..28a4eb77717f 100644
--- a/include/linux/soundwire/sdw_amd.h
+++ b/include/linux/soundwire/sdw_amd.h
@@ -1,11 +1,12 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
- * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (C) 2023-24 Advanced Micro Devices, Inc. All rights reserved.
*/
#ifndef __SDW_AMD_H
#define __SDW_AMD_H
+#include <linux/acpi.h>
#include <linux/soundwire/sdw.h>
/* AMD pm_runtime quirk definitions */
@@ -25,6 +26,7 @@
#define AMD_SDW_POWER_OFF_MODE 2
#define ACP_SDW0 0
#define ACP_SDW1 1
+#define AMD_SDW_MAX_MANAGER_COUNT 2
struct acp_sdw_pdata {
u16 instance;
@@ -32,12 +34,6 @@ struct acp_sdw_pdata {
struct mutex *acp_sdw_lock;
};
-struct sdw_manager_reg_mask {
- u32 sw_pad_enable_mask;
- u32 sw_pad_pulldown_mask;
- u32 acp_sdw_intr_mask;
-};
-
/**
* struct sdw_amd_dai_runtime: AMD sdw dai runtime data
*
@@ -59,10 +55,8 @@ struct sdw_amd_dai_runtime {
* @dev: linux device
* @mmio: SoundWire registers mmio base
* @acp_mmio: acp registers mmio base
- * @reg_mask: register mask structure per manager instance
* @amd_sdw_irq_thread: SoundWire manager irq workqueue
* @amd_sdw_work: peripheral status work queue
- * @probe_work: SoundWire manager probe workqueue
* @acp_sdw_lock: mutex to protect acp share register access
* @status: peripheral devices status array
* @num_din_ports: number of input ports
@@ -83,10 +77,8 @@ struct amd_sdw_manager {
void __iomem *mmio;
void __iomem *acp_mmio;
- struct sdw_manager_reg_mask *reg_mask;
struct work_struct amd_sdw_irq_thread;
struct work_struct amd_sdw_work;
- struct work_struct probe_work;
/* mutex to protect acp common register access */
struct mutex *acp_sdw_lock;
@@ -106,4 +98,71 @@ struct amd_sdw_manager {
struct sdw_amd_dai_runtime **dai_runtime_array;
};
+
+/**
+ * struct sdw_amd_acpi_info - Soundwire AMD information found in ACPI tables
+ * @handle: ACPI controller handle
+ * @count: maximum no of soundwire manager links supported on AMD platform.
+ * @link_mask: bit-wise mask listing links enabled by BIOS menu
+ */
+struct sdw_amd_acpi_info {
+ acpi_handle handle;
+ int count;
+ u32 link_mask;
+};
+
+/**
+ * struct sdw_amd_ctx - context allocated by the controller driver probe
+ *
+ * @count: link count
+ * @num_slaves: total number of devices exposed across all enabled links
+ * @link_mask: bit-wise mask listing SoundWire links reported by the
+ * Controller
+ * @ids: array of slave_id, representing Slaves exposed across all enabled
+ * links
+ * @pdev: platform device structure
+ */
+struct sdw_amd_ctx {
+ int count;
+ int num_slaves;
+ u32 link_mask;
+ struct sdw_extended_slave_id *ids;
+ struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT];
+};
+
+/**
+ * struct sdw_amd_res - Soundwire AMD global resource structure,
+ * typically populated by the DSP driver/Legacy driver
+ *
+ * @addr: acp pci device resource start address
+ * @reg_range: ACP register range
+ * @link_mask: bit-wise mask listing links selected by the DSP driver/
+ * legacy driver
+ * @count: link count
+ * @mmio_base: mmio base of SoundWire registers
+ * @handle: ACPI parent handle
+ * @parent: parent device
+ * @dev: device implementing hwparams and free callbacks
+ * @acp_lock: mutex protecting acp common registers access
+ */
+struct sdw_amd_res {
+ u32 addr;
+ u32 reg_range;
+ u32 link_mask;
+ int count;
+ void __iomem *mmio_base;
+ acpi_handle handle;
+ struct device *parent;
+ struct device *dev;
+ /* use to protect acp common registers access */
+ struct mutex *acp_lock;
+};
+
+int sdw_amd_probe(struct sdw_amd_res *res, struct sdw_amd_ctx **ctx);
+
+void sdw_amd_exit(struct sdw_amd_ctx *ctx);
+
+int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx);
+
+int amd_sdw_scan_controller(struct sdw_amd_acpi_info *info);
#endif
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 11fc88fb0d78..00bb22d96ae5 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -264,11 +264,6 @@ struct sdw_intel_link_dev;
*/
#define SDW_INTEL_CLK_STOP_BUS_RESET BIT(3)
-struct sdw_intel_slave_id {
- int link_id;
- struct sdw_slave_id id;
-};
-
struct hdac_bus;
/**
@@ -298,7 +293,7 @@ struct sdw_intel_ctx {
int num_slaves;
acpi_handle handle;
struct sdw_intel_link_dev **ldev;
- struct sdw_intel_slave_id *ids;
+ struct sdw_extended_slave_id *ids;
struct list_head link_list;
struct mutex shim_lock; /* lock for access to shared SHIM registers */
u32 shim_mask;
@@ -433,4 +428,11 @@ struct sdw_intel_hw_ops {
extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops;
extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops;
+/*
+ * IDA min selected to allow for 5 unconstrained devices per link,
+ * and 6 system-unique Device Numbers for wake-capable devices.
+ */
+
+#define SDW_INTEL_DEV_NUM_IDA_MIN 6
+
#endif
diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h
index d8c27f1e5559..693320b4f5c2 100644
--- a/include/linux/soundwire/sdw_type.h
+++ b/include/linux/soundwire/sdw_type.h
@@ -4,9 +4,9 @@
#ifndef __SOUNDWIRE_TYPES_H
#define __SOUNDWIRE_TYPES_H
-extern struct bus_type sdw_bus_type;
-extern struct device_type sdw_slave_type;
-extern struct device_type sdw_master_type;
+extern const struct bus_type sdw_bus_type;
+extern const struct device_type sdw_slave_type;
+extern const struct device_type sdw_master_type;
static inline int is_sdw_slave(const struct device *dev)
{
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 4658e7801b42..ca2cd4e30ead 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -5,6 +5,7 @@
#ifndef __LINUX_SPI_PXA2XX_SPI_H
#define __LINUX_SPI_PXA2XX_SPI_H
+#include <linux/dmaengine.h>
#include <linux/types.h>
#include <linux/pxa2xx_ssp.h>
@@ -19,10 +20,10 @@ struct pxa2xx_spi_controller {
u16 num_chipselect;
u8 enable_dma;
u8 dma_burst_size;
- bool is_slave;
+ bool is_target;
/* DMA engine specific config */
- bool (*dma_filter)(struct dma_chan *chan, void *param);
+ dma_filter_fn dma_filter;
void *tx_param;
void *rx_param;
@@ -31,7 +32,7 @@ struct pxa2xx_spi_controller {
};
/*
- * The controller specific data for SPI slave devices
+ * The controller specific data for SPI target devices
* (resides in spi_board_info.controller_data),
* copied to spi_device.platform_data ... mostly for
* DMA tuning.
diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
index dc2a0cbd210d..f950d280461b 100644
--- a/include/linux/spi/sh_msiof.h
+++ b/include/linux/spi/sh_msiof.h
@@ -3,8 +3,8 @@
#define __SPI_SH_MSIOF_H__
enum {
- MSIOF_SPI_MASTER,
- MSIOF_SPI_SLAVE,
+ MSIOF_SPI_HOST,
+ MSIOF_SPI_TARGET,
};
struct sh_msiof_spi_info {
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 6b0a7dc48a4b..f866d5c8ed32 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -233,6 +233,8 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
* limitations)
* @supports_op: check if an operation is supported by the controller
* @exec_op: execute a SPI memory operation
+ * not all driver provides supports_op(), so it can return -EOPNOTSUPP
+ * if the op is not supported by the driver/controller
* @get_name: get a custom name for the SPI mem device from the controller.
* This might be needed if the controller driver has been ported
* to use the SPI mem layer and a custom name is used to keep
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 32c94eae8926..c459809efee4 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -6,18 +6,22 @@
#ifndef __LINUX_SPI_H
#define __LINUX_SPI_H
+#include <linux/acpi.h>
#include <linux/bits.h>
+#include <linux/completion.h>
#include <linux/device.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
#include <linux/kthread.h>
-#include <linux/completion.h>
+#include <linux/mod_devicetable.h>
+#include <linux/overflow.h>
#include <linux/scatterlist.h>
-#include <linux/gpio/consumer.h>
+#include <linux/slab.h>
+#include <linux/u64_stats_sync.h>
#include <uapi/linux/spi/spi.h>
-#include <linux/acpi.h>
-#include <linux/u64_stats_sync.h>
+
+/* Max no. of CS supported per spi device */
+#define SPI_CS_CNT_MAX 16
struct dma_chan;
struct software_node;
@@ -32,11 +36,11 @@ struct spi_message;
* INTERFACES between SPI master-side drivers and SPI slave protocol handlers,
* and SPI infrastructure.
*/
-extern struct bus_type spi_bus_type;
+extern const struct bus_type spi_bus_type;
/**
* struct spi_statistics - statistics for spi transfers
- * @syncp: seqcount to protect members in this struct for per-cpu udate
+ * @syncp: seqcount to protect members in this struct for per-cpu update
* on 32-bit systems
*
* @messages: number of spi-messages handled
@@ -55,7 +59,7 @@ extern struct bus_type spi_bus_type;
* @bytes_rx: number of bytes received from device
*
* @transfer_bytes_histo:
- * transfer bytes histogramm
+ * transfer bytes histogram
*
* @transfers_split_maxsize:
* number of transfers that have been split because of
@@ -127,11 +131,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* struct spi_device - Controller side proxy for an SPI slave device
* @dev: Driver model representation of the device.
* @controller: SPI controller used with the device.
- * @master: Copy of controller, for backwards compatibility.
* @max_speed_hz: Maximum clock rate to be used with this chip
* (on this board); may be changed by the device's driver.
* The spi_transfer.speed_hz can override this for each transfer.
- * @chip_select: Chipselect, distinguishing chips handled by @controller.
+ * @chip_select: Array of physical chipselect, spi->chipselect[i] gives
+ * the corresponding physical CS for logical CS i.
* @mode: The spi mode defines how data is clocked out and in.
* This may be changed by the device's driver.
* The "active low" default for chipselect mode can be overridden
@@ -156,8 +160,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* the device will bind to the named driver and only the named driver.
* Do not set directly, because core frees it; use driver_set_override() to
* set or clear it.
- * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
- * not using a GPIO line)
+ * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
+ * (optional, NULL when not using a GPIO line)
* @word_delay: delay to be inserted between consecutive
* words of a transfer
* @cs_setup: delay to be introduced by the controller after CS is asserted
@@ -166,6 +170,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
* deasserted. If @cs_change_delay is used from @spi_transfer, then the
* two delays will be added up.
* @pcpu_statistics: statistics for the spi_device
+ * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array
*
* A @spi_device is used to interchange data between an SPI slave
* (usually a discrete chip) and CPU memory.
@@ -179,9 +184,8 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
struct spi_device {
struct device dev;
struct spi_controller *controller;
- struct spi_controller *master; /* Compatibility layer */
u32 max_speed_hz;
- u8 chip_select;
+ u8 chip_select[SPI_CS_CNT_MAX];
u8 bits_per_word;
bool rt;
#define SPI_NO_TX BIT(31) /* No transmit wire */
@@ -212,7 +216,7 @@ struct spi_device {
void *controller_data;
char modalias[SPI_NAME_SIZE];
const char *driver_override;
- struct gpio_desc *cs_gpiod; /* Chip select gpio desc */
+ struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */
struct spi_delay word_delay; /* Inter-word delay */
/* CS delays */
struct spi_delay cs_setup;
@@ -222,8 +226,15 @@ struct spi_device {
/* The statistics */
struct spi_statistics __percpu *pcpu_statistics;
+ /* Bit mask of the chipselect(s) that the driver need to use from
+ * the chipselect array.When the controller is capable to handle
+ * multiple chip selects & memories are connected in parallel
+ * then more than one bit need to be set in cs_index_mask.
+ */
+ u32 cs_index_mask : SPI_CS_CNT_MAX;
+
/*
- * likely need more hooks for more protocol options affecting how
+ * Likely need more hooks for more protocol options affecting how
* the controller talks to each chip, like:
* - memory packing (12 bit samples into low bits, others zeroed)
* - priority
@@ -278,32 +289,43 @@ static inline void *spi_get_drvdata(const struct spi_device *spi)
static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx)
{
- return spi->chip_select;
+ return spi->chip_select[idx];
}
static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect)
{
- spi->chip_select = chipselect;
+ spi->chip_select[idx] = chipselect;
}
static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx)
{
- return spi->cs_gpiod;
+ return spi->cs_gpiod[idx];
}
static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod)
{
- spi->cs_gpiod = csgpiod;
+ spi->cs_gpiod[idx] = csgpiod;
+}
+
+static inline bool spi_is_csgpiod(struct spi_device *spi)
+{
+ u8 idx;
+
+ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) {
+ if (spi_get_csgpiod(spi, idx))
+ return true;
+ }
+ return false;
}
/**
* struct spi_driver - Host side "protocol" driver
* @id_table: List of SPI devices supported by this driver
- * @probe: Binds this driver to the spi device. Drivers can verify
+ * @probe: Binds this driver to the SPI device. Drivers can verify
* that the device is actually present, and may need to configure
* characteristics (such as bits_per_word) which weren't needed for
* the initial configuration done during system setup.
- * @remove: Unbinds this driver from the spi device
+ * @remove: Unbinds this driver from the SPI device
* @shutdown: Standard shutdown callback used during system state
* transitions such as powerdown/halt and kexec
* @driver: SPI device drivers should initialize the name and owner
@@ -415,7 +437,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @queued: whether this controller is providing an internal message queue
* @kworker: pointer to thread struct for message pump
* @pump_messages: work struct for scheduling work to the message pump
- * @queue_lock: spinlock to syncronise access to message queue
+ * @queue_lock: spinlock to synchronise access to message queue
* @queue: message queue
* @cur_msg: the currently in-flight message
* @cur_msg_completion: a completion for the current in-flight message
@@ -426,9 +448,11 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* the @cur_msg_completion. This flag is used to signal the context that
* is running spi_finalize_current_message() that it needs to complete()
* @cur_msg_mapped: message has been mapped for DMA
+ * @fallback: fallback to PIO if DMA transfer return failure with
+ * SPI_TRANS_FAIL_NO_START.
+ * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
* @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip
* selected
- * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
* @xfer_completion: used by core transfer_one_message()
* @busy: message pump is busy
* @running: message pump is running
@@ -451,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
*
* @set_cs: set the logic level of the chip select line. May be called
* from interrupt context.
+ * @optimize_message: optimize the message for reuse
+ * @unoptimize_message: release resources allocated by optimize_message
* @prepare_message: set up the controller to transfer a single message,
* for example doing DMA mapping. Called from threaded
* context.
@@ -460,10 +486,13 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* - return 1 if the transfer is still in progress. When
* the driver is finished with this transfer it must
* call spi_finalize_current_transfer() so the subsystem
- * can issue the next transfer. Note: transfer_one and
- * transfer_one_message are mutually exclusive; when both
- * are set, the generic subsystem does not call your
- * transfer_one callback.
+ * can issue the next transfer. If the transfer fails, the
+ * driver must set the flag SPI_TRANS_FAIL_IO to
+ * spi_transfer->error first, before calling
+ * spi_finalize_current_transfer().
+ * Note: transfer_one and transfer_one_message are mutually
+ * exclusive; when both are set, the generic subsystem does
+ * not call your transfer_one callback.
* @handle_err: the subsystem calls the driver to handle an error that occurs
* in the generic implementation of transfer_one_message().
* @mem_ops: optimized/dedicated operations for interactions with SPI memory.
@@ -473,7 +502,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* @unprepare_message: undo any work done by prepare_message().
* @slave_abort: abort the ongoing transfer request on an SPI slave controller
* @target_abort: abort the ongoing transfer request on an SPI target controller
- * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
+ * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS
* number. Any individual value may be NULL for CS lines that
* are not GPIOs (driven by the SPI controller itself).
* @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab
@@ -500,8 +529,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* If the driver does not set this, the SPI core takes the snapshot as
* close to the driver hand-over as possible.
* @irq_flags: Interrupt enable state during PTP system timestamping
- * @fallback: fallback to pio if dma transfer return failure with
- * SPI_TRANS_FAIL_NO_START.
* @queue_empty: signal green light for opportunistically skipping the queue
* for spi_sync transfers.
* @must_async: disable all fast paths in the core
@@ -522,15 +549,17 @@ struct spi_controller {
struct list_head list;
- /* Other than negative (== assign one dynamically), bus_num is fully
- * board-specific. usually that simplifies to being SOC-specific.
- * example: one SOC has three SPI controllers, numbered 0..2,
- * and one board's schematics might show it using SPI-2. software
+ /*
+ * Other than negative (== assign one dynamically), bus_num is fully
+ * board-specific. Usually that simplifies to being SoC-specific.
+ * example: one SoC has three SPI controllers, numbered 0..2,
+ * and one board's schematics might show it using SPI-2. Software
* would normally use bus_num=2 for that controller.
*/
s16 bus_num;
- /* chipselects will be integral to many controllers; some others
+ /*
+ * Chipselects will be integral to many controllers; some others
* might use board-specific GPIOs.
*/
u16 num_chipselect;
@@ -562,8 +591,13 @@ struct spi_controller {
#define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */
#define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */
#define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */
-
-#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */
+#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */
+#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */
+ /*
+ * The spi-controller has multi chip select capability and can
+ * assert/de-assert more than one chip select at once.
+ */
+#define SPI_CONTROLLER_MULTI_CS BIT(7)
/* Flag indicating if the allocation of this struct is devres-managed */
bool devm_allocated;
@@ -576,8 +610,8 @@ struct spi_controller {
};
/*
- * on some hardware transfer / message size may be constrained
- * the limit may depend on device transfer settings
+ * On some hardware transfer / message size may be constrained
+ * the limit may depend on device transfer settings.
*/
size_t (*max_transfer_size)(struct spi_device *spi);
size_t (*max_message_size)(struct spi_device *spi);
@@ -595,7 +629,8 @@ struct spi_controller {
/* Flag indicating that the SPI bus is locked for exclusive use */
bool bus_lock_flag;
- /* Setup mode and clock, etc (spi driver may call many times).
+ /*
+ * Setup mode and clock, etc (SPI driver may call many times).
*
* IMPORTANT: this may be called when transfers to another
* device are active. DO NOT UPDATE SHARED REGISTERS in ways
@@ -613,18 +648,19 @@ struct spi_controller {
*/
int (*set_cs_timing)(struct spi_device *spi);
- /* Bidirectional bulk transfers
+ /*
+ * Bidirectional bulk transfers
*
* + The transfer() method may not sleep; its main role is
* just to add the message to the queue.
* + For now there's no remove-from-queue operation, or
* any other request management
- * + To a given spi_device, message queueing is pure fifo
+ * + To a given spi_device, message queueing is pure FIFO
*
* + The controller's main job is to process its message queue,
* selecting a chip (for masters), then transferring data
* + If there are multiple spi_device children, the i/o queue
- * arbitration algorithm is unspecified (round robin, fifo,
+ * arbitration algorithm is unspecified (round robin, FIFO,
* priority, reservations, preemption, etc)
*
* + Chipselect stays active during the entire message
@@ -672,12 +708,15 @@ struct spi_controller {
bool rt;
bool auto_runtime_pm;
bool cur_msg_mapped;
- char last_cs;
- bool last_cs_mode_high;
bool fallback;
+ bool last_cs_mode_high;
+ s8 last_cs[SPI_CS_CNT_MAX];
+ u32 last_cs_index_mask : SPI_CS_CNT_MAX;
struct completion xfer_completion;
size_t max_dma_len;
+ int (*optimize_message)(struct spi_message *msg);
+ int (*unoptimize_message)(struct spi_message *msg);
int (*prepare_transfer_hardware)(struct spi_controller *ctlr);
int (*transfer_one_message)(struct spi_controller *ctlr,
struct spi_message *mesg);
@@ -705,7 +744,7 @@ struct spi_controller {
const struct spi_controller_mem_ops *mem_ops;
const struct spi_controller_mem_caps *mem_caps;
- /* gpio chip select */
+ /* GPIO chip select */
struct gpio_desc **cs_gpiods;
bool use_gpio_descriptors;
s8 unused_native_cs;
@@ -789,7 +828,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr,
struct spi_transfer *xfer,
size_t progress, bool irqs_off);
-/* The spi driver core manages memory for the spi_controller classdev */
+/* The SPI driver core manages memory for the spi_controller classdev */
extern struct spi_controller *__spi_alloc_controller(struct device *host,
unsigned int size, bool slave);
@@ -863,6 +902,7 @@ extern int devm_spi_register_controller(struct device *dev,
extern void spi_unregister_controller(struct spi_controller *ctlr);
#if IS_ENABLED(CONFIG_ACPI)
+extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev);
extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr,
struct acpi_device *adev,
int index);
@@ -878,13 +918,13 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr,
void *res);
/**
- * struct spi_res - spi resource management structure
+ * struct spi_res - SPI resource management structure
* @entry: list entry
* @release: release code called prior to freeing this resource
* @data: extra data allocated for the specific use-case
*
- * this is based on ideas from devres, but focused on life-cycle
- * management during spi_message processing
+ * This is based on ideas from devres, but focused on life-cycle
+ * management during spi_message processing.
*/
struct spi_res {
struct list_head entry;
@@ -902,7 +942,7 @@ struct spi_res {
*
* The spi_messages themselves consist of a series of read+write transfer
* segments. Those segments always read the same number of bits as they
- * write; but one or the other is easily ignored by passing a null buffer
+ * write; but one or the other is easily ignored by passing a NULL buffer
* pointer. (This is unlike most types of I/O API, because SPI hardware
* is full duplex.)
*
@@ -913,8 +953,8 @@ struct spi_res {
/**
* struct spi_transfer - a read/write buffer pair
- * @tx_buf: data to be written (dma-safe memory), or NULL
- * @rx_buf: data to be read (dma-safe memory), or NULL
+ * @tx_buf: data to be written (DMA-safe memory), or NULL
+ * @rx_buf: data to be read (DMA-safe memory), or NULL
* @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped
* @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped
* @tx_nbits: number of bits used for writing. If 0 the default
@@ -937,7 +977,7 @@ struct spi_res {
* @word_delay: inter word delay to be introduced after each word size
* (set by bits_per_word) transmission.
* @effective_speed_hz: the effective SCK-speed that was used to
- * transfer this transfer. Set to 0 if the spi bus driver does
+ * transfer this transfer. Set to 0 if the SPI bus driver does
* not support it.
* @transfer_list: transfers are sequenced through @spi_message.transfers
* @tx_sg: Scatterlist for transmit, currently not for client use
@@ -966,16 +1006,16 @@ struct spi_res {
* transmitting the "pre" word, and the "post" timestamp after receiving
* transmit confirmation from the controller for the "post" word.
* @timestamped: true if the transfer has been timestamped
- * @error: Error status logged by spi controller driver.
+ * @error: Error status logged by SPI controller driver.
*
* SPI transfers always write the same number of bytes as they read.
* Protocol drivers should always provide @rx_buf and/or @tx_buf.
* In some cases, they may also want to provide DMA addresses for
* the data being transferred; that may reduce overhead, when the
- * underlying driver uses dma.
+ * underlying driver uses DMA.
*
- * If the transmit buffer is null, zeroes will be shifted out
- * while filling @rx_buf. If the receive buffer is null, the data
+ * If the transmit buffer is NULL, zeroes will be shifted out
+ * while filling @rx_buf. If the receive buffer is NULL, the data
* shifted in will be discarded. Only "len" bytes shift out (or in).
* It's an error to try to shift out a partial word. (For example, by
* shifting out three bytes with word size of sixteen or twenty bits;
@@ -1009,7 +1049,7 @@ struct spi_res {
* Some devices need protocol transactions to be built from a series of
* spi_message submissions, where the content of one message is determined
* by the results of previous messages and where the whole transaction
- * ends when the chipselect goes intactive.
+ * ends when the chipselect goes inactive.
*
* When SPI can transfer in 1x,2x or 4x. It can get this transfer information
* from device through @tx_nbits and @rx_nbits. In Bi-direction, these
@@ -1023,16 +1063,18 @@ struct spi_res {
* and its transfers, ignore them until its completion callback.
*/
struct spi_transfer {
- /* It's ok if tx_buf == rx_buf (right?)
- * for MicroWire, one buffer must be null
- * buffers must work with dma_*map_single() calls, unless
- * spi_message.is_dma_mapped reports a pre-existing mapping
+ /*
+ * It's okay if tx_buf == rx_buf (right?).
+ * For MicroWire, one buffer must be NULL.
+ * Buffers must work with dma_*map_single() calls, unless
+ * spi_message.is_dma_mapped reports a pre-existing mapping.
*/
const void *tx_buf;
void *rx_buf;
unsigned len;
#define SPI_TRANS_FAIL_NO_START BIT(0)
+#define SPI_TRANS_FAIL_IO BIT(1)
u16 error;
dma_addr_t tx_dma;
@@ -1046,9 +1088,9 @@ struct spi_transfer {
unsigned tx_nbits:3;
unsigned rx_nbits:3;
unsigned timestamped:1;
-#define SPI_NBITS_SINGLE 0x01 /* 1bit transfer */
-#define SPI_NBITS_DUAL 0x02 /* 2bits transfer */
-#define SPI_NBITS_QUAD 0x04 /* 4bits transfer */
+#define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */
+#define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */
+#define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */
u8 bits_per_word;
struct spi_delay delay;
struct spi_delay cs_change_delay;
@@ -1069,18 +1111,21 @@ struct spi_transfer {
* struct spi_message - one multi-segment SPI transaction
* @transfers: list of transfer segments in this transaction
* @spi: SPI device to which the transaction is queued
- * @is_dma_mapped: if true, the caller provided both dma and cpu virtual
+ * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual
* addresses for each transfer buffer
+ * @pre_optimized: peripheral driver pre-optimized the message
+ * @optimized: the message is in the optimized state
+ * @prepared: spi_prepare_message was called for the this message
+ * @status: zero for success, else negative errno
* @complete: called to report transaction completions
* @context: the argument to complete() when it's called
* @frame_length: the total number of bytes in the message
* @actual_length: the total number of bytes that were transferred in all
* successful segments
- * @status: zero for success, else negative errno
* @queue: for use by whichever driver currently owns the message
* @state: for use by whichever driver currently owns the message
- * @resources: for resource management when the spi message is processed
- * @prepared: spi_prepare_message was called for the this message
+ * @opt_state: for use by whichever driver currently owns the message
+ * @resources: for resource management when the SPI message is processed
*
* A @spi_message is used to execute an atomic sequence of data transfers,
* each represented by a struct spi_transfer. The sequence is "atomic"
@@ -1103,10 +1148,16 @@ struct spi_message {
unsigned is_dma_mapped:1;
+ /* spi_optimize_message() was called for this message */
+ bool pre_optimized;
+ /* __spi_optimize_message() was called for this message */
+ bool optimized;
+
/* spi_prepare_message() was called for this message */
bool prepared;
- /* REVISIT: we might want a flag affecting the behavior of the
+ /*
+ * REVISIT: we might want a flag affecting the behavior of the
* last transfer ... allowing things like "read 16 bit length L"
* immediately followed by "read L bytes". Basically imposing
* a specific message scheduling algorithm.
@@ -1124,14 +1175,20 @@ struct spi_message {
unsigned frame_length;
unsigned actual_length;
- /* For optional use by whatever driver currently owns the
+ /*
+ * For optional use by whatever driver currently owns the
* spi_message ... between calls to spi_async and then later
* complete(), that's the spi_controller controller driver.
*/
struct list_head queue;
void *state;
+ /*
+ * Optional state for use by controller driver between calls to
+ * __spi_optimize_message() and __spi_unoptimize_message().
+ */
+ void *opt_state;
- /* List of spi_res reources when the spi message is processed */
+ /* List of spi_res resources when the SPI message is processed */
struct list_head resources;
};
@@ -1168,7 +1225,7 @@ spi_transfer_delay_exec(struct spi_transfer *t)
/**
* spi_message_init_with_transfers - Initialize spi_message and append transfers
* @m: spi_message to be initialized
- * @xfers: An array of spi transfers
+ * @xfers: An array of SPI transfers
* @num_xfers: Number of items in the xfer array
*
* This function initializes the given spi_message and adds each spi_transfer in
@@ -1185,26 +1242,27 @@ struct spi_transfer *xfers, unsigned int num_xfers)
spi_message_add_tail(&xfers[i], m);
}
-/* It's fine to embed message and transaction structures in other data
+/*
+ * It's fine to embed message and transaction structures in other data
* structures so long as you don't free them while they're in use.
*/
-
static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags)
{
- struct spi_message *m;
-
- m = kzalloc(sizeof(struct spi_message)
- + ntrans * sizeof(struct spi_transfer),
- flags);
- if (m) {
- unsigned i;
- struct spi_transfer *t = (struct spi_transfer *)(m + 1);
-
- spi_message_init_no_memset(m);
- for (i = 0; i < ntrans; i++, t++)
- spi_message_add_tail(t, m);
- }
- return m;
+ struct spi_message_with_transfers {
+ struct spi_message m;
+ struct spi_transfer t[];
+ } *mwt;
+ unsigned i;
+
+ mwt = kzalloc(struct_size(mwt, t, ntrans), flags);
+ if (!mwt)
+ return NULL;
+
+ spi_message_init_no_memset(&mwt->m);
+ for (i = 0; i < ntrans; i++)
+ spi_message_add_tail(&mwt->t[i], &mwt->m);
+
+ return &mwt->m;
}
static inline void spi_message_free(struct spi_message *m)
@@ -1212,6 +1270,9 @@ static inline void spi_message_free(struct spi_message *m)
kfree(m);
}
+extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg);
+extern void spi_unoptimize_message(struct spi_message *msg);
+
extern int spi_setup(struct spi_device *spi);
extern int spi_async(struct spi_device *spi, struct spi_message *message);
extern int spi_slave_abort(struct spi_device *spi);
@@ -1253,7 +1314,7 @@ spi_max_transfer_size(struct spi_device *spi)
*/
static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw)
{
- u32 bpw_mask = spi->master->bits_per_word_mask;
+ u32 bpw_mask = spi->controller->bits_per_word_mask;
if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw)))
return true;
@@ -1291,7 +1352,7 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
* replacements that have occurred
* so that they can get reverted
* @release: some extra release code to get executed prior to
- * relasing this structure
+ * releasing this structure
* @extradata: pointer to some extra data if requested or NULL
* @replaced_transfers: transfers that have been replaced and which need
* to get restored
@@ -1301,9 +1362,9 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
* @inserted_transfers: array of spi_transfers of array-size @inserted,
* that have been replacing replaced_transfers
*
- * note: that @extradata will point to @inserted_transfers[@inserted]
+ * Note: that @extradata will point to @inserted_transfers[@inserted]
* if some extra allocation is requested, so alignment will be the same
- * as for spi_transfers
+ * as for spi_transfers.
*/
struct spi_replaced_transfers {
spi_replaced_release_t release;
@@ -1320,16 +1381,15 @@ struct spi_replaced_transfers {
extern int spi_split_transfers_maxsize(struct spi_controller *ctlr,
struct spi_message *msg,
- size_t maxsize,
- gfp_t gfp);
+ size_t maxsize);
extern int spi_split_transfers_maxwords(struct spi_controller *ctlr,
struct spi_message *msg,
- size_t maxwords,
- gfp_t gfp);
+ size_t maxwords);
/*---------------------------------------------------------------------------*/
-/* All these synchronous SPI transfer routines are utilities layered
+/*
+ * All these synchronous SPI transfer routines are utilities layered
* over the core async transfer primitive. Here, "synchronous" means
* they will sleep uninterruptibly until the async transfer completes.
*/
@@ -1472,7 +1532,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd)
*
* Callable only from contexts that can sleep.
*
- * Return: the (unsigned) sixteen bit number returned by the device in cpu
+ * Return: the (unsigned) sixteen bit number returned by the device in CPU
* endianness, or else a negative error code.
*/
static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
@@ -1500,7 +1560,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
* As a rule, SPI devices can't be probed. Instead, board init code
* provides a table listing the devices which are present, with enough
* information to bind and set up the device's driver. There's basic
- * support for nonstatic configurations too; enough to handle adding
+ * support for non-static configurations too; enough to handle adding
* parport adapters, or microcontrollers acting as USB-to-SPI bridges.
*/
@@ -1537,12 +1597,13 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
* are active in some dynamic board configuration models.
*/
struct spi_board_info {
- /* The device name and module name are coupled, like platform_bus;
+ /*
+ * The device name and module name are coupled, like platform_bus;
* "modalias" is normally the driver name.
*
* platform_data goes to spi_device.dev.platform_data,
* controller_data goes to spi_device.controller_data,
- * irq is copied too
+ * IRQ is copied too.
*/
char modalias[SPI_NAME_SIZE];
const void *platform_data;
@@ -1554,7 +1615,8 @@ struct spi_board_info {
u32 max_speed_hz;
- /* bus_num is board specific and matches the bus_num of some
+ /*
+ * bus_num is board specific and matches the bus_num of some
* spi_controller that will probably be registered later.
*
* chip_select reflects how this chip is wired to that master;
@@ -1563,12 +1625,14 @@ struct spi_board_info {
u16 bus_num;
u16 chip_select;
- /* mode becomes spi_device.mode, and is essential for chips
+ /*
+ * mode becomes spi_device.mode, and is essential for chips
* where the default of SPI_CS_HIGH = 0 is wrong.
*/
u32 mode;
- /* ... may need additional spi_device chip config data here.
+ /*
+ * ... may need additional spi_device chip config data here.
* avoid stuff protocol drivers can set; but include stuff
* needed to behave without being bound to a driver:
* - quirks like clock rate mattering when not selected
@@ -1585,7 +1649,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
{ return 0; }
#endif
-/* If you're hotplugging an adapter with devices (parport, usb, etc)
+/*
+ * If you're hotplugging an adapter with devices (parport, USB, etc)
* use spi_new_device() to describe each device. You can also call
* spi_unregister_device() to start making that device vanish, but
* normally that would be handled by spi_unregister_controller().
@@ -1619,26 +1684,4 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers);
}
-/* Compatibility layer */
-#define spi_master spi_controller
-
-#define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX
-#define SPI_MASTER_NO_RX SPI_CONTROLLER_NO_RX
-#define SPI_MASTER_NO_TX SPI_CONTROLLER_NO_TX
-#define SPI_MASTER_MUST_RX SPI_CONTROLLER_MUST_RX
-#define SPI_MASTER_MUST_TX SPI_CONTROLLER_MUST_TX
-
-#define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr)
-#define spi_master_set_devdata(_ctlr, _data) \
- spi_controller_set_devdata(_ctlr, _data)
-#define spi_master_get(_ctlr) spi_controller_get(_ctlr)
-#define spi_master_put(_ctlr) spi_controller_put(_ctlr)
-#define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr)
-#define spi_master_resume(_ctlr) spi_controller_resume(_ctlr)
-
-#define spi_register_master(_ctlr) spi_register_controller(_ctlr)
-#define devm_spi_register_master(_dev, _ctlr) \
- devm_spi_register_controller(_dev, _ctlr)
-#define spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr)
-
#endif /* __LINUX_SPI_H */
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 4444c2a992cb..b930eca2ef7b 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -10,7 +10,7 @@ struct spi_bitbang {
u8 use_dma;
u16 flags; /* extra spi->mode support */
- struct spi_master *master;
+ struct spi_controller *ctlr;
/* setup_transfer() changes clock and/or wordsize to match settings
* for this transfer; zeroes restore defaults from spi_device.
diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
index 9e7e83d8645b..5f0e1407917a 100644
--- a/include/linux/spi/spi_gpio.h
+++ b/include/linux/spi/spi_gpio.h
@@ -15,8 +15,8 @@
*/
/**
- * struct spi_gpio_platform_data - parameter for bitbanged SPI master
- * @num_chipselect: how many slaves to allow
+ * struct spi_gpio_platform_data - parameter for bitbanged SPI host controller
+ * @num_chipselect: how many target devices to allow
*/
struct spi_gpio_platform_data {
u16 num_chipselect;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 31d3d747a9db..3fcd20de6ca8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -456,6 +456,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
#endif /* CONFIG_PREEMPT_RT */
/*
+ * Does a critical section need to be broken due to another
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
+ * but a general need for low latency)
+ */
+static inline int spin_needbreak(spinlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+ return spin_is_contended(lock);
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Check if a rwlock is contended.
+ * Returns non-zero if there is another task waiting on the rwlock.
+ * Returns zero if the lock is not contended or the system / underlying
+ * rwlock implementation does not support contention detection.
+ * Technically does not depend on CONFIG_PREEMPTION, but a general need
+ * for low latency.
+ */
+static inline int rwlock_needbreak(rwlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+ return rwlock_is_contended(lock);
+#else
+ return 0;
+#endif
+}
+
+/*
* Pull the atomic_t declaration:
* (asm-mips/atomic.h needs above definitions)
*/
@@ -507,6 +538,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t,
raw_spin_lock(_T->lock),
raw_spin_unlock(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock))
+
DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t,
raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING),
raw_spin_unlock(_T->lock))
@@ -515,23 +548,62 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t,
raw_spin_lock_irq(_T->lock),
raw_spin_unlock_irq(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock))
+
DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t,
raw_spin_lock_irqsave(_T->lock, _T->flags),
raw_spin_unlock_irqrestore(_T->lock, _T->flags),
unsigned long flags)
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try,
+ raw_spin_trylock_irqsave(_T->lock, _T->flags))
+
DEFINE_LOCK_GUARD_1(spinlock, spinlock_t,
spin_lock(_T->lock),
spin_unlock(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock))
+
DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t,
spin_lock_irq(_T->lock),
spin_unlock_irq(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try,
+ spin_trylock_irq(_T->lock))
+
DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t,
spin_lock_irqsave(_T->lock, _T->flags),
spin_unlock_irqrestore(_T->lock, _T->flags),
unsigned long flags)
+DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try,
+ spin_trylock_irqsave(_T->lock, _T->flags))
+
+DEFINE_LOCK_GUARD_1(read_lock, rwlock_t,
+ read_lock(_T->lock),
+ read_unlock(_T->lock))
+
+DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t,
+ read_lock_irq(_T->lock),
+ read_unlock_irq(_T->lock))
+
+DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t,
+ read_lock_irqsave(_T->lock, _T->flags),
+ read_unlock_irqrestore(_T->lock, _T->flags),
+ unsigned long flags)
+
+DEFINE_LOCK_GUARD_1(write_lock, rwlock_t,
+ write_lock(_T->lock),
+ write_unlock(_T->lock))
+
+DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t,
+ write_lock_irq(_T->lock),
+ write_unlock_irq(_T->lock))
+
+DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t,
+ write_lock_irqsave(_T->lock, _T->flags),
+ write_unlock_irqrestore(_T->lock, _T->flags),
+ unsigned long flags)
+
#undef __LINUX_INSIDE_SPINLOCK_H
#endif /* __LINUX_SPINLOCK_H */
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 6c461573434d..9dec4861d09f 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
typedef int (splice_direct_actor)(struct pipe_inode_info *,
struct splice_desc *);
-extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
- loff_t *, size_t, unsigned int,
- splice_actor *);
-extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
- struct splice_desc *, splice_actor *);
-extern ssize_t splice_to_pipe(struct pipe_inode_info *,
- struct splice_pipe_desc *);
-extern ssize_t add_to_pipe(struct pipe_inode_info *,
- struct pipe_buffer *);
-long vfs_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags);
-extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
- splice_direct_actor *);
-extern long do_splice(struct file *in, loff_t *off_in,
- struct file *out, loff_t *off_out,
- size_t len, unsigned int flags);
+ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags,
+ splice_actor *actor);
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+ struct splice_desc *sd, splice_actor *actor);
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd);
+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf);
+ssize_t vfs_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
+ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd,
+ splice_direct_actor *actor);
+ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out,
+ loff_t *off_out, size_t len, unsigned int flags);
+ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ loff_t *opos, size_t len, unsigned int flags);
+ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out,
+ loff_t *opos, size_t len);
-extern long do_tee(struct file *in, struct file *out, size_t len,
- unsigned int flags);
-extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags);
+static inline long splice_copy_file_range(struct file *in, loff_t pos_in,
+ struct file *out, loff_t pos_out,
+ size_t len)
+{
+ return splice_file_range(in, &pos_in, out, &pos_out, len);
+}
+
+ssize_t do_tee(struct file *in, struct file *out, size_t len,
+ unsigned int flags);
+ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags);
/*
* for dynamic pipe sizing
diff --git a/include/linux/spmi.h b/include/linux/spmi.h
index eac1956a8727..28e8c8bd3944 100644
--- a/include/linux/spmi.h
+++ b/include/linux/spmi.h
@@ -120,6 +120,9 @@ static inline void spmi_controller_put(struct spmi_controller *ctrl)
int spmi_controller_add(struct spmi_controller *ctrl);
void spmi_controller_remove(struct spmi_controller *ctrl);
+struct spmi_controller *devm_spmi_controller_alloc(struct device *parent, size_t size);
+int devm_spmi_controller_add(struct device *parent, struct spmi_controller *ctrl);
+
/**
* struct spmi_driver - SPMI slave device driver
* @driver: SPMI device drivers should initialize name and owner field of
@@ -166,7 +169,7 @@ static inline void spmi_driver_unregister(struct spmi_driver *sdrv)
struct device_node;
-struct spmi_device *spmi_device_from_of(struct device_node *np);
+struct spmi_device *spmi_find_device_by_of_node(struct device_node *np);
int spmi_register_read(struct spmi_device *sdev, u8 addr, u8 *buf);
int spmi_ext_register_read(struct spmi_device *sdev, u8 addr, u8 *buf,
size_t len);
diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h
new file mode 100644
index 000000000000..33dcbec71925
--- /dev/null
+++ b/include/linux/sprintf.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KERNEL_SPRINTF_H_
+#define _LINUX_KERNEL_SPRINTF_H_
+
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width);
+
+__printf(2, 3) int sprintf(char *buf, const char * fmt, ...);
+__printf(2, 0) int vsprintf(char *buf, const char *, va_list);
+__printf(3, 4) int snprintf(char *buf, size_t size, const char *fmt, ...);
+__printf(3, 0) int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+__printf(3, 4) int scnprintf(char *buf, size_t size, const char *fmt, ...);
+__printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+__printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...);
+__printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
+__printf(2, 0) const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);
+
+__scanf(2, 3) int sscanf(const char *, const char *, ...);
+__scanf(2, 0) int vsscanf(const char *, const char *, va_list);
+
+/* These are for specific cases, do not use without real need */
+extern bool no_hash_pointers;
+int no_hash_pointers_enable(char *str);
+
+#endif /* _LINUX_KERNEL_SPRINTF_H */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 127ef3b2e607..236610e4a8fa 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
srcu_check_nmi_safety(ssp, true);
retval = __srcu_read_lock_nmisafe(ssp);
- rcu_lock_acquire(&ssp->dep_map);
+ rcu_try_lock_acquire(&ssp->dep_map);
return retval;
}
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index ebd72491af99..447133171d95 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp);
#define DEFINE_STATIC_SRCU(name) \
static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
+// Dummy structure for srcu_notifier_head.
+struct srcu_usage { };
+#define __SRCU_USAGE_INIT(name) { }
+
void synchronize_srcu(struct srcu_struct *ssp);
/*
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index e58306783d8e..e9ec32fb97d4 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -11,8 +11,6 @@
* SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free
* stack traces often repeat, using stack depot allows to save about 100x space.
*
- * Stack traces are never removed from the stack depot.
- *
* Author: Alexander Potapenko <glider@google.com>
* Copyright (C) 2016 Google, Inc.
*
@@ -32,6 +30,64 @@ typedef u32 depot_stack_handle_t;
*/
#define STACK_DEPOT_EXTRA_BITS 5
+#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8)
+
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+ STACK_DEPOT_EXTRA_BITS)
+
+#ifdef CONFIG_STACKDEPOT
+/* Compact structure that stores a reference to a stack. */
+union handle_parts {
+ depot_stack_handle_t handle;
+ struct {
+ u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS;
+ u32 offset : DEPOT_OFFSET_BITS;
+ u32 extra : STACK_DEPOT_EXTRA_BITS;
+ };
+};
+
+struct stack_record {
+ struct list_head hash_list; /* Links in the hash table */
+ u32 hash; /* Hash in hash table */
+ u32 size; /* Number of stored frames */
+ union handle_parts handle; /* Constant after initialization */
+ refcount_t count;
+ union {
+ unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */
+ struct {
+ /*
+ * An important invariant of the implementation is to
+ * only place a stack record onto the freelist iff its
+ * refcount is zero. Because stack records with a zero
+ * refcount are never considered as valid, it is safe to
+ * union @entries and freelist management state below.
+ * Conversely, as soon as an entry is off the freelist
+ * and its refcount becomes non-zero, the below must not
+ * be accessed until being placed back on the freelist.
+ */
+ struct list_head free_list; /* Links in the freelist */
+ unsigned long rcu_state; /* RCU cookie */
+ };
+ };
+};
+#endif
+
+typedef u32 depot_flags_t;
+
+/*
+ * Flags that can be passed to stack_depot_save_flags(); see the comment next
+ * to its declaration for more details.
+ */
+#define STACK_DEPOT_FLAG_CAN_ALLOC ((depot_flags_t)0x0001)
+#define STACK_DEPOT_FLAG_GET ((depot_flags_t)0x0002)
+
+#define STACK_DEPOT_FLAGS_NUM 2
+#define STACK_DEPOT_FLAGS_MASK ((depot_flags_t)((1 << STACK_DEPOT_FLAGS_NUM) - 1))
+
/*
* Using stack depot requires its initialization, which can be done in 3 ways:
*
@@ -69,31 +125,39 @@ static inline int stack_depot_early_init(void) { return 0; }
#endif
/**
- * __stack_depot_save - Save a stack trace to stack depot
+ * stack_depot_save_flags - Save a stack trace to stack depot
*
* @entries: Pointer to the stack trace
* @nr_entries: Number of frames in the stack
* @alloc_flags: Allocation GFP flags
- * @can_alloc: Allocate stack pools (increased chance of failure if false)
+ * @depot_flags: Stack depot flags
+ *
+ * Saves a stack trace from @entries array of size @nr_entries.
+ *
+ * If STACK_DEPOT_FLAG_CAN_ALLOC is set in @depot_flags, stack depot can
+ * replenish the stack pools in case no space is left (allocates using GFP
+ * flags of @alloc_flags). Otherwise, stack depot avoids any allocations and
+ * fails if no space is left to store the stack trace.
*
- * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
- * %true, stack depot can replenish the stack pools in case no space is left
- * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
- * any allocations and fails if no space is left to store the stack trace.
+ * If STACK_DEPOT_FLAG_GET is set in @depot_flags, stack depot will increment
+ * the refcount on the saved stack trace if it already exists in stack depot.
+ * Users of this flag must also call stack_depot_put() when keeping the stack
+ * trace is no longer required to avoid overflowing the refcount.
*
* If the provided stack trace comes from the interrupt context, only the part
* up to the interrupt entry is saved.
*
- * Context: Any context, but setting @can_alloc to %false is required if
+ * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if
* alloc_pages() cannot be used from the current context. Currently
* this is the case for contexts where neither %GFP_ATOMIC nor
* %GFP_NOWAIT can be used (NMI, raw_spin_lock).
*
* Return: Handle of the stack struct stored in depot, 0 on failure
*/
-depot_stack_handle_t __stack_depot_save(unsigned long *entries,
- unsigned int nr_entries,
- gfp_t gfp_flags, bool can_alloc);
+depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
+ unsigned int nr_entries,
+ gfp_t gfp_flags,
+ depot_flags_t depot_flags);
/**
* stack_depot_save - Save a stack trace to stack depot
@@ -102,8 +166,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
* @nr_entries: Number of frames in the stack
* @alloc_flags: Allocation GFP flags
*
- * Context: Contexts where allocations via alloc_pages() are allowed.
- * See __stack_depot_save() for more details.
+ * Does not increment the refcount on the saved stack trace; see
+ * stack_depot_save_flags() for more details.
+ *
+ * Context: Contexts where allocations via alloc_pages() are allowed;
+ * see stack_depot_save_flags() for more details.
*
* Return: Handle of the stack trace stored in depot, 0 on failure
*/
@@ -111,6 +178,17 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned int nr_entries, gfp_t gfp_flags);
/**
+ * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
+ *
+ * @handle: Stack depot handle
+ *
+ * This function is only for internal purposes.
+ *
+ * Return: Returns a pointer to a stack_record struct
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle);
+
+/**
* stack_depot_fetch - Fetch a stack trace from stack depot
*
* @handle: Stack depot handle returned from stack_depot_save()
@@ -142,6 +220,18 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
int spaces);
/**
+ * stack_depot_put - Drop a reference to a stack trace from stack depot
+ *
+ * @handle: Stack depot handle returned from stack_depot_save()
+ *
+ * The stack trace is evicted from stack depot once all references to it have
+ * been dropped (once the number of stack_depot_evict() calls matches the
+ * number of stack_depot_save_flags() calls with STACK_DEPOT_FLAG_GET set for
+ * this stack trace).
+ */
+void stack_depot_put(depot_stack_handle_t handle);
+
+/**
* stack_depot_set_extra_bits - Set extra bits in a stack depot handle
*
* @handle: Stack depot handle returned from stack_depot_save()
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index c36e7a3b45e7..3be2cb564710 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -14,6 +14,7 @@
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
#include <asm/stacktrace.h>
+#include <linux/linkage.h>
/*
* The lowest address on tsk's stack which we can plausibly erase.
@@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t)
# endif
}
+asmlinkage void noinstr stackleak_erase(void);
+asmlinkage void noinstr stackleak_erase_on_task_stack(void);
+asmlinkage void noinstr stackleak_erase_off_task_stack(void);
+void __no_caller_saved_registers noinstr stackleak_track_stack(void);
+
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
static inline void stackleak_task_init(struct task_struct *t) { }
#endif
diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h
index a9806a44a605..09f994ac87df 100644
--- a/include/linux/start_kernel.h
+++ b/include/linux/start_kernel.h
@@ -9,7 +9,5 @@
up something else. */
extern asmlinkage void __init __noreturn start_kernel(void);
-extern void __init __noreturn arch_call_rest_init(void);
-extern void __ref __noreturn rest_init(void);
#endif /* _LINUX_START_KERNEL_H */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 06090538fe2d..dfa1828cd756 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -76,6 +76,8 @@
| DMA_AXI_BLEN_32 | DMA_AXI_BLEN_64 \
| DMA_AXI_BLEN_128 | DMA_AXI_BLEN_256)
+struct stmmac_priv;
+
/* Platfrom data for platform device structure's platform_data field */
struct stmmac_mdio_bus_data {
@@ -125,6 +127,7 @@ struct stmmac_est {
u32 gcl_unaligned[EST_GCL];
u32 gcl[EST_GCL];
u32 gcl_size;
+ u32 max_sdu[MTL_MAX_TX_QUEUES];
};
struct stmmac_rxq_cfg {
@@ -137,6 +140,7 @@ struct stmmac_rxq_cfg {
struct stmmac_txq_cfg {
u32 weight;
+ bool coe_unsupported;
u8 mode_to_use;
/* Credit Base Shaper parameters */
u32 send_slope;
@@ -172,6 +176,7 @@ struct stmmac_fpe_cfg {
bool hs_enable; /* FPE handshake enable */
enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */
enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */
+ u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */
};
struct stmmac_safety_feature_cfg {
@@ -204,14 +209,41 @@ struct dwmac4_addrs {
u32 mtl_low_cred_offset;
};
+#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0)
+#define STMMAC_FLAG_SPH_DISABLE BIT(1)
+#define STMMAC_FLAG_USE_PHY_WOL BIT(2)
+#define STMMAC_FLAG_HAS_SUN8I BIT(3)
+#define STMMAC_FLAG_TSO_EN BIT(4)
+#define STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP BIT(5)
+#define STMMAC_FLAG_VLAN_FAIL_Q_EN BIT(6)
+#define STMMAC_FLAG_MULTI_MSI_EN BIT(7)
+#define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8)
+#define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9)
+#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10)
+#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11)
+#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(12)
+
struct plat_stmmacenet_data {
int bus_id;
int phy_addr;
- int interface;
+ /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media
+ * ^ ^
+ * mac_interface phy_interface
+ *
+ * mac_interface is the MAC-side interface, which may be the same
+ * as phy_interface if there is no intervening PCS. If there is a
+ * PCS, then mac_interface describes the interface mode between the
+ * MAC and PCS, and phy_interface describes the interface mode
+ * between the PCS and PHY.
+ */
+ phy_interface_t mac_interface;
+ /* phy_interface is the PHY-side interface - the interface used by
+ * an attached PHY.
+ */
phy_interface_t phy_interface;
struct stmmac_mdio_bus_data *mdio_bus_data;
struct device_node *phy_node;
- struct device_node *phylink_node;
+ struct fwnode_handle *port_node;
struct device_node *mdio_node;
struct stmmac_dma_cfg *dma_cfg;
struct stmmac_est *est;
@@ -240,12 +272,12 @@ struct plat_stmmacenet_data {
u8 tx_sched_algorithm;
struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
- void (*fix_mac_speed)(void *priv, unsigned int speed);
+ void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode);
int (*fix_soc_reset)(void *priv, void __iomem *ioaddr);
int (*serdes_powerup)(struct net_device *ndev, void *priv);
void (*serdes_powerdown)(struct net_device *ndev, void *priv);
void (*speed_mode_2500)(struct net_device *ndev, void *priv);
- void (*ptp_clk_freq_config)(void *priv);
+ void (*ptp_clk_freq_config)(struct stmmac_priv *priv);
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
struct mac_device_info *(*setup)(void *priv);
@@ -266,22 +298,13 @@ struct plat_stmmacenet_data {
struct reset_control *stmmac_ahb_rst;
struct stmmac_axi *axi;
int has_gmac4;
- bool has_sun8i;
- bool tso_en;
int rss_en;
int mac_port_sel_speed;
- bool en_tx_lpi_clockgating;
- bool rx_clk_runs_in_lpi;
int has_xgmac;
- bool vlan_fail_q_en;
u8 vlan_fail_q;
unsigned int eee_usecs_rate;
struct pci_dev *pdev;
int int_snapshot_num;
- int ext_snapshot_num;
- bool int_snapshot_en;
- bool ext_snapshot_en;
- bool multi_msi_en;
int msi_mac_vec;
int msi_wol_vec;
int msi_lpi_vec;
@@ -289,10 +312,7 @@ struct plat_stmmacenet_data {
int msi_sfty_ue_vec;
int msi_rx_base_vec;
int msi_tx_base_vec;
- bool use_phy_wol;
- bool sph_disable;
- bool serdes_up_after_phy_linkup;
const struct dwmac4_addrs *dwmac4_addrs;
- bool has_integrated_pcs;
+ unsigned int flags;
};
#endif
diff --git a/include/linux/string.h b/include/linux/string.h
index dbfc66400050..9ba8b4597009 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -2,10 +2,14 @@
#ifndef _LINUX_STRING_H_
#define _LINUX_STRING_H_
+#include <linux/args.h>
+#include <linux/array_size.h>
#include <linux/compiler.h> /* for inline */
#include <linux/types.h> /* for size_t */
#include <linux/stddef.h> /* for NULL */
+#include <linux/err.h> /* for ERR_PTR() */
#include <linux/errno.h> /* for E2BIG */
+#include <linux/overflow.h> /* for check_mul_overflow() */
#include <linux/stdarg.h>
#include <uapi/linux/string.h>
@@ -14,6 +18,44 @@ extern void *memdup_user(const void __user *, size_t);
extern void *vmemdup_user(const void __user *, size_t);
extern void *memdup_user_nul(const void __user *, size_t);
+/**
+ * memdup_array_user - duplicate array from user space
+ * @src: source address in user space
+ * @n: number of array members to copy
+ * @size: size of one array member
+ *
+ * Return: an ERR_PTR() on failure. Result is physically
+ * contiguous, to be freed by kfree().
+ */
+static inline void *memdup_array_user(const void __user *src, size_t n, size_t size)
+{
+ size_t nbytes;
+
+ if (check_mul_overflow(n, size, &nbytes))
+ return ERR_PTR(-EOVERFLOW);
+
+ return memdup_user(src, nbytes);
+}
+
+/**
+ * vmemdup_array_user - duplicate array from user space
+ * @src: source address in user space
+ * @n: number of array members to copy
+ * @size: size of one array member
+ *
+ * Return: an ERR_PTR() on failure. Result may be not
+ * physically contiguous. Use kvfree() to free.
+ */
+static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size)
+{
+ size_t nbytes;
+
+ if (check_mul_overflow(n, size, &nbytes))
+ return ERR_PTR(-EOVERFLOW);
+
+ return vmemdup_user(src, nbytes);
+}
+
/*
* Include machine specific inline routines
*/
@@ -25,15 +67,79 @@ extern char * strcpy(char *,const char *);
#ifndef __HAVE_ARCH_STRNCPY
extern char * strncpy(char *,const char *, __kernel_size_t);
#endif
-#ifndef __HAVE_ARCH_STRLCPY
-size_t strlcpy(char *, const char *, size_t);
-#endif
-#ifndef __HAVE_ARCH_STRSCPY
-ssize_t strscpy(char *, const char *, size_t);
-#endif
+ssize_t sized_strscpy(char *, const char *, size_t);
-/* Wraps calls to strscpy()/memset(), no arch specific code required */
-ssize_t strscpy_pad(char *dest, const char *src, size_t count);
+/*
+ * The 2 argument style can only be used when dst is an array with a
+ * known size.
+ */
+#define __strscpy0(dst, src, ...) \
+ sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst))
+#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size)
+
+#define __strscpy_pad0(dst, src, ...) \
+ sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst))
+#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size)
+
+/**
+ * strscpy - Copy a C-string into a sized buffer
+ * @dst: Where to copy the string to
+ * @src: Where to copy the string from
+ * @...: Size of destination buffer (optional)
+ *
+ * Copy the source string @src, or as much of it as fits, into the
+ * destination @dst buffer. The behavior is undefined if the string
+ * buffers overlap. The destination @dst buffer is always NUL terminated,
+ * unless it's zero-sized.
+ *
+ * The size argument @... is only required when @dst is not an array, or
+ * when the copy needs to be smaller than sizeof(@dst).
+ *
+ * Preferred to strncpy() since it always returns a valid string, and
+ * doesn't unnecessarily force the tail of the destination buffer to be
+ * zero padded. If padding is desired please use strscpy_pad().
+ *
+ * Returns the number of characters copied in @dst (not including the
+ * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was
+ * truncated.
+ */
+#define strscpy(dst, src, ...) \
+ CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
+
+#define sized_strscpy_pad(dest, src, count) ({ \
+ char *__dst = (dest); \
+ const char *__src = (src); \
+ const size_t __count = (count); \
+ ssize_t __wrote; \
+ \
+ __wrote = sized_strscpy(__dst, __src, __count); \
+ if (__wrote >= 0 && __wrote < __count) \
+ memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \
+ __wrote; \
+})
+
+/**
+ * strscpy_pad() - Copy a C-string into a sized buffer
+ * @dst: Where to copy the string to
+ * @src: Where to copy the string from
+ * @...: Size of destination buffer
+ *
+ * Copy the string, or as much of it as fits, into the dest buffer. The
+ * behavior is undefined if the string buffers overlap. The destination
+ * buffer is always %NUL terminated, unless it's zero-sized.
+ *
+ * If the source string is shorter than the destination buffer, the
+ * remaining bytes in the buffer will be filled with %NUL bytes.
+ *
+ * For full explanation of why you may want to consider using the
+ * 'strscpy' functions please see the function docstring for strscpy().
+ *
+ * Returns:
+ * * The number of characters copied (not including the trailing %NULs)
+ * * -E2BIG if count is 0 or @src was truncated.
+ */
+#define strscpy_pad(dst, src, ...) \
+ CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__)
#ifndef __HAVE_ARCH_STRCAT
extern char * strcat(char *, const char *);
@@ -179,10 +285,19 @@ extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
+extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp);
+/* lib/argv_split.c */
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
extern void argv_free(char **argv);
+/* lib/cmdline.c */
+extern int get_option(char **str, int *pint);
+extern char *get_options(const char *str, int nints, int *ints);
+extern unsigned long long memparse(const char *ptr, char **retptr);
+extern bool parse_option_str(const char *str, const char *option);
+extern char *next_arg(char *args, char **param, char **val);
+
extern bool sysfs_streq(const char *s1, const char *s2);
int match_string(const char * const *array, size_t n, const char *string);
int __sysfs_match_string(const char * const *array, size_t n, const char *s);
@@ -277,10 +392,12 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define strtomem_pad(dest, src, pad) do { \
const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
_dest_len == (size_t)-1); \
- memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \
+ memcpy_and_pad(dest, _dest_len, src, \
+ strnlen(src, min(_src_len, _dest_len)), pad); \
} while (0)
/**
@@ -298,10 +415,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
*/
#define strtomem(dest, src) do { \
const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
_dest_len == (size_t)-1); \
- memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \
+ memcpy(dest, src, strnlen(src, min(_src_len, _dest_len))); \
} while (0)
/**
diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h
index 48120222b9b2..d9ebe20229f8 100644
--- a/include/linux/string_choices.h
+++ b/include/linux/string_choices.h
@@ -30,6 +30,7 @@ static inline const char *str_read_write(bool v)
{
return v ? "read" : "write";
}
+#define str_write_read(v) str_read_write(!(v))
static inline const char *str_on_off(bool v)
{
@@ -41,4 +42,15 @@ static inline const char *str_yes_no(bool v)
return v ? "yes" : "no";
}
+/**
+ * str_plural - Return the simple pluralization based on English counts
+ * @num: Number used for deciding pluralization
+ *
+ * If @num is 1, returns empty string, otherwise returns "s".
+ */
+static inline const char *str_plural(size_t num)
+{
+ return num == 1 ? "" : "s";
+}
+
#endif
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 789ab30045da..e93fbb5b0c01 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -17,15 +17,19 @@ static inline bool string_is_terminated(const char *s, int len)
return memchr(s, '\0', len) ? true : false;
}
-/* Descriptions of the types of units to
- * print in */
+/* Descriptions of the types of units to print in */
enum string_size_units {
STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
STRING_UNITS_2, /* use binary powers of 2^10 */
+ STRING_UNITS_MASK = BIT(0),
+
+ /* Modifiers */
+ STRING_UNITS_NO_SPACE = BIT(30),
+ STRING_UNITS_NO_BYTES = BIT(31),
};
-void string_get_size(u64 size, u64 blk_size, enum string_size_units units,
- char *buf, int len);
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
+ char *buf, int len);
int parse_int_array_user(const char __user *from, size_t count, int **array);
@@ -109,6 +113,8 @@ char *kstrdup_quotable(const char *src, gfp_t gfp);
char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp);
char *kstrdup_quotable_file(struct file *file, gfp_t gfp);
+char *kstrdup_and_replace(const char *src, char old, char new, gfp_t gfp);
+
char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n);
void kfree_strarray(char **array, size_t n);
diff --git a/include/linux/stringify.h b/include/linux/stringify.h
index 841cec8ed525..0e84cbe65270 100644
--- a/include/linux/stringify.h
+++ b/include/linux/stringify.h
@@ -9,4 +9,6 @@
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
+#define FILE_LINE __FILE__ ":" __stringify(__LINE__)
+
#endif /* !__LINUX_STRINGIFY_H */
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index db30a159f9d5..f22bf915dcf6 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -20,7 +20,8 @@
#ifdef CONFIG_SUNRPC_BACKCHANNEL
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
-void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
+void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+ const struct rpc_timeout *to);
void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 518bd28f5ab8..35766963dd14 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -56,10 +56,14 @@ struct cache_head {
struct kref ref;
unsigned long flags;
};
-#define CACHE_VALID 0 /* Entry contains valid data */
-#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
-#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
-#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
+
+/* cache_head.flags */
+enum {
+ CACHE_VALID, /* Entry contains valid data */
+ CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
+ CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
+ CACHE_CLEANED, /* Entry has been cleaned from cache */
+};
#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 4f41d839face..5321585c778f 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -92,6 +92,7 @@ struct rpc_clnt {
};
const struct cred *cl_cred;
unsigned int cl_max_connect; /* max number of transports not to the same IP */
+ struct super_block *pipefs_sb;
};
/*
@@ -138,6 +139,7 @@ struct rpc_create_args {
const char *servername;
const char *nodename;
const struct rpc_program *program;
+ struct rpc_stat *stats;
u32 prognumber; /* overrides program->number */
u32 version;
rpc_authflavor_t authflavor;
@@ -148,6 +150,8 @@ struct rpc_create_args {
const struct cred *cred;
unsigned int max_connect;
struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
struct rpc_add_xprt_test {
@@ -249,7 +253,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *,
const char *rpc_proc_name(const struct rpc_task *task);
-void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *);
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8ada7dc802d3..0c77ba488bba 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -38,6 +38,17 @@ struct rpc_wait {
};
/*
+ * This describes a timeout strategy
+ */
+struct rpc_timeout {
+ unsigned long to_initval, /* initial timeout */
+ to_maxval, /* max timeout */
+ to_increment; /* if !exponential */
+ unsigned int to_retries; /* max # of retries */
+ unsigned char to_exponential;
+};
+
+/*
* This is the RPC task struct
*/
struct rpc_task {
@@ -186,7 +197,7 @@ struct rpc_wait_queue {
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
unsigned char priority; /* current priority */
unsigned char nr; /* # tasks remaining for cookie */
- unsigned short qlen; /* total # tasks waiting in queue */
+ unsigned int qlen; /* total # tasks waiting in queue */
struct rpc_timer timer_list;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
const char * name;
@@ -205,7 +216,8 @@ struct rpc_wait_queue {
*/
struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+ struct rpc_timeout *timeout);
void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index d94d4f410507..3ce1550d1beb 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -43,22 +43,6 @@ struct net;
#ifdef CONFIG_PROC_FS
int rpc_proc_init(struct net *);
void rpc_proc_exit(struct net *);
-#else
-static inline int rpc_proc_init(struct net *net)
-{
- return 0;
-}
-
-static inline void rpc_proc_exit(struct net *net)
-{
-}
-#endif
-
-#ifdef MODULE
-void rpc_modcount(struct inode *, int);
-#endif
-
-#ifdef CONFIG_PROC_FS
struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *);
void rpc_proc_unregister(struct net *,const char *);
void rpc_proc_zero(const struct rpc_program *);
@@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *);
void svc_seq_show(struct seq_file *,
const struct svc_stat *);
#else
+static inline int rpc_proc_init(struct net *net)
+{
+ return 0;
+}
+static inline void rpc_proc_exit(struct net *net)
+{
+}
static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; }
static inline void rpc_proc_unregister(struct net *net, const char *p) {}
static inline void rpc_proc_zero(const struct rpc_program *p) {}
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f8751118c122..23617da0e565 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -17,6 +17,7 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/svcauth.h>
+#include <linux/lwq.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/pagevec.h>
@@ -33,22 +34,27 @@
*/
struct svc_pool {
unsigned int sp_id; /* pool id; also node id on NUMA */
- spinlock_t sp_lock; /* protects all fields */
- struct list_head sp_sockets; /* pending sockets */
- unsigned int sp_nrthreads; /* # of threads in pool */
+ struct lwq sp_xprts; /* pending transports */
+ atomic_t sp_nrthreads; /* # of threads in pool */
struct list_head sp_all_threads; /* all server threads */
+ struct llist_head sp_idle_threads; /* idle server threads */
/* statistics on pool operation */
+ struct percpu_counter sp_messages_arrived;
struct percpu_counter sp_sockets_queued;
struct percpu_counter sp_threads_woken;
- struct percpu_counter sp_threads_timedout;
-#define SP_TASK_PENDING (0) /* still work to do even if no
- * xprt is queued. */
-#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
+/* bits for sp_flags */
+enum {
+ SP_TASK_PENDING, /* still work to do even if no xprt is queued */
+ SP_NEED_VICTIM, /* One thread needs to agree to exit */
+ SP_VICTIM_REMAINS, /* One thread needs to actually exit */
+};
+
+
/*
* RPC service.
*
@@ -63,7 +69,6 @@ struct svc_serv {
struct svc_program * sv_program; /* RPC program */
struct svc_stat * sv_stats; /* RPC statistics */
spinlock_t sv_lock;
- struct kref sv_refcnt;
unsigned int sv_nrthreads; /* # of server threads */
unsigned int sv_maxconn; /* max connections allowed or
* '0' causing max to be based
@@ -84,54 +89,20 @@ struct svc_serv {
int (*sv_threadfn)(void *data);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- struct list_head sv_cb_list; /* queue for callback requests
+ struct lwq sv_cb_list; /* queue for callback requests
* that arrive over the same
* connection */
- spinlock_t sv_cb_lock; /* protects the svc_cb_list */
- wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
- * entries in the svc_cb_list */
bool sv_bc_enabled; /* service uses backchannel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
-/**
- * svc_get() - increment reference count on a SUNRPC serv
- * @serv: the svc_serv to have count incremented
- *
- * Returns: the svc_serv that was passed in.
- */
-static inline struct svc_serv *svc_get(struct svc_serv *serv)
-{
- kref_get(&serv->sv_refcnt);
- return serv;
-}
-
-void svc_destroy(struct kref *);
+/* This is used by pool_stats to find and lock an svc */
+struct svc_info {
+ struct svc_serv *serv;
+ struct mutex *mutex;
+};
-/**
- * svc_put - decrement reference count on a SUNRPC serv
- * @serv: the svc_serv to have count decremented
- *
- * When the reference count reaches zero, svc_destroy()
- * is called to clean up and free the serv.
- */
-static inline void svc_put(struct svc_serv *serv)
-{
- kref_put(&serv->sv_refcnt, svc_destroy);
-}
-
-/**
- * svc_put_not_last - decrement non-final reference count on SUNRPC serv
- * @serv: the svc_serv to have count decremented
- *
- * Returns: %true is refcount was decremented.
- *
- * If the refcount is 1, it is not decremented and instead failure is reported.
- */
-static inline bool svc_put_not_last(struct svc_serv *serv)
-{
- return refcount_dec_not_one(&serv->sv_refcnt.refcount);
-}
+void svc_destroy(struct svc_serv **svcp);
/*
* Maximum payload size supported by a kernel RPC server.
@@ -195,6 +166,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
*/
struct svc_rqst {
struct list_head rq_all; /* all threads list */
+ struct llist_node rq_idle; /* On the idle list */
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
@@ -232,16 +204,6 @@ struct svc_rqst {
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
int rq_cachetype; /* catering to nfsd */
-#define RQ_SECURE (0) /* secure port */
-#define RQ_LOCAL (1) /* local request */
-#define RQ_USEDEFERRAL (2) /* use deferral */
-#define RQ_DROPME (3) /* drop current reply */
-#define RQ_SPLICE_OK (4) /* turned off in gss privacy
- * to prevent encrypting page
- * cache pages */
-#define RQ_VICTIM (5) /* about to be shut down */
-#define RQ_BUSY (6) /* request is busy */
-#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
@@ -265,12 +227,24 @@ struct svc_rqst {
/* Catering to nfsd */
struct auth_domain * rq_client; /* RPC peer info */
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
- struct svc_cacherep * rq_cacherep; /* cache info */
struct task_struct *rq_task; /* service thread */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
+ unsigned long bc_to_initval;
+ unsigned int bc_to_retries;
void ** rq_lease_breaker; /* The v4 client breaking a lease */
+ unsigned int rq_status_counter; /* RPC processing counter */
+};
+
+/* bits for rq_flags */
+enum {
+ RQ_SECURE, /* secure port */
+ RQ_LOCAL, /* local request */
+ RQ_USEDEFERRAL, /* use deferral */
+ RQ_DROPME, /* drop current reply */
+ RQ_VICTIM, /* Have agreed to shut down */
+ RQ_DATA, /* request has data */
};
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
@@ -308,6 +282,28 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
return (struct sockaddr *) &rqst->rq_daddr;
}
+/**
+ * svc_thread_should_stop - check if this thread should stop
+ * @rqstp: the thread that might need to stop
+ *
+ * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
+ * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
+ * the victim using this function. It should then promptly call
+ * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS
+ * so the task waiting for a thread to exit can wake and continue.
+ *
+ * Return values:
+ * %true: caller should invoke svc_exit_thread()
+ * %false: caller should do nothing
+ */
+static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
+{
+ if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
+ set_bit(RQ_VICTIM, &rqstp->rq_flags);
+
+ return test_bit(RQ_VICTIM, &rqstp->rq_flags);
+}
+
struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */
struct svc_xprt *xprt;
@@ -343,8 +339,7 @@ struct svc_program {
const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
- struct svc_stat * pg_stats; /* rpc statistics */
- int (*pg_authenticate)(struct svc_rqst *);
+ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
struct svc_process_info *);
@@ -415,18 +410,20 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+struct svc_serv * svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
+ unsigned int bufsize,
int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
-int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+int svc_pool_stats_open(struct svc_info *si, struct file *file);
void svc_process(struct svc_rqst *rqstp);
-int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
- struct svc_rqst *);
+void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp);
int svc_register(const struct svc_serv *, struct net *, const int,
const unsigned short, const unsigned short);
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
+void svc_pool_wake_idle_thread(struct svc_pool *pool);
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
const char * svc_proc_name(const struct svc_rqst *rqstp);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index a5ee0af2a310..d33bab33099a 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -65,6 +65,7 @@ extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
extern unsigned int svcrdma_max_bc_requests;
extern unsigned int svcrdma_max_req_size;
+extern struct workqueue_struct *svcrdma_wq;
extern struct percpu_counter svcrdma_stat_read;
extern struct percpu_counter svcrdma_stat_recv;
@@ -97,6 +98,7 @@ struct svcxprt_rdma {
u32 sc_pending_recvs;
u32 sc_recv_batch;
struct list_head sc_rq_dto_q;
+ struct list_head sc_read_complete_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
@@ -115,6 +117,13 @@ struct svcxprt_rdma {
/* sc_flags */
#define RDMAXPRT_CONN_PENDING 3
+static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
+{
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
+ return container_of(xprt, struct svcxprt_rdma, sc_xprt);
+}
+
/*
* Default connection parameters
*/
@@ -126,6 +135,43 @@ enum {
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+/**
+ * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID
+ * @rdma: controlling transport
+ * @cid: completion ID to initialize
+ */
+static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_rq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
+/**
+ * svc_rdma_send_cid_init - Initialize a Send Queue completion ID
+ * @rdma: controlling transport
+ * @cid: completion ID to initialize
+ */
+static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
+/*
+ * A chunk context tracks all I/O for moving one Read or Write
+ * chunk. This is a set of rdma_rw's that handle data movement
+ * for all segments of one chunk.
+ */
+struct svc_rdma_chunk_ctxt {
+ struct rpc_rdma_cid cc_cid;
+ struct ib_cqe cc_cqe;
+ struct list_head cc_rwctxts;
+ ktime_t cc_posttime;
+ int cc_sqecount;
+};
+
struct svc_rdma_recv_ctxt {
struct llist_node rc_node;
struct list_head rc_list;
@@ -136,26 +182,63 @@ struct svc_rdma_recv_ctxt {
void *rc_recv_buf;
struct xdr_stream rc_stream;
u32 rc_byte_len;
- unsigned int rc_page_count;
u32 rc_inv_rkey;
__be32 rc_msgtype;
+ /* State for pulling a Read chunk */
+ unsigned int rc_pageoff;
+ unsigned int rc_curpage;
+ unsigned int rc_readbytes;
+ struct xdr_buf rc_saved_arg;
+ struct svc_rdma_chunk_ctxt rc_cc;
+
struct svc_rdma_pcl rc_call_pcl;
struct svc_rdma_pcl rc_read_pcl;
struct svc_rdma_chunk *rc_cur_result_payload;
struct svc_rdma_pcl rc_write_pcl;
struct svc_rdma_pcl rc_reply_pcl;
+
+ unsigned int rc_page_count;
+ struct page *rc_pages[RPCSVC_MAXPAGES];
+};
+
+/*
+ * State for sending a Write chunk.
+ * - Tracks progress of writing one chunk over all its segments
+ * - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+ struct svcxprt_rdma *wi_rdma;
+
+ const struct svc_rdma_chunk *wi_chunk;
+
+ /* write state of this chunk */
+ unsigned int wi_seg_off;
+ unsigned int wi_seg_no;
+
+ /* SGL constructor arguments */
+ const struct xdr_buf *wi_xdr;
+ unsigned char *wi_base;
+ unsigned int wi_next_off;
+
+ struct svc_rdma_chunk_ctxt wi_cc;
+ struct work_struct wi_work;
};
struct svc_rdma_send_ctxt {
struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
+ struct work_struct sc_work;
+ struct svcxprt_rdma *sc_rdma;
struct ib_send_wr sc_send_wr;
+ struct ib_send_wr *sc_wr_chain;
+ int sc_sqecount;
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
+ struct svc_rdma_write_info sc_reply_info;
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@@ -179,13 +262,24 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
+extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
-extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr);
-extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr);
+extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc);
+extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc,
+ enum dma_data_direction dir);
+extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head);
@@ -196,11 +290,12 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
const struct xdr_buf *xdr);
extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a6b12631db21..8e20cd60e2e7 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -54,25 +54,8 @@ struct svc_xprt {
const struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref;
struct list_head xpt_list;
- struct list_head xpt_ready;
+ struct lwq_node xpt_ready;
unsigned long xpt_flags;
-#define XPT_BUSY 0 /* enqueued/receiving */
-#define XPT_CONN 1 /* conn pending */
-#define XPT_CLOSE 2 /* dead or dying */
-#define XPT_DATA 3 /* data pending */
-#define XPT_TEMP 4 /* connected transport */
-#define XPT_DEAD 6 /* transport closed */
-#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
-#define XPT_DEFERRED 8 /* deferred request pending */
-#define XPT_OLD 9 /* used for xprt aging mark+sweep */
-#define XPT_LISTENER 10 /* listening endpoint */
-#define XPT_CACHE_AUTH 11 /* cache auth info */
-#define XPT_LOCAL 12 /* connection from loopback interface */
-#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
-#define XPT_CONG_CTRL 14 /* has congestion control */
-#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
-#define XPT_TLS_SESSION 16 /* transport-layer security established */
-#define XPT_PEER_AUTH 17 /* peer has been authenticated */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
@@ -97,6 +80,27 @@ struct svc_xprt {
struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
};
+/* flag bits for xpt_flags */
+enum {
+ XPT_BUSY, /* enqueued/receiving */
+ XPT_CONN, /* conn pending */
+ XPT_CLOSE, /* dead or dying */
+ XPT_DATA, /* data pending */
+ XPT_TEMP, /* connected transport */
+ XPT_DEAD, /* transport closed */
+ XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
+ XPT_DEFERRED, /* deferred request pending */
+ XPT_OLD, /* used for xprt aging mark+sweep */
+ XPT_LISTENER, /* listening endpoint */
+ XPT_CACHE_AUTH, /* cache auth info */
+ XPT_LOCAL, /* connection from loopback interface */
+ XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
+ XPT_CONG_CTRL, /* has congestion control */
+ XPT_HANDSHAKE, /* xprt requests a handshake */
+ XPT_TLS_SESSION, /* transport-layer security established */
+ XPT_PEER_AUTH, /* peer has been authenticated */
+};
+
static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
{
spin_lock(&xpt->xpt_lock);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 6d9cc9080aca..61c455f1e1f5 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -83,6 +83,19 @@ struct auth_domain {
struct rcu_head rcu_head;
};
+enum svc_auth_status {
+ SVC_GARBAGE = 1,
+ SVC_SYSERR,
+ SVC_VALID,
+ SVC_NEGATIVE,
+ SVC_OK,
+ SVC_DROP,
+ SVC_CLOSE,
+ SVC_DENIED,
+ SVC_PENDING,
+ SVC_COMPLETE,
+};
+
/*
* Each authentication flavour registers an auth_ops
* structure.
@@ -98,6 +111,8 @@ struct auth_domain {
* is (probably) already in place. Certainly space is
* reserved for it.
* DROP - simply drop the request. It may have been deferred
+ * CLOSE - like SVC_DROP, but request is definitely lost.
+ * If there is a tcp connection, it should be closed.
* GARBAGE - rpc garbage_args error
* SYSERR - rpc system_err error
* DENIED - authp holds reason for denial.
@@ -111,60 +126,45 @@ struct auth_domain {
*
* release() is given a request after the procedure has been run.
* It should sign/encrypt the results if needed
- * It should return:
- * OK - the resbuf is ready to be sent
- * DROP - the reply should be quitely dropped
- * DENIED - authp holds a reason for MSG_DENIED
- * SYSERR - rpc system_err
*
* domain_release()
* This call releases a domain.
+ *
* set_client()
- * Givens a pending request (struct svc_rqst), finds and assigns
+ * Given a pending request (struct svc_rqst), finds and assigns
* an appropriate 'auth_domain' as the client.
+ *
+ * pseudoflavor()
+ * Returns RPC_AUTH pseudoflavor in use by @rqstp.
*/
struct auth_ops {
char * name;
struct module *owner;
int flavour;
- int (*accept)(struct svc_rqst *rq);
- int (*release)(struct svc_rqst *rq);
- void (*domain_release)(struct auth_domain *);
- int (*set_client)(struct svc_rqst *rq);
-};
-#define SVC_GARBAGE 1
-#define SVC_SYSERR 2
-#define SVC_VALID 3
-#define SVC_NEGATIVE 4
-#define SVC_OK 5
-#define SVC_DROP 6
-#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
- * lost so if there is a tcp connection, it
- * should be closed
- */
-#define SVC_DENIED 8
-#define SVC_PENDING 9
-#define SVC_COMPLETE 10
+ enum svc_auth_status (*accept)(struct svc_rqst *rqstp);
+ int (*release)(struct svc_rqst *rqstp);
+ void (*domain_release)(struct auth_domain *dom);
+ enum svc_auth_status (*set_client)(struct svc_rqst *rqstp);
+ rpc_authflavor_t (*pseudoflavor)(struct svc_rqst *rqstp);
+};
struct svc_xprt;
-extern int svc_authenticate(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp);
+extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp);
extern int svc_authorise(struct svc_rqst *rqstp);
-extern int svc_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp);
extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
extern void svc_auth_unregister(rpc_authflavor_t flavor);
extern struct auth_domain *unix_domain_find(char *name);
extern void auth_domain_put(struct auth_domain *item);
-extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom);
extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
extern struct auth_domain *auth_domain_find(char *name);
-extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
-extern int auth_unix_forget_old(struct auth_domain *dom);
extern void svcauth_unix_purge(struct net *net);
extern void svcauth_unix_info_release(struct svc_xprt *xpt);
-extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp);
extern int unix_gid_cache_create(struct net *net);
extern void unix_gid_cache_destroy(struct net *net);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a7116048a4d4..7c78ec6356b9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -35,8 +35,8 @@ struct svc_sock {
/* Total length of the data (not including fragment headers)
* received so far in the fragments making up this rpc: */
u32 sk_datalen;
- /* Number of queued send requests */
- atomic_t sk_sendqlen;
+
+ struct page_frag_cache sk_frag_cache;
struct completion sk_handshake_done;
@@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
/*
* Function prototypes.
*/
-void svc_close_net(struct svc_serv *, struct net *);
-int svc_recv(struct svc_rqst *, long);
+void svc_recv(struct svc_rqst *rqstp);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
@@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net,
const struct cred *cred);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
-void svc_sock_destroy(struct svc_xprt *);
/*
* svc_makesock socket characteristics
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index f89ec4b5ea16..2f8dc47f1eb0 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32);
size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr);
static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
{
@@ -224,6 +226,7 @@ struct xdr_stream {
struct kvec *iov; /* pointer to the current kvec */
struct kvec scratch; /* Scratch buffer */
struct page **page_ptr; /* pointer to the current page */
+ void *page_kaddr; /* kmapped address of the current page */
unsigned int nwords; /* Remaining decode buffer length */
struct rpc_rqst *rqst; /* For debugging */
@@ -255,6 +258,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
+extern void xdr_finish_decode(struct xdr_stream *xdr);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
@@ -775,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
return -EBADMSG;
- if (len > SIZE_MAX / sizeof(*p))
+ if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p))
return -EBADMSG;
p = xdr_inline_decode(xdr, len * sizeof(*p));
if (unlikely(!p))
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b52411bcfe4e..81b952649d35 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -30,17 +30,6 @@
#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-/*
- * This describes a timeout strategy
- */
-struct rpc_timeout {
- unsigned long to_initval, /* initial timeout */
- to_maxval, /* max timeout */
- to_increment; /* if !exponential */
- unsigned int to_retries; /* max # of retries */
- unsigned char to_exponential;
-};
-
enum rpc_display_format_t {
RPC_DISPLAY_ADDR = 0,
RPC_DISPLAY_PORT,
@@ -57,6 +46,7 @@ struct xprt_class;
struct seq_file;
struct svc_serv;
struct net;
+#include <linux/lwq.h>
/*
* This describes a complete RPC request
@@ -121,7 +111,7 @@ struct rpc_rqst {
int rq_ntrans;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- struct list_head rq_bc_list; /* Callback service list */
+ struct lwq_node rq_bc_list; /* Callback service list */
unsigned long rq_bc_pa_state; /* Backchannel prealloc state */
struct list_head rq_bc_pa_list; /* Backchannel prealloc list */
#endif /* CONFIG_SUNRPC_BACKCHANEL */
@@ -162,6 +152,7 @@ struct rpc_xprt_ops {
int (*prepare_request)(struct rpc_rqst *req,
struct xdr_buf *buf);
int (*send_request)(struct rpc_rqst *req);
+ void (*abort_send_request)(struct rpc_rqst *req);
void (*wait_for_reply_request)(struct rpc_task *task);
void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_request)(struct rpc_task *task);
@@ -351,6 +342,8 @@ struct xprt_create {
struct rpc_xprt_switch *bc_xps;
unsigned int flags;
struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
struct xprt_class {
diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h
deleted file mode 100644
index 8d3376775813..000000000000
--- a/include/linux/superhyway.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * include/linux/superhyway.h
- *
- * SuperHyway Bus definitions
- *
- * Copyright (C) 2004, 2005 Paul Mundt <lethal@linux-sh.org>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef __LINUX_SUPERHYWAY_H
-#define __LINUX_SUPERHYWAY_H
-
-#include <linux/device.h>
-
-/*
- * SuperHyway IDs
- */
-#define SUPERHYWAY_DEVICE_ID_SH5_DMAC 0x0183
-
-struct superhyway_vcr_info {
- u8 perr_flags; /* P-port Error flags */
- u8 merr_flags; /* Module Error flags */
- u16 mod_vers; /* Module Version */
- u16 mod_id; /* Module ID */
- u8 bot_mb; /* Bottom Memory block */
- u8 top_mb; /* Top Memory block */
-};
-
-struct superhyway_ops {
- int (*read_vcr)(unsigned long base, struct superhyway_vcr_info *vcr);
- int (*write_vcr)(unsigned long base, struct superhyway_vcr_info vcr);
-};
-
-struct superhyway_bus {
- struct superhyway_ops *ops;
-};
-
-extern struct superhyway_bus superhyway_channels[];
-
-struct superhyway_device_id {
- unsigned int id;
- unsigned long driver_data;
-};
-
-struct superhyway_device;
-extern struct bus_type superhyway_bus_type;
-
-struct superhyway_driver {
- char *name;
-
- const struct superhyway_device_id *id_table;
- struct device_driver drv;
-
- int (*probe)(struct superhyway_device *dev, const struct superhyway_device_id *id);
- void (*remove)(struct superhyway_device *dev);
-};
-
-#define to_superhyway_driver(d) container_of((d), struct superhyway_driver, drv)
-
-struct superhyway_device {
- char name[32];
-
- struct device dev;
-
- struct superhyway_device_id id;
- struct superhyway_driver *drv;
- struct superhyway_bus *bus;
-
- int num_resources;
- struct resource *resource;
- struct superhyway_vcr_info vcr;
-};
-
-#define to_superhyway_device(d) container_of((d), struct superhyway_device, dev)
-
-#define superhyway_get_drvdata(d) dev_get_drvdata(&(d)->dev)
-#define superhyway_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p))
-
-static inline int
-superhyway_read_vcr(struct superhyway_device *dev, unsigned long base,
- struct superhyway_vcr_info *vcr)
-{
- return dev->bus->ops->read_vcr(base, vcr);
-}
-
-static inline int
-superhyway_write_vcr(struct superhyway_device *dev, unsigned long base,
- struct superhyway_vcr_info vcr)
-{
- return dev->bus->ops->write_vcr(base, vcr);
-}
-
-extern int superhyway_scan_bus(struct superhyway_bus *);
-
-/* drivers/sh/superhyway/superhyway.c */
-int superhyway_register_driver(struct superhyway_driver *);
-void superhyway_unregister_driver(struct superhyway_driver *);
-int superhyway_add_device(unsigned long base, struct superhyway_device *, struct superhyway_bus *);
-int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices);
-
-/* drivers/sh/superhyway/superhyway-sysfs.c */
-extern const struct attribute_group *superhyway_dev_groups[];
-
-#endif /* __LINUX_SUPERHYWAY_H */
-
diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
index cb7980805920..5b67f0f47d80 100644
--- a/include/linux/surface_aggregator/controller.h
+++ b/include/linux/surface_aggregator/controller.h
@@ -44,7 +44,7 @@ struct ssam_event {
u8 command_id;
u8 instance_id;
u16 length;
- u8 data[];
+ u8 data[] __counted_by(length);
};
/**
diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index 42b249b4c24b..8cd8c38cf3f3 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -193,7 +193,6 @@ struct ssam_device_driver {
#ifdef CONFIG_SURFACE_AGGREGATOR_BUS
-extern struct bus_type ssam_bus_type;
extern const struct device_type ssam_device_type;
/**
diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h
index 5c4ae1a26183..d8dbef6b7fc2 100644
--- a/include/linux/surface_aggregator/serial_hub.h
+++ b/include/linux/surface_aggregator/serial_hub.h
@@ -12,7 +12,7 @@
#ifndef _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H
#define _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H
-#include <linux/crc-ccitt.h>
+#include <linux/crc-itu-t.h>
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/list.h>
@@ -188,7 +188,7 @@ static_assert(sizeof(struct ssh_command) == 8);
*/
static inline u16 ssh_crc(const u8 *buf, size_t len)
{
- return crc_ccitt_false(0xffff, buf, len);
+ return crc_itu_t(0xffff, buf, len);
}
/*
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index ef503088942d..da6ebca3ff77 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -40,65 +40,6 @@ typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE
#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
-enum suspend_stat_step {
- SUSPEND_FREEZE = 1,
- SUSPEND_PREPARE,
- SUSPEND_SUSPEND,
- SUSPEND_SUSPEND_LATE,
- SUSPEND_SUSPEND_NOIRQ,
- SUSPEND_RESUME_NOIRQ,
- SUSPEND_RESUME_EARLY,
- SUSPEND_RESUME
-};
-
-struct suspend_stats {
- int success;
- int fail;
- int failed_freeze;
- int failed_prepare;
- int failed_suspend;
- int failed_suspend_late;
- int failed_suspend_noirq;
- int failed_resume;
- int failed_resume_early;
- int failed_resume_noirq;
-#define REC_FAILED_NUM 2
- int last_failed_dev;
- char failed_devs[REC_FAILED_NUM][40];
- int last_failed_errno;
- int errno[REC_FAILED_NUM];
- int last_failed_step;
- u64 last_hw_sleep;
- u64 total_hw_sleep;
- u64 max_hw_sleep;
- enum suspend_stat_step failed_steps[REC_FAILED_NUM];
-};
-
-extern struct suspend_stats suspend_stats;
-
-static inline void dpm_save_failed_dev(const char *name)
-{
- strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev],
- name,
- sizeof(suspend_stats.failed_devs[0]));
- suspend_stats.last_failed_dev++;
- suspend_stats.last_failed_dev %= REC_FAILED_NUM;
-}
-
-static inline void dpm_save_failed_errno(int err)
-{
- suspend_stats.errno[suspend_stats.last_failed_errno] = err;
- suspend_stats.last_failed_errno++;
- suspend_stats.last_failed_errno %= REC_FAILED_NUM;
-}
-
-static inline void dpm_save_failed_step(enum suspend_stat_step step)
-{
- suspend_stats.failed_steps[suspend_stats.last_failed_step] = step;
- suspend_stats.last_failed_step++;
- suspend_stats.last_failed_step %= REC_FAILED_NUM;
-}
-
/**
* struct platform_suspend_ops - Callbacks for managing platform dependent
* system sleep states.
@@ -626,4 +567,19 @@ static inline void queue_up_suspend_work(void) {}
#endif /* !CONFIG_PM_AUTOSLEEP */
+enum suspend_stat_step {
+ SUSPEND_WORKING = 0,
+ SUSPEND_FREEZE,
+ SUSPEND_PREPARE,
+ SUSPEND_SUSPEND,
+ SUSPEND_SUSPEND_LATE,
+ SUSPEND_SUSPEND_NOIRQ,
+ SUSPEND_RESUME_NOIRQ,
+ SUSPEND_RESUME_EARLY,
+ SUSPEND_RESUME
+};
+
+void dpm_save_failed_dev(const char *name);
+void dpm_save_failed_step(enum suspend_stat_step step);
+
#endif /* _LINUX_SUSPEND_H */
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 6a8c22b8c2a5..d324419482a0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 456546443f1f..f53d608daa01 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -298,14 +298,11 @@ struct swap_info_struct {
unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */
struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
struct rb_root swap_extent_root;/* root of the swap extent rbtree */
+ struct file *bdev_file; /* open handle of the bdev */
struct block_device *bdev; /* swap device or bdev of swap file */
struct file *swap_file; /* seldom referenced */
unsigned int old_block_size; /* seldom referenced */
struct completion comp; /* seldom referenced */
-#ifdef CONFIG_FRONTSWAP
- unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
- atomic_t frontswap_pages; /* frontswap pages in-use counter */
-#endif
spinlock_t lock; /*
* protect map scan related fields like
* swap_map, lowest_bit, highest_bit,
@@ -337,15 +334,13 @@ struct swap_info_struct {
*/
};
-static inline swp_entry_t folio_swap_entry(struct folio *folio)
+static inline swp_entry_t page_swap_entry(struct page *page)
{
- swp_entry_t entry = { .val = page_private(&folio->page) };
- return entry;
-}
+ struct folio *folio = page_folio(page);
+ swp_entry_t entry = folio->swap;
-static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
-{
- folio->private = (void *)entry.val;
+ entry.val += folio_page_idx(folio, page);
+ return entry;
}
/* linux/mm/workingset.c */
@@ -355,16 +350,6 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
void workingset_activation(struct folio *folio);
-/* Only track the nodes of mappings with shadow entries */
-void workingset_update_node(struct xa_node *node);
-extern struct list_lru shadow_nodes;
-#define mapping_set_update(xas, mapping) do { \
- if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \
- xas_set_update(xas, workingset_update_node); \
- xas_set_lru(xas, &shadow_nodes); \
- } \
-} while (0)
-
/* linux/mm/page_alloc.c */
extern unsigned long totalreserve_pages;
@@ -402,9 +387,6 @@ void folio_deactivate(struct folio *folio);
void folio_mark_lazyfree(struct folio *folio);
extern void swap_setup(void);
-extern void lru_cache_add_inactive_or_unevictable(struct page *page,
- struct vm_area_struct *vma);
-
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -455,9 +437,9 @@ static inline unsigned long total_swapcache_pages(void)
return global_node_page_state(NR_SWAPCACHE);
}
-extern void free_swap_cache(struct page *page);
-extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct encoded_page **, int);
+void free_swap_cache(struct folio *folio);
+void free_page_and_swap_cache(struct page *);
+void free_pages_and_swap_cache(struct encoded_page **, int);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
@@ -495,13 +477,12 @@ extern sector_t swapdev_block(int, pgoff_t);
extern int __swap_count(swp_entry_t entry);
extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
-extern struct swap_info_struct *page_swap_info(struct page *);
-extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
+struct swap_info_struct *swp_swap_info(swp_entry_t entry);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
-sector_t swap_page_sector(struct page *page);
+sector_t swap_folio_sector(struct folio *folio);
static inline void put_swap_device(struct swap_info_struct *si)
{
@@ -540,7 +521,7 @@ static inline void put_swap_device(struct swap_info_struct *si)
/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
#define free_swap_and_cache(e) is_pfn_swap_entry(e)
-static inline void free_swap_cache(struct page *page)
+static inline void free_swap_cache(struct folio *folio)
{
}
@@ -558,6 +539,11 @@ static inline int swap_duplicate(swp_entry_t swp)
return 0;
}
+static inline int swapcache_prepare(swp_entry_t swp)
+{
+ return 0;
+}
+
static inline void swap_free(swp_entry_t swp)
{
}
@@ -630,11 +616,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
}
#endif
-#ifdef CONFIG_ZSWAP
-extern u64 zswap_pool_total_size;
-extern atomic_t zswap_stored_pages;
-#endif
-
#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp);
static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 7ed529a77c5b..99e3ed469e88 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -2,11 +2,6 @@
#ifndef _LINUX_SWAPFILE_H
#define _LINUX_SWAPFILE_H
-/*
- * these were static in swapfile.c but frontswap.c needs them and we don't
- * want to expose them to the dozens of source files that include swap.h
- */
-extern struct swap_info_struct *swap_info[];
extern unsigned long generic_max_swapfile_size(void);
unsigned long arch_max_swapfile_size(void);
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4c932cb45e0b..a5c560a2f8c2 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -390,10 +390,44 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
}
#endif /* CONFIG_MIGRATION */
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
-#define PTE_MARKER_SWAPIN_ERROR BIT(1)
+/*
+ * "Poisoned" here is meant in the very general sense of "future accesses are
+ * invalid", instead of referring very specifically to hardware memory errors.
+ * This marker is meant to represent any of various different causes of this.
+ */
+#define PTE_MARKER_POISONED BIT(1)
#define PTE_MARKER_MASK (BIT(2) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
@@ -421,15 +455,15 @@ static inline pte_t make_pte_marker(pte_marker marker)
return swp_entry_to_pte(make_pte_marker_entry(marker));
}
-static inline swp_entry_t make_swapin_error_entry(void)
+static inline swp_entry_t make_poisoned_swp_entry(void)
{
- return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+ return make_pte_marker_entry(PTE_MARKER_POISONED);
}
-static inline int is_swapin_error_entry(swp_entry_t entry)
+static inline int is_poisoned_swp_entry(swp_entry_t entry)
{
return is_pte_marker_entry(entry) &&
- (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+ (pte_marker_get(entry) & PTE_MARKER_POISONED);
}
/*
@@ -463,10 +497,24 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
return p;
}
+static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
+{
+ struct folio *folio = pfn_folio(swp_offset_pfn(entry));
+
+ /*
+ * Any use of migration entries may only occur while the
+ * corresponding folio is locked
+ */
+ BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio));
+
+ return folio;
+}
+
/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
@@ -474,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
- is_device_exclusive_entry(entry);
+ is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}
struct page_vma_mapped_walk;
@@ -543,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
- return 0;
-}
-#endif
-
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 4e52cd5e0bdc..ea23097e351f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/limits.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
struct device;
struct page;
@@ -62,8 +63,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
#ifdef CONFIG_SWIOTLB
/**
- * struct io_tlb_mem - IO TLB Memory Pool Descriptor
- *
+ * struct io_tlb_pool - IO TLB memory pool descriptor
* @start: The start address of the swiotlb memory pool. Used to do a quick
* range check to see if the memory was in fact allocated by this
* API.
@@ -73,50 +73,125 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
* @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool
* may be remapped in the memory encrypted case and store virtual
* address for bounce buffer operation.
- * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
- * @end. For default swiotlb, this is command line adjustable via
- * setup_io_tlb_npages.
- * @list: The free list describing the number of free entries available
- * from each index.
- * @orig_addr: The original address corresponding to a mapped entry.
- * @alloc_size: Size of the allocated buffer.
+ * @nslabs: The number of IO TLB slots between @start and @end. For the
+ * default swiotlb, this can be adjusted with a boot parameter,
+ * see setup_io_tlb_npages().
+ * @late_alloc: %true if allocated using the page allocator.
+ * @nareas: Number of areas in the pool.
+ * @area_nslabs: Number of slots in each area.
+ * @areas: Array of memory area descriptors.
+ * @slots: Array of slot descriptors.
+ * @node: Member of the IO TLB memory pool list.
+ * @rcu: RCU head for swiotlb_dyn_free().
+ * @transient: %true if transient memory pool.
+ */
+struct io_tlb_pool {
+ phys_addr_t start;
+ phys_addr_t end;
+ void *vaddr;
+ unsigned long nslabs;
+ bool late_alloc;
+ unsigned int nareas;
+ unsigned int area_nslabs;
+ struct io_tlb_area *areas;
+ struct io_tlb_slot *slots;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ struct list_head node;
+ struct rcu_head rcu;
+ bool transient;
+#endif
+};
+
+/**
+ * struct io_tlb_mem - Software IO TLB allocator
+ * @defpool: Default (initial) IO TLB memory pool descriptor.
+ * @pool: IO TLB memory pool descriptor (if not dynamic).
+ * @nslabs: Total number of IO TLB slabs in all pools.
* @debugfs: The dentry to debugfs.
- * @late_alloc: %true if allocated using the page allocator
* @force_bounce: %true if swiotlb bouncing is forced
* @for_alloc: %true if the pool is used for memory allocation
- * @nareas: The area number in the pool.
- * @area_nslabs: The slot number in the area.
+ * @can_grow: %true if more pools can be allocated dynamically.
+ * @phys_limit: Maximum allowed physical address.
+ * @lock: Lock to synchronize changes to the list.
+ * @pools: List of IO TLB memory pool descriptors (if dynamic).
+ * @dyn_alloc: Dynamic IO TLB pool allocation work.
* @total_used: The total number of slots in the pool that are currently used
* across all areas. Used only for calculating used_hiwater in
* debugfs.
* @used_hiwater: The high water mark for total_used. Used only for reporting
* in debugfs.
+ * @transient_nslabs: The total number of slots in all transient pools that
+ * are currently used across all areas.
*/
struct io_tlb_mem {
- phys_addr_t start;
- phys_addr_t end;
- void *vaddr;
+ struct io_tlb_pool defpool;
unsigned long nslabs;
struct dentry *debugfs;
- bool late_alloc;
bool force_bounce;
bool for_alloc;
- unsigned int nareas;
- unsigned int area_nslabs;
- struct io_tlb_area *areas;
- struct io_tlb_slot *slots;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ bool can_grow;
+ u64 phys_limit;
+ spinlock_t lock;
+ struct list_head pools;
+ struct work_struct dyn_alloc;
+#endif
#ifdef CONFIG_DEBUG_FS
atomic_long_t total_used;
atomic_long_t used_hiwater;
+ atomic_long_t transient_nslabs;
#endif
};
-extern struct io_tlb_mem io_tlb_default_mem;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
+
+#else
+
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+ phys_addr_t paddr)
+{
+ return &dev->dma_io_tlb_mem->defpool;
+}
+
+#endif
+
+/**
+ * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb
+ * @dev: Device which has mapped the buffer.
+ * @paddr: Physical address within the DMA buffer.
+ *
+ * Check if @paddr points into a bounce buffer.
+ *
+ * Return:
+ * * %true if @paddr points into a bounce buffer
+ * * %false otherwise
+ */
static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- return mem && paddr >= mem->start && paddr < mem->end;
+ if (!mem)
+ return false;
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ /*
+ * All SWIOTLB buffer addresses must have been returned by
+ * swiotlb_tbl_map_single() and passed to a device driver.
+ * If a SWIOTLB address is checked on another CPU, then it was
+ * presumably loaded by the device driver from an unspecified private
+ * data structure. Make sure that this load is ordered before reading
+ * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool().
+ *
+ * This barrier pairs with smp_mb() in swiotlb_find_slots().
+ */
+ smp_rmb();
+ return READ_ONCE(dev->dma_uses_io_tlb) &&
+ swiotlb_find_pool(dev, paddr);
+#else
+ return paddr >= mem->defpool.start && paddr < mem->defpool.end;
+#endif
}
static inline bool is_swiotlb_force_bounce(struct device *dev)
@@ -128,13 +203,22 @@ static inline bool is_swiotlb_force_bounce(struct device *dev)
void swiotlb_init(bool addressing_limited, unsigned int flags);
void __init swiotlb_exit(void);
+void swiotlb_dev_init(struct device *dev);
size_t swiotlb_max_mapping_size(struct device *dev);
+bool is_swiotlb_allocated(void);
bool is_swiotlb_active(struct device *dev);
void __init swiotlb_adjust_size(unsigned long size);
+phys_addr_t default_swiotlb_base(void);
+phys_addr_t default_swiotlb_limit(void);
#else
static inline void swiotlb_init(bool addressing_limited, unsigned int flags)
{
}
+
+static inline void swiotlb_dev_init(struct device *dev)
+{
+}
+
static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
return false;
@@ -151,6 +235,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
return SIZE_MAX;
}
+static inline bool is_swiotlb_allocated(void)
+{
+ return false;
+}
+
static inline bool is_swiotlb_active(struct device *dev)
{
return false;
@@ -159,6 +248,16 @@ static inline bool is_swiotlb_active(struct device *dev)
static inline void swiotlb_adjust_size(unsigned long size)
{
}
+
+static inline phys_addr_t default_swiotlb_base(void)
+{
+ return 0;
+}
+
+static inline phys_addr_t default_swiotlb_limit(void)
+{
+ return 0;
+}
#endif /* CONFIG_SWIOTLB */
extern void swiotlb_print_info(void);
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index 48fabe36509e..8d8fac1626bd 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -41,6 +41,7 @@ enum {
enum switchtec_gen {
SWITCHTEC_GEN3,
SWITCHTEC_GEN4,
+ SWITCHTEC_GEN5,
};
struct mrpc_regs {
diff --git a/include/linux/sync_core.h b/include/linux/sync_core.h
index 013da4b8b327..67bb9794b875 100644
--- a/include/linux/sync_core.h
+++ b/include/linux/sync_core.h
@@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void)
}
#endif
-#endif /* _LINUX_SYNC_CORE_H */
+#ifdef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD
+#include <asm/sync_core.h>
+#else
+/*
+ * This is a dummy prepare_sync_core_cmd() implementation that can be used on
+ * all architectures which provide unconditional core serializing instructions
+ * in switch_mm().
+ * If your architecture doesn't provide such core serializing instructions in
+ * switch_mm(), you may need to write your own functions.
+ */
+static inline void prepare_sync_core_cmd(struct mm_struct *mm)
+{
+}
+#endif
+#endif /* _LINUX_SYNC_CORE_H */
diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h
index 641ca8880995..3858a6ffdd5c 100644
--- a/include/linux/syscall_user_dispatch.h
+++ b/include/linux/syscall_user_dispatch.h
@@ -6,16 +6,10 @@
#define _SYSCALL_USER_DISPATCH_H
#include <linux/thread_info.h>
+#include <linux/syscall_user_dispatch_types.h>
#ifdef CONFIG_GENERIC_ENTRY
-struct syscall_user_dispatch {
- char __user *selector;
- unsigned long offset;
- unsigned long len;
- bool on_dispatch;
-};
-
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
unsigned long len, char __user *selector);
@@ -29,7 +23,6 @@ int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long siz
void __user *data);
#else
-struct syscall_user_dispatch {};
static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
unsigned long len, char __user *selector)
diff --git a/include/linux/syscall_user_dispatch_types.h b/include/linux/syscall_user_dispatch_types.h
new file mode 100644
index 000000000000..3be36b06c7d7
--- /dev/null
+++ b/include/linux/syscall_user_dispatch_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SYSCALL_USER_DISPATCH_TYPES_H
+#define _SYSCALL_USER_DISPATCH_TYPES_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_GENERIC_ENTRY
+
+struct syscall_user_dispatch {
+ char __user *selector;
+ unsigned long offset;
+ unsigned long len;
+ bool on_dispatch;
+};
+
+#else
+
+struct syscall_user_dispatch {};
+
+#endif
+
+#endif /* _SYSCALL_USER_DISPATCH_TYPES_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 03e3d0121d5e..e619ac10cd23 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -71,9 +71,12 @@ struct clone_args;
struct open_how;
struct mount_attr;
struct landlock_ruleset_attr;
+struct lsm_ctx;
enum landlock_rule_type;
struct cachestat_range;
struct cachestat;
+struct statmount;
+struct mnt_id_req;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -125,6 +128,7 @@ struct cachestat;
#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL))
#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a
#define __SC_CAST(t, a) (__force t) a
+#define __SC_TYPE(t, a) t
#define __SC_ARGS(t, a) a
#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
@@ -284,22 +288,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
#endif
/*
- * Called before coming back to user-mode. Returning to user-mode with an
- * address limit different than USER_DS can allow to overwrite kernel memory.
- */
-static inline void addr_limit_user_check(void)
-{
-#ifdef TIF_FSCHECK
- if (!test_thread_flag(TIF_FSCHECK))
- return;
-#endif
-
-#ifdef TIF_FSCHECK
- clear_thread_flag(TIF_FSCHECK);
-#endif
-}
-
-/*
* These syscall function prototypes are kept in the same order as
* include/uapi/asm-generic/unistd.h. Architecture specific entries go below,
* followed by deprecated or obsolete system calls.
@@ -371,7 +359,6 @@ asmlinkage long sys_lremovexattr(const char __user *path,
const char __user *name);
asmlinkage long sys_fremovexattr(int fd, const char __user *name);
asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
-asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len);
asmlinkage long sys_eventfd2(unsigned int count, int flags);
asmlinkage long sys_epoll_create1(int flags);
asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
@@ -424,6 +411,12 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz,
asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf);
asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
struct statfs64 __user *buf);
+asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
+ struct statmount __user *buf, size_t bufsize,
+ unsigned int flags);
+asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
+ u64 __user *mnt_ids, size_t nr_mnt_ids,
+ unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
#if BITS_PER_LONG == 32
@@ -438,8 +431,10 @@ asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename);
asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
-asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+ umode_t mode, unsigned int flags);
asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
gid_t group, int flag);
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
@@ -563,6 +558,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
unsigned int nr_futexes, unsigned int flags,
struct __kernel_timespec __user *timeout, clockid_t clockid);
+
+asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);
+
+asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask,
+ unsigned int flags, struct __kernel_timespec __user *timespec,
+ clockid_t clockid);
+
+asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters,
+ unsigned int flags, int nr_wake, int nr_requeue);
+
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);
asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
@@ -953,6 +958,12 @@ asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long l
asmlinkage long sys_cachestat(unsigned int fd,
struct cachestat_range __user *cstat_range,
struct cachestat __user *cstat, unsigned int flags);
+asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
+asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ u32 *size, u32 flags);
+asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ u32 size, u32 flags);
+asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 59d451f455bf..ee7d33b89e9e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -159,12 +159,22 @@ struct ctl_node {
struct ctl_table_header *header;
};
-/* struct ctl_table_header is used to maintain dynamic lists of
- struct ctl_table trees. */
+/**
+ * struct ctl_table_header - maintains dynamic lists of struct ctl_table trees
+ * @ctl_table: pointer to the first element in ctl_table array
+ * @ctl_table_size: number of elements pointed by @ctl_table
+ * @used: The entry will never be touched when equal to 0.
+ * @count: Upped every time something is added to @inodes and downed every time
+ * something is removed from inodes
+ * @nreg: When nreg drops to 0 the ctl_table_header will be unregistered.
+ * @rcu: Delays the freeing of the inode. Introduced with "unfuck proc_sysctl ->d_compare()"
+ *
+ */
struct ctl_table_header {
union {
struct {
struct ctl_table *ctl_table;
+ int ctl_table_size;
int used;
int count;
int nreg;
@@ -200,10 +210,8 @@ struct ctl_table_root {
int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
};
-/* struct ctl_path describes where in the hierarchy a table is added */
-struct ctl_path {
- const char *procname;
-};
+#define register_sysctl(path, table) \
+ register_sysctl_sz(path, table, ARRAY_SIZE(table))
#ifdef CONFIG_SYSCTL
@@ -216,17 +224,20 @@ extern void retire_sysctl_set(struct ctl_table_set *set);
struct ctl_table_header *__register_sysctl_table(
struct ctl_table_set *set,
- const char *path, struct ctl_table *table);
-struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table);
+ const char *path, struct ctl_table *table, size_t table_size);
+struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table,
+ size_t table_size);
void unregister_sysctl_table(struct ctl_table_header * table);
extern int sysctl_init_bases(void);
extern void __register_sysctl_init(const char *path, struct ctl_table *table,
- const char *table_name);
-#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+ const char *table_name, size_t table_size);
+#define register_sysctl_init(path, table) \
+ __register_sysctl_init(path, table, #table, ARRAY_SIZE(table))
extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
void do_sysctl_args(void);
+bool sysctl_is_alias(char *param);
int do_proc_douintvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos,
int (*conv)(unsigned long *lvalp,
@@ -239,8 +250,6 @@ extern int unaligned_enabled;
extern int unaligned_dump_stack;
extern int no_unaligned_warning;
-#define SYSCTL_PERM_EMPTY_DIR (1 << 0)
-
#else /* CONFIG_SYSCTL */
static inline void register_sysctl_init(const char *path, struct ctl_table *table)
@@ -252,7 +261,9 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p
return NULL;
}
-static inline struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
+static inline struct ctl_table_header *register_sysctl_sz(const char *path,
+ struct ctl_table *table,
+ size_t table_size)
{
return NULL;
}
@@ -270,6 +281,11 @@ static inline void setup_sysctl_set(struct ctl_table_set *p,
static inline void do_sysctl_args(void)
{
}
+
+static inline bool sysctl_is_alias(char *param)
+{
+ return false;
+}
#endif /* CONFIG_SYSCTL */
int sysctl_max_threads(struct ctl_table *table, int write, void *buffer,
diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h
index c1ef5fc60a3c..c9cb657dad08 100644
--- a/include/linux/sysfb.h
+++ b/include/linux/sysfb.h
@@ -9,7 +9,8 @@
#include <linux/kernel.h>
#include <linux/platform_data/simplefb.h>
-#include <linux/screen_info.h>
+
+struct screen_info;
enum {
M_I17, /* 17-Inch iMac */
@@ -90,7 +91,8 @@ static inline void sysfb_set_efifb_fwnode(struct platform_device *pd)
bool sysfb_parse_mode(const struct screen_info *si,
struct simplefb_platform_data *mode);
struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode);
+ const struct simplefb_platform_data *mode,
+ struct device *parent);
#else /* CONFIG_SYSFB_SIMPLE */
@@ -101,7 +103,8 @@ static inline bool sysfb_parse_mode(const struct screen_info *si,
}
static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode)
+ const struct simplefb_platform_data *mode,
+ struct device *parent)
{
return ERR_PTR(-EINVAL);
}
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index fd3fe5c8c17f..326341c62385 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -61,22 +61,32 @@ do { \
/**
* struct attribute_group - data structure used to declare an attribute group.
* @name: Optional: Attribute group name
- * If specified, the attribute group will be created in
- * a new subdirectory with this name.
+ * If specified, the attribute group will be created in a
+ * new subdirectory with this name. Additionally when a
+ * group is named, @is_visible and @is_bin_visible may
+ * return SYSFS_GROUP_INVISIBLE to control visibility of
+ * the directory itself.
* @is_visible: Optional: Function to return permissions associated with an
- * attribute of the group. Will be called repeatedly for each
- * non-binary attribute in the group. Only read/write
+ * attribute of the group. Will be called repeatedly for
+ * each non-binary attribute in the group. Only read/write
* permissions as well as SYSFS_PREALLOC are accepted. Must
- * return 0 if an attribute is not visible. The returned value
- * will replace static permissions defined in struct attribute.
+ * return 0 if an attribute is not visible. The returned
+ * value will replace static permissions defined in struct
+ * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this
+ * callback to specify separate _group_visible() and
+ * _attr_visible() handlers.
* @is_bin_visible:
* Optional: Function to return permissions associated with a
* binary attribute of the group. Will be called repeatedly
* for each binary attribute in the group. Only read/write
- * permissions as well as SYSFS_PREALLOC are accepted. Must
- * return 0 if a binary attribute is not visible. The returned
- * value will replace static permissions defined in
- * struct bin_attribute.
+ * permissions as well as SYSFS_PREALLOC (and the
+ * visibility flags for named groups) are accepted. Must
+ * return 0 if a binary attribute is not visible. The
+ * returned value will replace static permissions defined
+ * in struct bin_attribute. If @is_visible is not set, Use
+ * SYSFS_GROUP_VISIBLE() when assigning this callback to
+ * specify separate _group_visible() and _attr_visible()
+ * handlers.
* @attrs: Pointer to NULL terminated list of attributes.
* @bin_attrs: Pointer to NULL terminated list of binary attributes.
* Either attrs or bin_attrs or both must be provided.
@@ -91,13 +101,121 @@ struct attribute_group {
struct bin_attribute **bin_attrs;
};
+#define SYSFS_PREALLOC 010000
+#define SYSFS_GROUP_INVISIBLE 020000
+
+/*
+ * DEFINE_SYSFS_GROUP_VISIBLE(name):
+ * A helper macro to pair with the assignment of ".is_visible =
+ * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory
+ * associated with a named attribute_group to optionally be hidden.
+ * This allows for static declaration of attribute_groups, and the
+ * simplification of attribute visibility lifetime that implies,
+ * without polluting sysfs with empty attribute directories.
+ * Ex.
+ *
+ * static umode_t example_attr_visible(struct kobject *kobj,
+ * struct attribute *attr, int n)
+ * {
+ * if (example_attr_condition)
+ * return 0;
+ * else if (ro_attr_condition)
+ * return 0444;
+ * return a->mode;
+ * }
+ *
+ * static bool example_group_visible(struct kobject *kobj)
+ * {
+ * if (example_group_condition)
+ * return false;
+ * return true;
+ * }
+ *
+ * DEFINE_SYSFS_GROUP_VISIBLE(example);
+ *
+ * static struct attribute_group example_group = {
+ * .name = "example",
+ * .is_visible = SYSFS_GROUP_VISIBLE(example),
+ * .attrs = &example_attrs,
+ * };
+ *
+ * Note that it expects <name>_attr_visible and <name>_group_visible to
+ * be defined. For cases where individual attributes do not need
+ * separate visibility consideration, only entire group visibility at
+ * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE().
+ */
+#define DEFINE_SYSFS_GROUP_VISIBLE(name) \
+ static inline umode_t sysfs_group_visible_##name( \
+ struct kobject *kobj, struct attribute *attr, int n) \
+ { \
+ if (n == 0 && !name##_group_visible(kobj)) \
+ return SYSFS_GROUP_INVISIBLE; \
+ return name##_attr_visible(kobj, attr, n); \
+ }
+
+/*
+ * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name):
+ * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like
+ * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does
+ * not require the implementation of a per-attribute visibility
+ * callback.
+ * Ex.
+ *
+ * static bool example_group_visible(struct kobject *kobj)
+ * {
+ * if (example_group_condition)
+ * return false;
+ * return true;
+ * }
+ *
+ * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example);
+ *
+ * static struct attribute_group example_group = {
+ * .name = "example",
+ * .is_visible = SYSFS_GROUP_VISIBLE(example),
+ * .attrs = &example_attrs,
+ * };
+ */
+#define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \
+ static inline umode_t sysfs_group_visible_##name( \
+ struct kobject *kobj, struct attribute *a, int n) \
+ { \
+ if (n == 0 && !name##_group_visible(kobj)) \
+ return SYSFS_GROUP_INVISIBLE; \
+ return a->mode; \
+ }
+
+/*
+ * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary
+ * attributes. If an attribute_group defines both text and binary
+ * attributes, the group visibility is determined by the function
+ * specified to is_visible() not is_bin_visible()
+ */
+#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \
+ static inline umode_t sysfs_group_visible_##name( \
+ struct kobject *kobj, struct bin_attribute *attr, int n) \
+ { \
+ if (n == 0 && !name##_group_visible(kobj)) \
+ return SYSFS_GROUP_INVISIBLE; \
+ return name##_attr_visible(kobj, attr, n); \
+ }
+
+#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \
+ static inline umode_t sysfs_group_visible_##name( \
+ struct kobject *kobj, struct bin_attribute *a, int n) \
+ { \
+ if (n == 0 && !name##_group_visible(kobj)) \
+ return SYSFS_GROUP_INVISIBLE; \
+ return a->mode; \
+ }
+
+#define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn
+
/*
* Use these macros to make defining attributes easier.
* See include/linux/device.h for examples..
*/
-#define SYSFS_PREALLOC 010000
-
#define __ATTR(_name, _mode, _show, _store) { \
.attr = {.name = __stringify(_name), \
.mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
@@ -181,6 +299,8 @@ struct bin_attribute {
char *, loff_t, size_t);
ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *,
char *, loff_t, size_t);
+ loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *,
+ loff_t, int);
int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr,
struct vm_area_struct *vma);
};
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 3a582ec7a2f1..bdca467ebb77 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -30,7 +30,7 @@
#define SYSRQ_ENABLE_RTNICE 0x0100
struct sysrq_key_op {
- void (* const handler)(int);
+ void (* const handler)(u8);
const char * const help_msg;
const char * const action_msg;
const int enable_mask;
@@ -43,10 +43,10 @@ struct sysrq_key_op {
* are available -- else NULL's).
*/
-void handle_sysrq(int key);
-void __handle_sysrq(int key, bool check_mask);
-int register_sysrq_key(int key, const struct sysrq_key_op *op);
-int unregister_sysrq_key(int key, const struct sysrq_key_op *op);
+void handle_sysrq(u8 key);
+void __handle_sysrq(u8 key, bool check_mask);
+int register_sysrq_key(u8 key, const struct sysrq_key_op *op);
+int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op);
extern const struct sysrq_key_op *__sysrq_reboot_op;
int sysrq_toggle_support(int enable_mask);
@@ -54,20 +54,20 @@ int sysrq_mask(void);
#else
-static inline void handle_sysrq(int key)
+static inline void handle_sysrq(u8 key)
{
}
-static inline void __handle_sysrq(int key, bool check_mask)
+static inline void __handle_sysrq(u8 key, bool check_mask)
{
}
-static inline int register_sysrq_key(int key, const struct sysrq_key_op *op)
+static inline int register_sysrq_key(u8 key, const struct sysrq_key_op *op)
{
return -EINVAL;
}
-static inline int unregister_sysrq_key(int key, const struct sysrq_key_op *op)
+static inline int unregister_sysrq_key(u8 key, const struct sysrq_key_op *op)
{
return -EINVAL;
}
diff --git a/include/linux/tc.h b/include/linux/tc.h
index a60639f37963..1638660abf5e 100644
--- a/include/linux/tc.h
+++ b/include/linux/tc.h
@@ -120,7 +120,7 @@ static inline unsigned long tc_get_speed(struct tc_bus *tbus)
#ifdef CONFIG_TC
-extern struct bus_type tc_bus_type;
+extern const struct bus_type tc_bus_type;
extern int tc_register_driver(struct tc_driver *tdrv);
extern void tc_unregister_driver(struct tc_driver *tdrv);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 91a37c99ba66..55399ee2a57e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -152,6 +152,7 @@ struct tcp_request_sock {
u64 snt_synack; /* first SYNACK sent time */
bool tfo_listener;
bool is_mptcp;
+ bool req_usec_ts;
#if IS_ENABLED(CONFIG_MPTCP)
bool drop_req;
#endif
@@ -165,6 +166,11 @@ struct tcp_request_sock {
* after data-in-SYN.
*/
u8 syn_tos;
+#ifdef CONFIG_TCP_AO
+ u8 ao_keyid;
+ u8 ao_rcv_next;
+ bool used_tcp_ao;
+#endif
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -172,24 +178,135 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
return (struct tcp_request_sock *)req;
}
+static inline bool tcp_rsk_used_ao(const struct request_sock *req)
+{
+#ifndef CONFIG_TCP_AO
+ return false;
+#else
+ return tcp_rsk(req)->used_tcp_ao;
+#endif
+}
+
+#define TCP_RMEM_TO_WIN_SCALE 8
+
struct tcp_sock {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/tcp_sock.rst.
+ * Please update the document when adding new fields.
+ */
+
/* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn;
- u16 tcp_header_len; /* Bytes of tcp header to send */
+
+ /* TX read-mostly hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_read_tx);
+ /* timestamp of last sent data packet (for restart window) */
+ u32 max_window; /* Maximal window ever seen from peer */
+ u32 rcv_ssthresh; /* Current window clamp */
+ u32 reordering; /* Packet reordering metric. */
+ u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */
u16 gso_segs; /* Max number of segs per GSO packet */
+ /* from STCP, retrans queue hinting */
+ struct sk_buff *lost_skb_hint;
+ struct sk_buff *retransmit_skb_hint;
+ __cacheline_group_end(tcp_sock_read_tx);
+
+ /* TXRX read-mostly hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_read_txrx);
+ u32 tsoffset; /* timestamp offset */
+ u32 snd_wnd; /* The window we expect to receive */
+ u32 mss_cache; /* Cached effective mss, not including SACKS */
+ u32 snd_cwnd; /* Sending congestion window */
+ u32 prr_out; /* Total number of pkts sent during Recovery. */
+ u32 lost_out; /* Lost packets */
+ u32 sacked_out; /* SACK'd packets */
+ u16 tcp_header_len; /* Bytes of tcp header to send */
+ u8 scaling_ratio; /* see tcp_win_from_space() */
+ u8 chrono_type : 2, /* current chronograph type */
+ repair : 1,
+ tcp_usec_ts : 1, /* TSval values in usec */
+ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
+ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
+ __cacheline_group_end(tcp_sock_read_txrx);
+
+ /* RX read-mostly hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_read_rx);
+ u32 copied_seq; /* Head of yet unread data */
+ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
+ u32 snd_wl1; /* Sequence for window update */
+ u32 tlp_high_seq; /* snd_nxt at the time of TLP */
+ u32 rttvar_us; /* smoothed mdev_max */
+ u32 retrans_out; /* Retransmitted packets out */
+ u16 advmss; /* Advertised MSS */
+ u16 urg_data; /* Saved octet of OOB data and control flags */
+ u32 lost; /* Total data packets lost incl. rexmits */
+ struct minmax rtt_min;
+ /* OOO segments go in this rbtree. Socket lock must be held. */
+ struct rb_root out_of_order_queue;
+ u32 snd_ssthresh; /* Slow start size threshold */
+ __cacheline_group_end(tcp_sock_read_rx);
+ /* TX read-write hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned;
+ u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut
+ * total number of data bytes sent.
+ */
+ u32 snd_sml; /* Last byte of the most recently transmitted small packet */
+ u32 chrono_start; /* Start time in jiffies of a TCP chrono */
+ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
+ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
+ u32 pushed_seq; /* Last pushed seq, required to talk to windows */
+ u32 lsndtime;
+ u32 mdev_us; /* medium deviation */
+ u32 rtt_seq; /* sequence number to update rttvar */
+ u64 tcp_wstamp_ns; /* departure time for next sent data packet */
+ u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
+ u64 tcp_mstamp; /* most recent packet received/sent */
+ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
+ struct sk_buff *highest_sack; /* skb just after the highest
+ * skb with SACKed bit set
+ * (validity guaranteed only if
+ * sacked_out > 0)
+ */
+ u8 ecn_flags; /* ECN status bits. */
+ __cacheline_group_end(tcp_sock_write_tx);
+
+ /* TXRX read-write hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_write_txrx);
/*
* Header prediction flags
* 0x5?10 << 16 + snd_wnd in net byte order
*/
__be32 pred_flags;
-
+ u32 rcv_nxt; /* What we want to receive next */
+ u32 snd_nxt; /* Next sequence we send */
+ u32 snd_una; /* First byte we want an ack for */
+ u32 window_clamp; /* Maximal window to advertise */
+ u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ u32 packets_out; /* Packets which are "in flight" */
+ u32 snd_up; /* Urgent pointer */
+ u32 delivered; /* Total data packets delivered incl. rexmits */
+ u32 delivered_ce; /* Like the above but only ECE marked packets */
+ u32 app_limited; /* limited until "delivered" reaches this val */
+ u32 rcv_wnd; /* Current receiver window */
/*
- * RFC793 variables by their proper names. This means you can
- * read the code and the spec side by side (and laugh ...)
- * See RFC793 and RFC1122. The RFC writes these in capitals.
+ * Options received (usually on last packet, some only on SYN packets).
*/
- u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ struct tcp_options_received rx_opt;
+ u8 nonagle : 4,/* Disable Nagle algorithm? */
+ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
+ __cacheline_group_end(tcp_sock_write_txrx);
+
+ /* RX read-write hotpath cache lines */
+ __cacheline_group_begin(tcp_sock_write_rx) __aligned(8);
+ u64 bytes_received;
+ /* RFC4898 tcpEStatsAppHCThruOctetsReceived
* sum(delta(rcv_nxt)), or how many bytes
* were acked.
*/
@@ -199,45 +316,42 @@ struct tcp_sock {
u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
* total number of data segments in.
*/
- u32 rcv_nxt; /* What we want to receive next */
- u32 copied_seq; /* Head of yet unread data */
u32 rcv_wup; /* rcv_nxt on last window update sent */
- u32 snd_nxt; /* Next sequence we send */
- u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
- * The total number of segments sent.
- */
- u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
- * total number of data segments sent.
- */
- u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut
- * total number of data bytes sent.
- */
+ u32 max_packets_out; /* max packets_out in last window */
+ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
+ u32 rate_delivered; /* saved rate sample: packets delivered */
+ u32 rate_interval_us; /* saved rate sample: time elapsed */
+ u32 rcv_rtt_last_tsecr;
+ u64 first_tx_mstamp; /* start of window send phase */
+ u64 delivered_mstamp; /* time we reached "delivered" */
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
+ struct {
+ u32 rtt_us;
+ u32 seq;
+ u64 time;
+ } rcv_rtt_est;
+/* Receiver queue space */
+ struct {
+ u32 space;
+ u32 seq;
+ u64 time;
+ } rcvq_space;
+ __cacheline_group_end(tcp_sock_write_rx);
+ /* End of Hot Path */
+
+/*
+ * RFC793 variables by their proper names. This means you can
+ * read the code and the spec side by side (and laugh ...)
+ * See RFC793 and RFC1122. The RFC writes these in capitals.
+ */
u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
* total number of DSACK blocks received
*/
- u32 snd_una; /* First byte we want an ack for */
- u32 snd_sml; /* Last byte of the most recently transmitted small packet */
- u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
- u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
- u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
u32 compressed_ack_rcv_nxt;
-
- u32 tsoffset; /* timestamp offset */
-
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
- struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
-
- u32 snd_wl1; /* Sequence for window update */
- u32 snd_wnd; /* The window we expect to receive */
- u32 max_window; /* Maximal window ever seen from peer */
- u32 mss_cache; /* Cached effective mss, not including SACKS */
-
- u32 window_clamp; /* Maximal window to advertise */
- u32 rcv_ssthresh; /* Current window clamp */
/* Information of the most recently (s)acked skb */
struct tcp_rack {
@@ -251,23 +365,15 @@ struct tcp_sock {
dsack_seen:1, /* Whether DSACK seen after last adj */
advanced:1; /* mstamp advanced since last lost marking */
} rack;
- u16 advmss; /* Advertised MSS */
u8 compressed_ack;
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
unused:5;
- u32 chrono_start; /* Start time in jiffies of a TCP chrono */
- u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
- u8 chrono_type:2, /* current chronograph type */
- rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
+ u8 thin_lto : 1,/* Use linear timeouts for thin streams */
+ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
- is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
- fastopen_client_fail:2; /* reason why fastopen failed */
- u8 nonagle : 4,/* Disable Nagle algorithm? */
- thin_lto : 1,/* Use linear timeouts for thin streams */
- recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
- repair : 1,
+ fastopen_client_fail:2, /* reason why fastopen failed */
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
u8 save_syn:2, /* Save headers of SYN packet */
@@ -275,45 +381,19 @@ struct tcp_sock {
syn_fastopen:1, /* SYN includes Fast Open option */
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
- syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
- is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
- u32 tlp_high_seq; /* snd_nxt at the time of TLP */
+ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+ u8 keepalive_probes; /* num of allowed keep alive probes */
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
- u64 tcp_wstamp_ns; /* departure time for next sent data packet */
- u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
/* RTT measurement */
- u64 tcp_mstamp; /* most recent packet received/sent */
- u32 srtt_us; /* smoothed round trip time << 3 in usecs */
- u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
- u32 rttvar_us; /* smoothed mdev_max */
- u32 rtt_seq; /* sequence number to update rttvar */
- struct minmax rtt_min;
-
- u32 packets_out; /* Packets which are "in flight" */
- u32 retrans_out; /* Retransmitted packets out */
- u32 max_packets_out; /* max packets_out in last window */
- u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */
- u16 urg_data; /* Saved octet of OOB data and control flags */
- u8 ecn_flags; /* ECN status bits. */
- u8 keepalive_probes; /* num of allowed keep alive probes */
- u32 reordering; /* Packet reordering metric. */
u32 reord_seen; /* number of data packet reordering events */
- u32 snd_up; /* Urgent pointer */
-
-/*
- * Options received (usually on last packet, some only on SYN packets).
- */
- struct tcp_options_received rx_opt;
/*
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
*/
- u32 snd_ssthresh; /* Slow start size threshold */
- u32 snd_cwnd; /* Sending congestion window */
u32 snd_cwnd_cnt; /* Linear increase counter */
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
u32 snd_cwnd_used;
@@ -321,32 +401,11 @@ struct tcp_sock {
u32 prior_cwnd; /* cwnd right before starting loss recovery */
u32 prr_delivered; /* Number of newly delivered packets to
* receiver in Recovery. */
- u32 prr_out; /* Total number of pkts sent during Recovery. */
- u32 delivered; /* Total data packets delivered incl. rexmits */
- u32 delivered_ce; /* Like the above but only ECE marked packets */
- u32 lost; /* Total data packets lost incl. rexmits */
- u32 app_limited; /* limited until "delivered" reaches this val */
- u64 first_tx_mstamp; /* start of window send phase */
- u64 delivered_mstamp; /* time we reached "delivered" */
- u32 rate_delivered; /* saved rate sample: packets delivered */
- u32 rate_interval_us; /* saved rate sample: time elapsed */
-
- u32 rcv_wnd; /* Current receiver window */
- u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
- u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */
- u32 pushed_seq; /* Last pushed seq, required to talk to windows */
- u32 lost_out; /* Lost packets */
- u32 sacked_out; /* SACK'd packets */
+ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */
struct hrtimer pacing_timer;
struct hrtimer compressed_ack_timer;
- /* from STCP, retrans queue hinting */
- struct sk_buff* lost_skb_hint;
- struct sk_buff *retransmit_skb_hint;
-
- /* OOO segments go in this rbtree. Socket lock must be held. */
- struct rb_root out_of_order_queue;
struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */
/* SACKs data, these 2 need to be together (see tcp_options_write) */
@@ -355,12 +414,6 @@ struct tcp_sock {
struct tcp_sack_block recv_sack_cache[4];
- struct sk_buff *highest_sack; /* skb just after the highest
- * skb with SACKed bit set
- * (validity guaranteed only if
- * sacked_out > 0)
- */
-
int lost_cnt_hint;
u32 prior_ssthresh; /* ssthresh saved at recovery start */
@@ -375,6 +428,14 @@ struct tcp_sock {
* Total data bytes retransmitted
*/
u32 total_retrans; /* Total retransmits for entire connection */
+ u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */
+ u16 total_rto; /* Total number of RTO timeouts, including
+ * SYN/SYN-ACK and recurring timeouts.
+ */
+ u16 total_rto_recoveries; /* Total number of RTO recoveries,
+ * including any unfinished recovery.
+ */
+ u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */
u32 urg_seq; /* Seq of received urgent pointer */
unsigned int keepalive_time; /* time before keep alive takes place */
@@ -403,21 +464,6 @@ struct tcp_sock {
u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
-/* Receiver side RTT estimation */
- u32 rcv_rtt_last_tsecr;
- struct {
- u32 rtt_us;
- u32 seq;
- u64 time;
- } rcv_rtt_est;
-
-/* Receiver queue space */
- struct {
- u32 space;
- u32 seq;
- u64 time;
- } rcvq_space;
-
/* TCP-specific MTU probe information. */
struct {
u32 probe_seq_start;
@@ -431,17 +477,22 @@ struct tcp_sock {
bool is_mptcp;
#endif
#if IS_ENABLED(CONFIG_SMC)
- bool (*smc_hs_congested)(const struct sock *sk);
bool syn_smc; /* SYN includes SMC */
+ bool (*smc_hs_congested)(const struct sock *sk);
#endif
-#ifdef CONFIG_TCP_MD5SIG
-/* TCP AF-Specific parts; only used by MD5 Signature support so far */
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
+/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */
const struct tcp_sock_af_ops *af_specific;
+#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 Signature Option information */
struct tcp_md5sig_info __rcu *md5sig_info;
#endif
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info __rcu *ao_info;
+#endif
+#endif
/* TCP fastopen related information */
struct tcp_fastopen_request *fastopen_req;
@@ -461,15 +512,17 @@ enum tsq_enum {
TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
* tcp_v{4|6}_mtu_reduced()
*/
+ TCP_ACK_DEFERRED, /* TX pure ack is deferred */
};
enum tsq_flags {
- TSQF_THROTTLED = (1UL << TSQ_THROTTLED),
- TSQF_QUEUED = (1UL << TSQ_QUEUED),
- TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED),
- TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
- TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
- TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
+ TSQF_THROTTLED = BIT(TSQ_THROTTLED),
+ TSQF_QUEUED = BIT(TSQ_QUEUED),
+ TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED),
+ TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED),
+ TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED),
+ TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED),
+ TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED),
};
#define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)
@@ -495,6 +548,9 @@ struct tcp_timewait_sock {
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info __rcu *ao_info;
+#endif
};
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
@@ -562,6 +618,11 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+int tcp_sock_set_user_timeout(struct sock *sk, int val);
+
+static inline bool dst_tcp_usec_ts(const struct dst_entry *dst)
+{
+ return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS);
+}
#endif /* _LINUX_TCP_H */
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 17eb1c5205d3..71632e3c5f18 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -84,6 +84,7 @@ struct tee_param {
* @release: release this open file
* @open_session: open a new session
* @close_session: close a session
+ * @system_session: declare session as a system session
* @invoke_func: invoke a trusted function
* @cancel_req: request cancel of an ongoing invoke or open
* @supp_recv: called for supplicant to get a command
@@ -100,6 +101,7 @@ struct tee_driver_ops {
struct tee_ioctl_open_session_arg *arg,
struct tee_param *param);
int (*close_session)(struct tee_context *ctx, u32 session);
+ int (*system_session)(struct tee_context *ctx, u32 session);
int (*invoke_func)(struct tee_context *ctx,
struct tee_ioctl_invoke_arg *arg,
struct tee_param *param);
@@ -430,6 +432,20 @@ int tee_client_open_session(struct tee_context *ctx,
int tee_client_close_session(struct tee_context *ctx, u32 session);
/**
+ * tee_client_system_session() - Declare session as a system session
+ * @ctx: TEE Context
+ * @session: Session id
+ *
+ * This function requests TEE to provision an entry context ready to use for
+ * that session only. The provisioned entry context is used for command
+ * invocation and session closure, not for command cancelling requests.
+ * TEE releases the provisioned context upon session closure.
+ *
+ * Return < 0 on error else 0 if an entry context has been provisioned.
+ */
+int tee_client_system_session(struct tee_context *ctx, u32 session);
+
+/**
* tee_client_invoke_func() - Invoke a function in a Trusted Application
* @ctx: TEE Context
* @arg: Invoke arguments, see description of
@@ -466,7 +482,7 @@ static inline bool tee_param_is_memref(struct tee_param *param)
}
}
-extern struct bus_type tee_bus_type;
+extern const struct bus_type tee_bus_type;
/**
* struct tee_client_device - tee based device
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index dee66ade89a0..c33f50177f51 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -32,6 +32,7 @@
struct thermal_zone_device;
struct thermal_cooling_device;
struct thermal_instance;
+struct thermal_debugfs;
struct thermal_attr;
enum thermal_trend {
@@ -51,8 +52,35 @@ enum thermal_notify_event {
THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */
THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */
THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */
+ THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */
+ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */
+ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */
};
+/**
+ * struct thermal_trip - representation of a point in temperature domain
+ * @temperature: temperature value in miliCelsius
+ * @hysteresis: relative hysteresis in miliCelsius
+ * @threshold: trip crossing notification threshold miliCelsius
+ * @type: trip point type
+ * @priv: pointer to driver data associated with this trip
+ * @flags: flags representing binary properties of the trip
+ */
+struct thermal_trip {
+ int temperature;
+ int hysteresis;
+ int threshold;
+ enum thermal_trip_type type;
+ u8 flags;
+ void *priv;
+};
+
+#define THERMAL_TRIP_FLAG_RW_TEMP BIT(0)
+#define THERMAL_TRIP_FLAG_RW_HYST BIT(1)
+
+#define THERMAL_TRIP_FLAG_RW (THERMAL_TRIP_FLAG_RW_TEMP | \
+ THERMAL_TRIP_FLAG_RW_HYST)
+
struct thermal_zone_device_ops {
int (*bind) (struct thermal_zone_device *,
struct thermal_cooling_device *);
@@ -62,32 +90,15 @@ struct thermal_zone_device_ops {
int (*set_trips) (struct thermal_zone_device *, int, int);
int (*change_mode) (struct thermal_zone_device *,
enum thermal_device_mode);
- int (*get_trip_type) (struct thermal_zone_device *, int,
- enum thermal_trip_type *);
- int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
int (*set_trip_temp) (struct thermal_zone_device *, int, int);
- int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);
- int (*set_trip_hyst) (struct thermal_zone_device *, int, int);
int (*get_crit_temp) (struct thermal_zone_device *, int *);
int (*set_emul_temp) (struct thermal_zone_device *, int);
- int (*get_trend) (struct thermal_zone_device *, int,
- enum thermal_trend *);
+ int (*get_trend) (struct thermal_zone_device *,
+ const struct thermal_trip *, enum thermal_trend *);
void (*hot)(struct thermal_zone_device *);
void (*critical)(struct thermal_zone_device *);
};
-/**
- * struct thermal_trip - representation of a point in temperature domain
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- */
-struct thermal_trip {
- int temperature;
- int hysteresis;
- enum thermal_trip_type type;
-};
-
struct thermal_cooling_device_ops {
int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
@@ -99,7 +110,7 @@ struct thermal_cooling_device_ops {
struct thermal_cooling_device {
int id;
- char *type;
+ const char *type;
unsigned long max_state;
struct device device;
struct device_node *np;
@@ -110,6 +121,9 @@ struct thermal_cooling_device {
struct mutex lock; /* protect thermal_instances list */
struct list_head thermal_instances;
struct list_head node;
+#ifdef CONFIG_THERMAL_DEBUGFS
+ struct thermal_debugfs *debugfs;
+#endif
};
/**
@@ -117,14 +131,13 @@ struct thermal_cooling_device {
* @id: unique id number for each thermal zone
* @type: the thermal zone device type
* @device: &struct device for this thermal zone
+ * @removal: removal completion
* @trip_temp_attrs: attributes for trip points for sysfs: trip temperature
* @trip_type_attrs: attributes for trip points for sysfs: trip type
* @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis
* @mode: current mode of this thermal zone
* @devdata: private pointer for device private data
- * @trips: an array of struct thermal_trip
* @num_trips: number of trip points the thermal zone supports
- * @trips_disabled; bitmap for disabled trips
* @passive_delay_jiffies: number of jiffies to wait between polls when
* performing passive cooling.
* @polling_delay_jiffies: number of jiffies to wait between polls when
@@ -152,20 +165,21 @@ struct thermal_cooling_device {
* @node: node in thermal_tz_list (in thermal_core.c)
* @poll_queue: delayed work for polling
* @notify_event: Last notification event
+ * @suspended: thermal zone suspend indicator
+ * @trips: array of struct thermal_trip objects
*/
struct thermal_zone_device {
int id;
char type[THERMAL_NAME_LENGTH];
struct device device;
+ struct completion removal;
struct attribute_group trips_attribute_group;
struct thermal_attr *trip_temp_attrs;
struct thermal_attr *trip_type_attrs;
struct thermal_attr *trip_hyst_attrs;
enum thermal_device_mode mode;
void *devdata;
- struct thermal_trip *trips;
int num_trips;
- unsigned long trips_disabled; /* bitmap for disabled trips */
unsigned long passive_delay_jiffies;
unsigned long polling_delay_jiffies;
int temperature;
@@ -175,7 +189,7 @@ struct thermal_zone_device {
int prev_low_trip;
int prev_high_trip;
atomic_t need_update;
- struct thermal_zone_device_ops *ops;
+ struct thermal_zone_device_ops ops;
struct thermal_zone_params *tzp;
struct thermal_governor *governor;
void *governor_data;
@@ -185,6 +199,11 @@ struct thermal_zone_device {
struct list_head node;
struct delayed_work poll_queue;
enum thermal_notify_event notify_event;
+ bool suspended;
+#ifdef CONFIG_THERMAL_DEBUGFS
+ struct thermal_debugfs *debugfs;
+#endif
+ struct thermal_trip trips[] __counted_by(num_trips);
};
/**
@@ -197,19 +216,24 @@ struct thermal_zone_device {
* thermal zone.
* @throttle: callback called for every trip point even if temperature is
* below the trip point temperature
+ * @update_tz: callback called when thermal zone internals have changed, e.g.
+ * thermal cooling instance was added/removed
* @governor_list: node in thermal_governor_list (in thermal_core.c)
*/
struct thermal_governor {
- char name[THERMAL_NAME_LENGTH];
+ const char *name;
int (*bind_to_tz)(struct thermal_zone_device *tz);
void (*unbind_from_tz)(struct thermal_zone_device *tz);
- int (*throttle)(struct thermal_zone_device *tz, int trip);
+ int (*throttle)(struct thermal_zone_device *tz,
+ const struct thermal_trip *trip);
+ void (*update_tz)(struct thermal_zone_device *tz,
+ enum thermal_notify_event reason);
struct list_head governor_list;
};
/* Structure to define Thermal Zone parameters */
struct thermal_zone_params {
- char governor_name[THERMAL_NAME_LENGTH];
+ const char *governor_name;
/*
* a boolean to indicate if the thermal to hwmon sysfs interface
@@ -283,42 +307,52 @@ int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
struct thermal_trip *trip);
int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
struct thermal_trip *trip);
-
-int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
- const struct thermal_trip *trip);
-
+int for_each_thermal_trip(struct thermal_zone_device *tz,
+ int (*cb)(struct thermal_trip *, void *),
+ void *data);
+int thermal_zone_for_each_trip(struct thermal_zone_device *tz,
+ int (*cb)(struct thermal_trip *, void *),
+ void *data);
int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
+void thermal_zone_set_trip_temp(struct thermal_zone_device *tz,
+ struct thermal_trip *trip, int temp);
int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);
-#ifdef CONFIG_THERMAL_ACPI
-int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp);
-int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp);
-int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp);
-int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
-#endif
-
#ifdef CONFIG_THERMAL
-struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
- void *, struct thermal_zone_device_ops *,
- const struct thermal_zone_params *, int, int);
-
-void thermal_zone_device_unregister(struct thermal_zone_device *);
-
-struct thermal_zone_device *
-thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
- void *, struct thermal_zone_device_ops *,
- const struct thermal_zone_params *, int, int);
+struct thermal_zone_device *thermal_zone_device_register_with_trips(
+ const char *type,
+ const struct thermal_trip *trips,
+ int num_trips, void *devdata,
+ const struct thermal_zone_device_ops *ops,
+ const struct thermal_zone_params *tzp,
+ int passive_delay, int polling_delay);
+
+struct thermal_zone_device *thermal_tripless_zone_device_register(
+ const char *type,
+ void *devdata,
+ const struct thermal_zone_device_ops *ops,
+ const struct thermal_zone_params *tzp);
+
+void thermal_zone_device_unregister(struct thermal_zone_device *tz);
void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
int thermal_zone_device_id(struct thermal_zone_device *tzd);
struct device *thermal_zone_device(struct thermal_zone_device *tzd);
+int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz,
+ const struct thermal_trip *trip,
+ struct thermal_cooling_device *cdev,
+ unsigned long upper, unsigned long lower,
+ unsigned int weight);
int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
struct thermal_cooling_device *,
unsigned long, unsigned long,
unsigned int);
+int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz,
+ const struct thermal_trip *trip,
+ struct thermal_cooling_device *cdev);
int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
struct thermal_cooling_device *);
void thermal_zone_device_update(struct thermal_zone_device *,
@@ -345,15 +379,25 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz);
int thermal_zone_device_disable(struct thermal_zone_device *tz);
void thermal_zone_device_critical(struct thermal_zone_device *tz);
#else
-static inline struct thermal_zone_device *thermal_zone_device_register(
- const char *type, int trips, int mask, void *devdata,
- struct thermal_zone_device_ops *ops,
- const struct thermal_zone_params *tzp,
- int passive_delay, int polling_delay)
+static inline struct thermal_zone_device *thermal_zone_device_register_with_trips(
+ const char *type,
+ const struct thermal_trip *trips,
+ int num_trips, void *devdata,
+ const struct thermal_zone_device_ops *ops,
+ const struct thermal_zone_params *tzp,
+ int passive_delay, int polling_delay)
{ return ERR_PTR(-ENODEV); }
-static inline void thermal_zone_device_unregister(
- struct thermal_zone_device *tz)
+
+static inline struct thermal_zone_device *thermal_tripless_zone_device_register(
+ const char *type,
+ void *devdata,
+ struct thermal_zone_device_ops *ops,
+ const struct thermal_zone_params *tzp)
+{ return ERR_PTR(-ENODEV); }
+
+static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz)
{ }
+
static inline struct thermal_cooling_device *
thermal_cooling_device_register(const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index 02333f47c994..4338ea9ac4fd 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -86,9 +86,9 @@ struct tb {
unsigned long privdata[];
};
-extern struct bus_type tb_bus_type;
-extern struct device_type tb_service_type;
-extern struct device_type tb_xdomain_type;
+extern const struct bus_type tb_bus_type;
+extern const struct device_type tb_service_type;
+extern const struct device_type tb_xdomain_type;
#define TB_LINKS_PER_PHY_PORT 2
@@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir);
* enum tb_link_width - Thunderbolt/USB4 link width
* @TB_LINK_WIDTH_SINGLE: Single lane link
* @TB_LINK_WIDTH_DUAL: Dual lane symmetric link
- * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters
+ * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters
* @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers
*/
enum tb_link_width {
diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index 44a7f9169ac6..10642d4844f0 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -271,7 +271,7 @@ long st_kim_stop(void *);
void st_kim_complete(void *);
void kim_st_list_protocols(struct st_data_s *, void *);
-void st_kim_recv(void *, const unsigned char *, long);
+void st_kim_recv(void *disc_data, const u8 *data, size_t count);
/*
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9459fef5b857..4924a33700b7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -12,6 +12,7 @@
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
+#include <linux/static_key.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
@@ -19,16 +20,22 @@ extern void __init tick_init(void);
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
-extern void tick_handover_do_timer(void);
extern void tick_cleanup_dead_cpu(int cpu);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
-static inline void tick_handover_do_timer(void) { }
static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
+extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
+#else
+#define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
+#endif
+
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
extern void tick_freeze(void);
extern void tick_unfreeze(void);
@@ -63,18 +70,14 @@ enum tick_broadcast_state {
TICK_BROADCAST_ENTER,
};
+extern struct static_key_false arch_needs_tick_broadcast;
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_control(enum tick_broadcast_mode mode);
#else
static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
#endif /* BROADCAST */
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
-#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
-#endif
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
#else
@@ -140,14 +143,6 @@ extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-
-static inline void tick_nohz_idle_stop_tick_protected(void)
-{
- local_irq_disable();
- tick_nohz_idle_stop_tick();
- local_irq_enable();
-}
-
#else /* !CONFIG_NO_HZ_COMMON */
#define tick_nohz_enabled (0)
static inline int tick_nohz_tick_stopped(void) { return 0; }
@@ -170,13 +165,18 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
}
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
-
-static inline void tick_nohz_idle_stop_tick_protected(void) { }
#endif /* !CONFIG_NO_HZ_COMMON */
+/*
+ * Mask of CPUs that are nohz_full.
+ *
+ * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu()
+ * check.
+ */
+extern cpumask_var_t tick_nohz_full_mask;
+
#ifdef CONFIG_NO_HZ_FULL
extern bool tick_nohz_full_running;
-extern cpumask_var_t tick_nohz_full_mask;
static inline bool tick_nohz_full_enabled(void)
{
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 03d9c5ac01d1..876e31b4461d 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -7,10 +7,13 @@
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
+#include <linux/time64.h>
struct user_namespace;
extern struct user_namespace init_user_ns;
+struct vm_area_struct;
+
struct timens_offsets {
struct timespec64 monotonic;
struct timespec64 boottime;
diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h
index c6540ceea143..0982d1d52b24 100644
--- a/include/linux/timecounter.h
+++ b/include/linux/timecounter.h
@@ -22,7 +22,7 @@
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
- * subtraction of non 64 bit counters,
+ * subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
@@ -35,7 +35,7 @@ struct cyclecounter {
};
/**
- * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
@@ -66,6 +66,8 @@ struct timecounter {
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
+ *
+ * Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
@@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
/**
* timecounter_adjtime - Shifts the time of the clock.
+ * @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
@@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
+ *
+ * Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
@@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
+ *
+ * Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fe1e467ba046..0ea7823b7f31 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -4,6 +4,7 @@
#include <linux/errno.h>
#include <linux/clocksource_ids.h>
+#include <linux/ktime.h>
/* Included from linux/ktime.h */
@@ -21,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
- * ktime_get() family: read the current time in a multitude of ways,
+ * ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
- * To get the time in a different format, use the ones wit
+ * To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
@@ -73,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
+ *
+ * Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
@@ -85,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
}
/**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ * ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
+ *
+ * Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
@@ -101,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
}
/**
- * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
+ * ktime_get_clocktai - Get the TAI time of day in ktime_t format
+ *
+ * Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
@@ -143,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
+ * @mono: monotonic time to convert
+ *
+ * Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
+/**
+ * ktime_get_ns - Get the current time in nanoseconds
+ *
+ * Returns: current time converted to nanoseconds
+ */
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
+/**
+ * ktime_get_real_ns - Get the current real/wall time in nanoseconds
+ *
+ * Returns: current real time converted to nanoseconds
+ */
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
+/**
+ * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
+ *
+ * Returns: current boottime converted to nanoseconds
+ */
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
+/**
+ * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
+ *
+ * Returns: current TAI time converted to nanoseconds
+ */
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
+/**
+ * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
+ *
+ * Returns: current raw monotonic time converted to nanoseconds
+ */
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
@@ -223,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
-/*
- * struct ktime_timestanps - Simultaneous mono/boot/real timestamps
+/**
+ * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
@@ -241,7 +276,8 @@ struct ktime_timestamps {
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
- * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_id: Clocksource ID
+ * @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {
@@ -267,15 +303,17 @@ struct system_device_crosststamp {
};
/**
- * struct system_counterval_t - system counter value with the pointer to the
+ * struct system_counterval_t - system counter value with the ID of the
* corresponding clocksource
* @cycles: System counter value
- * @cs: Clocksource corresponding to system counter value. Used by
- * timekeeping code to verify comparibility of two cycle values
+ * @cs_id: Clocksource ID corresponding to system counter value. Used by
+ * timekeeping code to verify comparability of two cycle values.
+ * The default ID, CSID_GENERIC, does not identify a specific
+ * clocksource.
*/
struct system_counterval_t {
u64 cycles;
- struct clocksource *cs;
+ enum clocksource_ids cs_id;
};
/*
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 9162f275819a..e67ecd1cbc97 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -7,21 +7,7 @@
#include <linux/stddef.h>
#include <linux/debugobjects.h>
#include <linux/stringify.h>
-
-struct timer_list {
- /*
- * All fields that change during normal runtime grouped to the
- * same cacheline
- */
- struct hlist_node entry;
- unsigned long expires;
- void (*function)(struct timer_list *);
- u32 flags;
-
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
-};
+#include <linux/timer_types.h>
#ifdef CONFIG_LOCKDEP
/*
@@ -36,7 +22,7 @@ struct timer_list {
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/**
+/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
@@ -50,16 +36,10 @@ struct timer_list {
* workqueue locking issues. It's not meant for executing random crap
* with interrupts disabled. Abuse is monitored!
*
- * @TIMER_PINNED: A pinned timer will not be affected by any timer
- * placement heuristics (like, NOHZ) and will always expire on the CPU
- * on which the timer was enqueued.
- *
- * Note: Because enqueuing of timers can migrate the timer from one
- * CPU to another, pinned timers are not guaranteed to stay on the
- * initialy selected CPU. They move to the CPU on which the enqueue
- * function is invoked via mod_timer() or add_timer(). If the timer
- * should be placed on a particular CPU, then add_timer_on() has to be
- * used.
+ * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the
+ * timer was enqueued. When a particular CPU is required, add_timer_on()
+ * has to be used. Enqueue via mod_timer() and add_timer() is always done
+ * on the local CPU.
*/
#define TIMER_CPUMASK 0x0003FFFF
#define TIMER_MIGRATING 0x00040000
@@ -77,8 +57,7 @@ struct timer_list {
.entry = { .next = TIMER_ENTRY_STATIC }, \
.function = (_function), \
.flags = (_flags), \
- __TIMER_LOCKDEP_MAP_INITIALIZER( \
- __FILE__ ":" __stringify(__LINE__)) \
+ __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \
}
#define DEFINE_TIMER(_name, _function) \
@@ -161,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
- * return value: 1 if the timer is pending, 0 if not.
+ * Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
@@ -180,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires);
#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)
extern void add_timer(struct timer_list *timer);
+extern void add_timer_local(struct timer_list *timer);
+extern void add_timer_global(struct timer_list *timer);
extern int try_to_del_timer_sync(struct timer_list *timer);
extern int timer_delete_sync(struct timer_list *timer);
@@ -194,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
@@ -207,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{
diff --git a/include/linux/timer_types.h b/include/linux/timer_types.h
new file mode 100644
index 000000000000..fae5a388f914
--- /dev/null
+++ b/include/linux/timer_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_TIMER_TYPES_H
+#define _LINUX_TIMER_TYPES_H
+
+#include <linux/lockdep_types.h>
+#include <linux/types.h>
+
+struct timer_list {
+ /*
+ * All fields that change during normal runtime grouped to the
+ * same cacheline
+ */
+ struct hlist_node entry;
+ unsigned long expires;
+ void (*function)(struct timer_list *);
+ u32 flags;
+
+#ifdef CONFIG_LOCKDEP
+ struct lockdep_map lockdep_map;
+#endif
+};
+
+#endif /* _LINUX_TIMER_TYPES_H */
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index adc80e29168e..62973f7d4610 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -3,18 +3,7 @@
#define _LINUX_TIMERQUEUE_H
#include <linux/rbtree.h>
-#include <linux/ktime.h>
-
-
-struct timerqueue_node {
- struct rb_node node;
- ktime_t expires;
-};
-
-struct timerqueue_head {
- struct rb_root_cached rb_root;
-};
-
+#include <linux/timerqueue_types.h>
extern bool timerqueue_add(struct timerqueue_head *head,
struct timerqueue_node *node);
diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h
new file mode 100644
index 000000000000..dc298d0923e3
--- /dev/null
+++ b/include/linux/timerqueue_types.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_TIMERQUEUE_TYPES_H
+#define _LINUX_TIMERQUEUE_TYPES_H
+
+#include <linux/rbtree_types.h>
+#include <linux/types.h>
+
+struct timerqueue_node {
+ struct rb_node node;
+ ktime_t expires;
+};
+
+struct timerqueue_head {
+ struct rb_root_cached rb_root;
+};
+
+#endif /* _LINUX_TIMERQUEUE_TYPES_H */
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 1c3948a1d6ad..3c13240077b8 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a);
struct tnum tnum_subreg(struct tnum a);
/* Returns the tnum with the lower 32-bit subreg cleared */
struct tnum tnum_clear_subreg(struct tnum a);
+/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower
+ * 32-bit subreg in *subreg*
+ */
+struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg);
/* Returns the tnum with the lower 32-bit subreg set to value */
struct tnum tnum_const_subreg(struct tnum a, u32 value);
/* Returns true if 32-bit subreg @a is a known constant*/
diff --git a/include/linux/topology.h b/include/linux/topology.h
index fea32377f7c7..52f5850730b3 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int
#else
static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
{
- return cpumask_nth(cpu, cpus);
+ return cpumask_nth_and(cpu, cpus, cpu_online_mask);
}
static inline const struct cpumask *
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 7038104463e4..1541454da03e 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -21,6 +21,7 @@
#include <linux/debugobjects.h>
#include <linux/bug.h>
#include <linux/compiler.h>
+#include <linux/hrtimer.h>
/* Definitions for a non-string torture-test module parameter. */
#define torture_param(type, name, init, msg) \
@@ -81,7 +82,8 @@ static inline void torture_random_init(struct torture_random_state *trsp)
}
/* Definitions for high-resolution-timer sleeps. */
-int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp);
+int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode,
+ struct torture_random_state *trsp);
int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp);
int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp);
int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp);
@@ -108,19 +110,27 @@ bool torture_must_stop(void);
bool torture_must_stop_irq(void);
void torture_kthread_stopping(char *title);
int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
- char *f, struct task_struct **tp);
+ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
void _torture_stop_kthread(char *m, struct task_struct **tp);
#define torture_create_kthread(n, arg, tp) \
_torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
- "Failed to create " #n, &(tp))
+ "Failed to create " #n, &(tp), NULL)
+#define torture_create_kthread_cb(n, arg, tp, cbf) \
+ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+ "Failed to create " #n, &(tp), cbf)
#define torture_stop_kthread(n, tp) \
_torture_stop_kthread("Stopping " #n " task", &(tp))
+/* Scheduler-related definitions. */
#ifdef CONFIG_PREEMPTION
#define torture_preempt_schedule() __preempt_schedule()
#else
#define torture_preempt_schedule() do { } while (0)
#endif
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST)
+long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
+#endif
+
#endif /* __LINUX_TORTURE_H */
diff --git a/include/linux/trace.h b/include/linux/trace.h
index 2a70a447184c..fdcd76b7be83 100644
--- a/include/linux/trace.h
+++ b/include/linux/trace.h
@@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip,
const char *fmt, ...);
int trace_array_init_printk(struct trace_array *tr);
void trace_array_put(struct trace_array *tr);
-struct trace_array *trace_array_get_by_name(const char *name);
+struct trace_array *trace_array_get_by_name(const char *name, const char *systems);
int trace_array_destroy(struct trace_array *tr);
/* For osnoise tracer */
@@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr)
static inline void trace_array_put(struct trace_array *tr)
{
}
-static inline struct trace_array *trace_array_get_by_name(const char *name)
+static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
{
return NULL;
}
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1e8bbdb8da90..6f9bdfb09d1d 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -17,6 +17,9 @@ struct dentry;
struct bpf_prog;
union bpf_attr;
+/* Used for event string fields when they are NULL */
+#define EVENT_NULL_STR "(null)"
+
const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
unsigned long flags,
const struct trace_print_flags *flag_array);
@@ -62,13 +65,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
/* Used to find the offset and length of dynamic fields in trace events */
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
- u16 offset;
u16 len;
+ u16 offset;
#else
- u16 len;
u16 offset;
+ u16 len;
#endif
-};
+} __packed;
/*
* The trace entry - the most basic unit of tracing. This is what
@@ -103,13 +106,16 @@ struct trace_iterator {
unsigned int temp_size;
char *fmt; /* modified format holder */
unsigned int fmt_size;
- long wait_index;
+ atomic_t wait_index;
/* trace_seq for __print_flags() and __print_symbolic() etc. */
struct trace_seq tmp_seq;
cpumask_var_t started;
+ /* Set when the file is closed to prevent new waiters */
+ bool closed;
+
/* it's true when current open file is snapshot */
bool snapshot;
@@ -492,6 +498,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND_BIT,
EVENT_FILE_FL_PID_FILTER_BIT,
EVENT_FILE_FL_WAS_ENABLED_BIT,
+ EVENT_FILE_FL_FREED_BIT,
};
extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -630,6 +637,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
* TRIGGER_COND - When set, one or more triggers has an associated filter
* PID_FILTER - When set, the event is filtered based on pid
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
+ * FREED - File descriptor is freed, all fields should be considered invalid
*/
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -643,13 +651,14 @@ enum {
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
+ EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
};
struct trace_event_file {
struct list_head list;
struct trace_event_call *event_call;
struct event_filter __rcu *filter;
- struct dentry *dir;
+ struct eventfs_inode *ei;
struct trace_array *tr;
struct trace_subsystem_dir *system;
struct list_head triggers;
@@ -671,6 +680,7 @@ struct trace_event_file {
* caching and such. Which is mostly OK ;-)
*/
unsigned long flags;
+ atomic_t ref; /* ref count for opened files */
atomic_t sm_ref; /* soft-mode reference counter */
atomic_t tm_ref; /* trigger-mode reference counter */
};
@@ -761,8 +771,10 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
- u64 *probe_offset, u64 *probe_addr);
+ u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed);
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -800,7 +812,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
static inline int bpf_get_perf_event_info(const struct perf_event *event,
u32 *prog_id, u32 *fd_type,
const char **buf, u64 *probe_offset,
- u64 *probe_addr)
+ u64 *probe_addr, unsigned long *missed)
{
return -EOPNOTSUPP;
}
@@ -809,6 +821,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
+static inline int
+bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
@@ -818,6 +835,7 @@ enum {
FILTER_RDYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
+ FILTER_CPUMASK,
FILTER_COMM,
FILTER_CPU,
FILTER_STACKTRACE,
@@ -870,6 +888,7 @@ extern void perf_kprobe_destroy(struct perf_event *event);
extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed,
bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
@@ -878,7 +897,8 @@ extern int perf_uprobe_init(struct perf_event *event,
extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
- u64 *probe_offset, bool perf_type_tracepoint);
+ u64 *probe_offset, u64 *probe_addr,
+ bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 6be92bf559fe..1ef95c0287f0 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -8,20 +8,31 @@
/*
* Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE).
+ * to create a string of data to use.
+ *
+ * Have the trace seq to be 8K which is typically PAGE_SIZE * 2 on
+ * most architectures. The TRACE_SEQ_BUFFER_SIZE (which is
+ * TRACE_SEQ_SIZE minus the other fields of trace_seq), is the
+ * max size the output of a trace event may be.
*/
+#define TRACE_SEQ_SIZE 8192
+#define TRACE_SEQ_BUFFER_SIZE (TRACE_SEQ_SIZE - \
+ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int)))
+
struct trace_seq {
- char buffer[PAGE_SIZE];
+ char buffer[TRACE_SEQ_BUFFER_SIZE];
struct seq_buf seq;
+ size_t readpos;
int full;
};
static inline void
trace_seq_init(struct trace_seq *s)
{
- seq_buf_init(&s->seq, s->buffer, PAGE_SIZE);
+ seq_buf_init(&s->seq, s->buffer, TRACE_SEQ_BUFFER_SIZE);
s->full = 0;
+ s->readpos = 0;
}
/**
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
index 99912445974c..d03f74658716 100644
--- a/include/linux/tracefs.h
+++ b/include/linux/tracefs.h
@@ -21,6 +21,75 @@ struct file_operations;
#ifdef CONFIG_TRACING
+struct eventfs_file;
+
+/**
+ * eventfs_callback - A callback function to create dynamic files in eventfs
+ * @name: The name of the file that is to be created
+ * @mode: return the file mode for the file (RW access, etc)
+ * @data: data to pass to the created file ops
+ * @fops: the file operations of the created file
+ *
+ * The evetnfs files are dynamically created. The struct eventfs_entry array
+ * is passed to eventfs_create_dir() or eventfs_create_events_dir() that will
+ * be used to create the files within those directories. When a lookup
+ * or access to a file within the directory is made, the struct eventfs_entry
+ * array is used to find a callback() with the matching name that is being
+ * referenced (for lookups, the entire array is iterated and each callback
+ * will be called).
+ *
+ * The callback will be called with @name for the name of the file to create.
+ * The callback can return less than 1 to indicate that no file should be
+ * created.
+ *
+ * If a file is to be created, then @mode should be populated with the file
+ * mode (permissions) for which the file is created for. This would be
+ * used to set the created inode i_mode field.
+ *
+ * The @data should be set to the data passed to the other file operations
+ * (read, write, etc). Note, @data will also point to the data passed in
+ * to eventfs_create_dir() or eventfs_create_events_dir(), but the callback
+ * can replace the data if it chooses to. Otherwise, the original data
+ * will be used for the file operation functions.
+ *
+ * The @fops should be set to the file operations that will be used to create
+ * the inode.
+ *
+ * NB. This callback is called while holding internal locks of the eventfs
+ * system. The callback must not call any code that might also call into
+ * the tracefs or eventfs system or it will risk creating a deadlock.
+ */
+typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data,
+ const struct file_operations **fops);
+
+typedef void (*eventfs_release)(const char *name, void *data);
+
+/**
+ * struct eventfs_entry - dynamically created eventfs file call back handler
+ * @name: Then name of the dynamic file in an eventfs directory
+ * @callback: The callback to get the fops of the file when it is created
+ *
+ * See evenfs_callback() typedef for how to set up @callback.
+ */
+struct eventfs_entry {
+ const char *name;
+ eventfs_callback callback;
+ eventfs_release release;
+};
+
+struct eventfs_inode;
+
+struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);
+
+struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
+ const struct eventfs_entry *entries,
+ int size, void *data);
+
+void eventfs_remove_events_dir(struct eventfs_inode *ei);
+void eventfs_remove_dir(struct eventfs_inode *ei);
+
struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 88c0ba623ee6..689b6d71590e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -199,7 +199,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
if (!(cond)) \
return; \
\
- if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \
+ if (WARN_ONCE(RCUIDLE_COND(rcuidle), \
+ "Bad RCU usage for tracepoint")) \
return; \
\
/* keep srcu and sched-rcu usage consistent */ \
@@ -259,7 +260,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
TP_ARGS(args), \
TP_CONDITION(cond), 0); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
- WARN_ON_ONCE(!rcu_is_watching()); \
+ WARN_ONCE(!rcu_is_watching(), \
+ "RCU not watching for tracepoint"); \
} \
} \
__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
diff --git a/include/linux/tsm.h b/include/linux/tsm.h
new file mode 100644
index 000000000000..de8324a2223c
--- /dev/null
+++ b/include/linux/tsm.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TSM_H
+#define __TSM_H
+
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#define TSM_INBLOB_MAX 64
+#define TSM_OUTBLOB_MAX SZ_32K
+
+/*
+ * Privilege level is a nested permission concept to allow confidential
+ * guests to partition address space, 4-levels are supported.
+ */
+#define TSM_PRIVLEVEL_MAX 3
+
+/**
+ * struct tsm_desc - option descriptor for generating tsm report blobs
+ * @privlevel: optional privilege level to associate with @outblob
+ * @inblob_len: sizeof @inblob
+ * @inblob: arbitrary input data
+ */
+struct tsm_desc {
+ unsigned int privlevel;
+ size_t inblob_len;
+ u8 inblob[TSM_INBLOB_MAX];
+};
+
+/**
+ * struct tsm_report - track state of report generation relative to options
+ * @desc: input parameters to @report_new()
+ * @outblob_len: sizeof(@outblob)
+ * @outblob: generated evidence to provider to the attestation agent
+ * @auxblob_len: sizeof(@auxblob)
+ * @auxblob: (optional) auxiliary data to the report (e.g. certificate data)
+ */
+struct tsm_report {
+ struct tsm_desc desc;
+ size_t outblob_len;
+ u8 *outblob;
+ size_t auxblob_len;
+ u8 *auxblob;
+};
+
+/**
+ * struct tsm_ops - attributes and operations for tsm instances
+ * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider
+ * @privlevel_floor: convey base privlevel for nested scenarios
+ * @report_new: Populate @report with the report blob and auxblob
+ * (optional), return 0 on successful population, or -errno otherwise
+ *
+ * Implementation specific ops, only one is expected to be registered at
+ * a time i.e. only one of "sev-guest", "tdx-guest", etc.
+ */
+struct tsm_ops {
+ const char *name;
+ const unsigned int privlevel_floor;
+ int (*report_new)(struct tsm_report *report, void *data);
+};
+
+extern const struct config_item_type tsm_report_default_type;
+
+/* publish @privlevel, @privlevel_floor, and @auxblob attributes */
+extern const struct config_item_type tsm_report_extra_type;
+
+int tsm_register(const struct tsm_ops *ops, void *priv,
+ const struct config_item_type *type);
+int tsm_unregister(const struct tsm_ops *ops);
+#endif /* __TSM_H */
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e8d5d9997aca..2b2e6f0a54d6 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -6,7 +6,6 @@
#include <linux/major.h>
#include <linux/termios.h>
#include <linux/workqueue.h>
-#include <linux/tty_buffer.h>
#include <linux/tty_driver.h>
#include <linux/tty_ldisc.h>
#include <linux/tty_port.h>
@@ -192,13 +191,14 @@ struct tty_operations;
*/
struct tty_struct {
struct kref kref;
+ int index;
struct device *dev;
struct tty_driver *driver;
+ struct tty_port *port;
const struct tty_operations *ops;
- int index;
- struct ld_semaphore ldisc_sem;
struct tty_ldisc *ldisc;
+ struct ld_semaphore ldisc_sem;
struct mutex atomic_write_lock;
struct mutex legacy_mutex;
@@ -209,6 +209,7 @@ struct tty_struct {
char name[64];
unsigned long flags;
int count;
+ unsigned int receive_room;
struct winsize winsize;
struct {
@@ -219,16 +220,16 @@ struct tty_struct {
} __aligned(sizeof(unsigned long)) flow;
struct {
- spinlock_t lock;
struct pid *pgrp;
struct pid *session;
+ spinlock_t lock;
unsigned char pktstatus;
bool packet;
unsigned long unused[0];
} __aligned(sizeof(unsigned long)) ctrl;
bool hw_stopped;
- unsigned int receive_room;
+ bool closing;
int flow_change;
struct tty_struct *link;
@@ -239,15 +240,13 @@ struct tty_struct {
void *disc_data;
void *driver_data;
spinlock_t files_lock;
+ int write_cnt;
+ u8 *write_buf;
+
struct list_head tty_files;
#define N_TTY_BUF_SIZE 4096
-
- int closing;
- unsigned char *write_buf;
- int write_cnt;
struct work_struct SAK_work;
- struct tty_port *port;
} __randomize_layout;
/* Each of a tty's open files has private_data pointing to tty_file_private */
@@ -390,14 +389,14 @@ int vcs_init(void);
extern const struct class tty_class;
/**
- * tty_kref_get - get a tty reference
- * @tty: tty device
+ * tty_kref_get - get a tty reference
+ * @tty: tty device
*
- * Return a new reference to a tty object. The caller must hold
- * sufficient locks/counts to ensure that their existing reference cannot
- * go away
+ * Returns: a new reference to a tty object
+ *
+ * Locking: The caller must hold sufficient locks/counts to ensure that their
+ * existing reference cannot go away.
*/
-
static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
{
if (tty)
@@ -410,17 +409,18 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout);
void stop_tty(struct tty_struct *tty);
void start_tty(struct tty_struct *tty);
void tty_write_message(struct tty_struct *tty, char *msg);
-int tty_send_xchar(struct tty_struct *tty, char ch);
-int tty_put_char(struct tty_struct *tty, unsigned char c);
+int tty_send_xchar(struct tty_struct *tty, u8 ch);
+int tty_put_char(struct tty_struct *tty, u8 c);
unsigned int tty_chars_in_buffer(struct tty_struct *tty);
unsigned int tty_write_room(struct tty_struct *tty);
void tty_driver_flush_buffer(struct tty_struct *tty);
void tty_unthrottle(struct tty_struct *tty);
-int tty_throttle_safe(struct tty_struct *tty);
-int tty_unthrottle_safe(struct tty_struct *tty);
+bool tty_throttle_safe(struct tty_struct *tty);
+bool tty_unthrottle_safe(struct tty_struct *tty);
int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
int tty_get_icount(struct tty_struct *tty,
struct serial_icounter_struct *icount);
+int tty_get_tiocm(struct tty_struct *tty);
int is_current_pgrp_orphaned(void);
void tty_hangup(struct tty_struct *tty);
void tty_vhangup(struct tty_struct *tty);
@@ -435,16 +435,14 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud,
speed_t obaud);
/**
- * tty_get_baud_rate - get tty bit rates
- * @tty: tty to query
+ * tty_get_baud_rate - get tty bit rates
+ * @tty: tty to query
*
- * Returns the baud rate as an integer for this terminal. The
- * termios lock must be held by the caller and the terminal bit
- * flags may be updated.
+ * Returns: the baud rate as an integer for this terminal
*
- * Locking: none
+ * Locking: The termios lock must be held by the caller.
*/
-static inline speed_t tty_get_baud_rate(struct tty_struct *tty)
+static inline speed_t tty_get_baud_rate(const struct tty_struct *tty)
{
return tty_termios_baud_rate(&tty->termios);
}
diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h
index 6ceb2789e6c8..31125e3be3c5 100644
--- a/include/linux/tty_buffer.h
+++ b/include/linux/tty_buffer.h
@@ -12,24 +12,24 @@ struct tty_buffer {
struct tty_buffer *next;
struct llist_node free;
};
- int used;
- int size;
- int commit;
- int lookahead; /* Lazy update on recv, can become less than "read" */
- int read;
+ unsigned int used;
+ unsigned int size;
+ unsigned int commit;
+ unsigned int lookahead; /* Lazy update on recv, can become less than "read" */
+ unsigned int read;
bool flags;
/* Data points here */
- unsigned long data[];
+ u8 data[] __aligned(sizeof(unsigned long));
};
-static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs)
+static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
- return ((unsigned char *)b->data) + ofs;
+ return b->data + ofs;
}
-static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs)
+static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
- return (char *)char_buf_ptr(b, ofs) + b->size;
+ return char_buf_ptr(b, ofs) + b->size;
}
struct tty_bufhead {
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index e00034118c7b..7372124fbf90 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -72,8 +72,7 @@ struct serial_struct;
* is closed for the last time freeing up the resources. This is
* actually the second part of shutdown for routines that might sleep.
*
- * @write: ``int ()(struct tty_struct *tty, const unsigned char *buf,
- * int count)``
+ * @write: ``ssize_t ()(struct tty_struct *tty, const u8 *buf, size_t count)``
*
* This routine is called by the kernel to write a series (@count) of
* characters (@buf) to the @tty device. The characters may come from
@@ -85,7 +84,7 @@ struct serial_struct;
*
* Optional: Required for writable devices. May not sleep.
*
- * @put_char: ``int ()(struct tty_struct *tty, unsigned char ch)``
+ * @put_char: ``int ()(struct tty_struct *tty, u8 ch)``
*
* This routine is called by the kernel to write a single character @ch to
* the @tty device. If the kernel uses this routine, it must call the
@@ -243,7 +242,7 @@ struct serial_struct;
* Optional: If not provided, the device is assumed to have no FIFO.
* Usually correct to invoke via tty_wait_until_sent(). May sleep.
*
- * @send_xchar: ``void ()(struct tty_struct *tty, char ch)``
+ * @send_xchar: ``void ()(struct tty_struct *tty, u8 ch)``
*
* This routine is used to send a high-priority XON/XOFF character (@ch)
* to the @tty device.
@@ -356,9 +355,8 @@ struct tty_operations {
void (*close)(struct tty_struct * tty, struct file * filp);
void (*shutdown)(struct tty_struct *tty);
void (*cleanup)(struct tty_struct *tty);
- int (*write)(struct tty_struct * tty,
- const unsigned char *buf, int count);
- int (*put_char)(struct tty_struct *tty, unsigned char ch);
+ ssize_t (*write)(struct tty_struct *tty, const u8 *buf, size_t count);
+ int (*put_char)(struct tty_struct *tty, u8 ch);
void (*flush_chars)(struct tty_struct *tty);
unsigned int (*write_room)(struct tty_struct *tty);
unsigned int (*chars_in_buffer)(struct tty_struct *tty);
@@ -376,7 +374,7 @@ struct tty_operations {
void (*flush_buffer)(struct tty_struct *tty);
void (*set_ldisc)(struct tty_struct *tty);
void (*wait_until_sent)(struct tty_struct *tty, int timeout);
- void (*send_xchar)(struct tty_struct *tty, char ch);
+ void (*send_xchar)(struct tty_struct *tty, u8 ch);
int (*tiocmget)(struct tty_struct *tty);
int (*tiocmset)(struct tty_struct *tty,
unsigned int set, unsigned int clear);
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index bfaaeee61a05..af4fce98f64e 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -10,17 +10,59 @@ struct tty_ldisc;
int tty_buffer_set_limit(struct tty_port *port, int limit);
unsigned int tty_buffer_space_avail(struct tty_port *port);
int tty_buffer_request_room(struct tty_port *port, size_t size);
-int tty_insert_flip_string_flags(struct tty_port *port,
- const unsigned char *chars, const char *flags, size_t size);
-int tty_insert_flip_string_fixed_flag(struct tty_port *port,
- const unsigned char *chars, char flag, size_t size);
-int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars,
- size_t size);
+size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars,
+ const u8 *flags, bool mutable_flags,
+ size_t size);
+size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size);
void tty_flip_buffer_push(struct tty_port *port);
-int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag);
-static inline int tty_insert_flip_char(struct tty_port *port,
- unsigned char ch, char flag)
+/**
+ * tty_insert_flip_string_fixed_flag - add characters to the tty buffer
+ * @port: tty port
+ * @chars: characters
+ * @flag: flag value for each character
+ * @size: size
+ *
+ * Queue a series of bytes to the tty buffering. All the characters passed are
+ * marked with the supplied flag.
+ *
+ * Returns: the number added.
+ */
+static inline size_t tty_insert_flip_string_fixed_flag(struct tty_port *port,
+ const u8 *chars, u8 flag,
+ size_t size)
+{
+ return __tty_insert_flip_string_flags(port, chars, &flag, false, size);
+}
+
+/**
+ * tty_insert_flip_string_flags - add characters to the tty buffer
+ * @port: tty port
+ * @chars: characters
+ * @flags: flag bytes
+ * @size: size
+ *
+ * Queue a series of bytes to the tty buffering. For each character the flags
+ * array indicates the status of the character.
+ *
+ * Returns: the number added.
+ */
+static inline size_t tty_insert_flip_string_flags(struct tty_port *port,
+ const u8 *chars,
+ const u8 *flags, size_t size)
+{
+ return __tty_insert_flip_string_flags(port, chars, flags, true, size);
+}
+
+/**
+ * tty_insert_flip_char - add one character to the tty buffer
+ * @port: tty port
+ * @ch: character
+ * @flag: flag byte
+ *
+ * Queue a single byte @ch to the tty buffering, with an optional flag.
+ */
+static inline size_t tty_insert_flip_char(struct tty_port *port, u8 ch, u8 flag)
{
struct tty_buffer *tb = port->buf.tail;
int change;
@@ -32,17 +74,17 @@ static inline int tty_insert_flip_char(struct tty_port *port,
*char_buf_ptr(tb, tb->used++) = ch;
return 1;
}
- return __tty_insert_flip_char(port, ch, flag);
+ return __tty_insert_flip_string_flags(port, &ch, &flag, false, 1);
}
-static inline int tty_insert_flip_string(struct tty_port *port,
- const unsigned char *chars, size_t size)
+static inline size_t tty_insert_flip_string(struct tty_port *port,
+ const u8 *chars, size_t size)
{
return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size);
}
-int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
- const char *f, int count);
+size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f,
+ size_t count);
void tty_buffer_lock_exclusive(struct tty_port *port);
void tty_buffer_unlock_exclusive(struct tty_port *port);
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 49dc172dedc7..af01e89074b2 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -71,7 +71,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* call to @receive_buf(). Returning an error will prevent the ldisc from
* being attached.
*
- * Can sleep.
+ * Optional. Can sleep.
*
* @close: [TTY] ``void ()(struct tty_struct *tty)``
*
@@ -80,7 +80,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* changed to use a new line discipline. At the point of execution no
* further users will enter the ldisc code for this tty.
*
- * Can sleep.
+ * Optional. Can sleep.
*
* @flush_buffer: [TTY] ``void ()(struct tty_struct *tty)``
*
@@ -88,8 +88,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* input characters it may have queued to be delivered to the user mode
* process. It may be called at any point between open and close.
*
- * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file,
- * unsigned char *buf, size_t nr)``
+ * Optional.
+ *
+ * @read: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file, u8 *buf,
+ * size_t nr)``
*
* This function is called when the user requests to read from the @tty.
* The line discipline will return whatever characters it has buffered up
@@ -97,10 +99,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* an %EIO error. Multiple read calls may occur in parallel and the ldisc
* must deal with serialization issues.
*
- * Can sleep.
+ * Optional: %EIO unless provided. Can sleep.
*
* @write: [TTY] ``ssize_t ()(struct tty_struct *tty, struct file *file,
- * const unsigned char *buf, size_t nr)``
+ * const u8 *buf, size_t nr)``
*
* This function is called when the user requests to write to the @tty.
* The line discipline will deliver the characters to the low-level tty
@@ -108,7 +110,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* characters first. If this function is not defined, the user will
* receive an %EIO error.
*
- * Can sleep.
+ * Optional: %EIO unless provided. Can sleep.
*
* @ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd,
* unsigned long arg)``
@@ -120,6 +122,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* discpline. So a low-level driver can "grab" an ioctl request before
* the line discpline has a chance to see it.
*
+ * Optional.
+ *
* @compat_ioctl: [TTY] ``int ()(struct tty_struct *tty, unsigned int cmd,
* unsigned long arg)``
*
@@ -130,11 +134,15 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* a pointer to wordsize-sensitive structure belongs here, but most of
* ldiscs will happily leave it %NULL.
*
+ * Optional.
+ *
* @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)``
*
* This function notifies the line discpline that a change has been made
* to the termios structure.
*
+ * Optional.
+ *
* @poll: [TTY] ``int ()(struct tty_struct *tty, struct file *file,
* struct poll_table_struct *wait)``
*
@@ -142,6 +150,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* device. It is solely the responsibility of the line discipline to
* handle poll requests.
*
+ * Optional.
+ *
* @hangup: [TTY] ``void ()(struct tty_struct *tty)``
*
* Called on a hangup. Tells the discipline that it should cease I/O to
@@ -149,10 +159,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* but should wait until any pending driver I/O is completed. No further
* calls into the ldisc code will occur.
*
- * Can sleep.
+ * Optional. Can sleep.
*
- * @receive_buf: [DRV] ``void ()(struct tty_struct *tty,
- * const unsigned char *cp, const char *fp, int count)``
+ * @receive_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp,
+ * const u8 *fp, size_t count)``
*
* This function is called by the low-level tty driver to send characters
* received by the hardware to the line discpline for processing. @cp is
@@ -161,6 +171,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* character was received with a parity error, etc. @fp may be %NULL to
* indicate all data received is %TTY_NORMAL.
*
+ * Optional.
+ *
* @write_wakeup: [DRV] ``void ()(struct tty_struct *tty)``
*
* This function is called by the low-level tty driver to signal that line
@@ -170,13 +182,17 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* send, please arise a tasklet or workqueue to do the real data transfer.
* Do not send data in this hook, it may lead to a deadlock.
*
+ * Optional.
+ *
* @dcd_change: [DRV] ``void ()(struct tty_struct *tty, bool active)``
*
* Tells the discipline that the DCD pin has changed its status. Used
* exclusively by the %N_PPS (Pulse-Per-Second) line discipline.
*
- * @receive_buf2: [DRV] ``int ()(struct tty_struct *tty,
- * const unsigned char *cp, const char *fp, int count)``
+ * Optional.
+ *
+ * @receive_buf2: [DRV] ``ssize_t ()(struct tty_struct *tty, const u8 *cp,
+ * const u8 *fp, size_t count)``
*
* This function is called by the low-level tty driver to send characters
* received by the hardware to the line discpline for processing. @cp is a
@@ -186,8 +202,10 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* indicate all data received is %TTY_NORMAL. If assigned, prefer this
* function for automatic flow control.
*
- * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty,
- * const unsigned char *cp, const char *fp, int count)``
+ * Optional.
+ *
+ * @lookahead_buf: [DRV] ``void ()(struct tty_struct *tty, const u8 *cp,
+ * const u8 *fp, size_t count)``
*
* This function is called by the low-level tty driver for characters
* not eaten by ->receive_buf() or ->receive_buf2(). It is useful for
@@ -198,6 +216,8 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
* same characters (e.g. by skipping the actions for high-priority
* characters already handled by ->lookahead_buf()).
*
+ * Optional.
+ *
* @owner: module containting this ldisc (for reference counting)
*
* This structure defines the interface between the tty line discipline
@@ -218,11 +238,10 @@ struct tty_ldisc_ops {
int (*open)(struct tty_struct *tty);
void (*close)(struct tty_struct *tty);
void (*flush_buffer)(struct tty_struct *tty);
- ssize_t (*read)(struct tty_struct *tty, struct file *file,
- unsigned char *buf, size_t nr,
- void **cookie, unsigned long offset);
+ ssize_t (*read)(struct tty_struct *tty, struct file *file, u8 *buf,
+ size_t nr, void **cookie, unsigned long offset);
ssize_t (*write)(struct tty_struct *tty, struct file *file,
- const unsigned char *buf, size_t nr);
+ const u8 *buf, size_t nr);
int (*ioctl)(struct tty_struct *tty, unsigned int cmd,
unsigned long arg);
int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd,
@@ -235,14 +254,14 @@ struct tty_ldisc_ops {
/*
* The following routines are called from below.
*/
- void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp,
- const char *fp, int count);
+ void (*receive_buf)(struct tty_struct *tty, const u8 *cp,
+ const u8 *fp, size_t count);
void (*write_wakeup)(struct tty_struct *tty);
void (*dcd_change)(struct tty_struct *tty, bool active);
- int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp,
- const char *fp, int count);
- void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp,
- const unsigned char *fp, unsigned int count);
+ size_t (*receive_buf2)(struct tty_struct *tty, const u8 *cp,
+ const u8 *fp, size_t count);
+ void (*lookahead_buf)(struct tty_struct *tty, const u8 *cp,
+ const u8 *fp, size_t count);
struct module *owner;
};
diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h
index edf685a24f7c..1b861f2100b6 100644
--- a/include/linux/tty_port.h
+++ b/include/linux/tty_port.h
@@ -39,9 +39,10 @@ struct tty_port_operations {
};
struct tty_port_client_operations {
- int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t);
- void (*lookahead_buf)(struct tty_port *port, const unsigned char *cp,
- const unsigned char *fp, unsigned int count);
+ size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp,
+ size_t count);
+ void (*lookahead_buf)(struct tty_port *port, const u8 *cp,
+ const u8 *fp, size_t count);
void (*write_wakeup)(struct tty_port *port);
};
@@ -113,8 +114,8 @@ struct tty_port {
unsigned char console:1;
struct mutex mutex;
struct mutex buf_mutex;
- unsigned char *xmit_buf;
- DECLARE_KFIFO_PTR(xmit_fifo, unsigned char);
+ u8 *xmit_buf;
+ DECLARE_KFIFO_PTR(xmit_fifo, u8);
unsigned int close_delay;
unsigned int closing_wait;
int drain_delay;
@@ -148,10 +149,10 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
const struct attribute_group **attr_grp);
struct device *tty_port_register_device_serdev(struct tty_port *port,
struct tty_driver *driver, unsigned index,
- struct device *device);
+ struct device *host, struct device *parent);
struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
struct tty_driver *driver, unsigned index,
- struct device *device, void *drvdata,
+ struct device *host, struct device *parent, void *drvdata,
const struct attribute_group **attr_grp);
void tty_port_unregister_device(struct tty_port *port,
struct tty_driver *driver, unsigned index);
diff --git a/include/linux/types.h b/include/linux/types.h
index 253168bb3fe1..2bc8766ba20c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -120,6 +120,9 @@ typedef s64 int64_t;
#define aligned_be64 __aligned_be64
#define aligned_le64 __aligned_le64
+/* Nanosecond scalar representation for kernel time values */
+typedef s64 ktime_t;
+
/**
* The type used for indexing onto a disc or disc partition.
*
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index ffe48e69b3f3..457879938fc1 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
p->v++;
}
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h
index cf3ada3e820e..c499ae809c7d 100644
--- a/include/linux/ucs2_string.h
+++ b/include/linux/ucs2_string.h
@@ -10,6 +10,7 @@ typedef u16 ucs2_char_t;
unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength);
unsigned long ucs2_strlen(const ucs2_char_t *s);
unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
+ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count);
int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
unsigned long ucs2_utf8size(const ucs2_char_t *src);
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 43c1fb2d2c21..e398e1dbd2d3 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
return (num + net_hash_mix(net)) & mask;
}
+enum {
+ UDP_FLAGS_CORK, /* Cork is required */
+ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
+ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */
+ UDP_FLAGS_ACCEPT_FRAGLIST,
+ UDP_FLAGS_ACCEPT_L4,
+ UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
+ UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
+ UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
+};
+
struct udp_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node
+
+ unsigned long udp_flags;
+
int pending; /* Any pending frames ? */
- unsigned int corkflag; /* Cork is required */
__u8 encap_type; /* Is this an Encapsulation socket? */
- unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
- no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
- encap_enabled:1, /* This socket enabled encap
- * processing; UDP tunnels and
- * different encapsulation layer set
- * this
- */
- gro_enabled:1, /* Request GRO aggregation */
- accept_udp_l4:1,
- accept_udp_fraglist:1;
+
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
@@ -62,12 +67,6 @@ struct udp_sock {
*/
__u16 pcslen;
__u16 pcrlen;
-/* indicator bits used by pcflag: */
-#define UDPLITE_BIT 0x1 /* set by udplite proto init function */
-#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */
-#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */
- __u8 pcflag; /* marks socket as UDP-Lite if > 0 */
- __u8 unused[3];
/*
* For encapsulation sockets.
*/
@@ -93,30 +92,51 @@ struct udp_sock {
/* This fields follows rcvbuf value, and is touched by udp_recvmsg */
int forward_threshold;
+
+ /* Cache friendly copy of sk->sk_peek_off >= 0 */
+ bool peeking_with_offset;
};
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define udp_test_bit(nr, sk) \
+ test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_set_bit(nr, sk) \
+ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_test_and_set_bit(nr, sk) \
+ test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_clear_bit(nr, sk) \
+ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_assign_bit(nr, sk, val) \
+ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
+
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
+static inline int udp_set_peek_off(struct sock *sk, int val)
+{
+ sk_set_peek_off(sk, val);
+ WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0);
+ return 0;
+}
+
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
- udp_sk(sk)->no_check6_tx = val;
+ udp_assign_bit(NO_CHECK6_TX, sk, val);
}
static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
- udp_sk(sk)->no_check6_rx = val;
+ udp_assign_bit(NO_CHECK6_RX, sk, val);
}
-static inline bool udp_get_no_check6_tx(struct sock *sk)
+static inline bool udp_get_no_check6_tx(const struct sock *sk)
{
- return udp_sk(sk)->no_check6_tx;
+ return udp_test_bit(NO_CHECK6_TX, sk);
}
-static inline bool udp_get_no_check6_rx(struct sock *sk)
+static inline bool udp_get_no_check6_rx(const struct sock *sk)
{
- return udp_sk(sk)->no_check6_rx;
+ return udp_test_bit(NO_CHECK6_RX, sk);
}
static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
@@ -130,15 +150,45 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
}
}
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ return true;
+#endif
+
+ return false;
+}
+
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
if (!skb_is_gso(skb))
return false;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ !udp_test_bit(ACCEPT_L4, sk))
return true;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST &&
+ !udp_test_bit(ACCEPT_FRAGLIST, sk))
+ return true;
+
+ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+ * land in a tunnel as the socket check in udp_gro_receive cannot be
+ * foolproof.
+ */
+ if (udp_encap_needed() &&
+ READ_ONCE(udp_sk(sk)->encap_rcv) &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
return true;
return false;
@@ -146,8 +196,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
static inline void udp_allow_gso(struct sock *sk)
{
- udp_sk(sk)->accept_udp_l4 = 1;
- udp_sk(sk)->accept_udp_fraglist = 1;
+ udp_set_bit(ACCEPT_L4, sk);
+ udp_set_bit(ACCEPT_FRAGLIST, sk);
}
#define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index b0542cd11aeb..f85ec5613721 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -12,20 +12,12 @@
* to detect when we overlook these differences.
*
*/
-#include <linux/types.h>
+#include <linux/uidgid_types.h>
#include <linux/highuid.h>
struct user_namespace;
extern struct user_namespace init_user_ns;
-
-typedef struct {
- uid_t val;
-} kuid_t;
-
-
-typedef struct {
- gid_t val;
-} kgid_t;
+struct uid_gid_map;
#define KUIDT_INIT(value) (kuid_t){ value }
#define KGIDT_INIT(value) (kgid_t){ value }
@@ -138,6 +130,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
return from_kgid(ns, gid) != (gid_t) -1;
}
+u32 map_id_down(struct uid_gid_map *map, u32 id);
+u32 map_id_up(struct uid_gid_map *map, u32 id);
+
#else
static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
@@ -186,6 +181,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
return gid_valid(gid);
}
+static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
+{
+ return id;
+}
+
+static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
+{
+ return id;
+}
#endif /* CONFIG_USER_NS */
#endif /* _LINUX_UIDGID_H */
diff --git a/include/linux/uidgid_types.h b/include/linux/uidgid_types.h
new file mode 100644
index 000000000000..b35ac4955a33
--- /dev/null
+++ b/include/linux/uidgid_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UIDGID_TYPES_H
+#define _LINUX_UIDGID_TYPES_H
+
+#include <linux/types.h>
+
+typedef struct {
+ uid_t val;
+} kuid_t;
+
+typedef struct {
+ gid_t val;
+} kgid_t;
+
+#endif /* _LINUX_UIDGID_TYPES_H */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ff81e5ccaef2..00cebe2b70de 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,12 +21,12 @@ struct kvec {
enum iter_type {
/* iter types */
+ ITER_UBUF,
ITER_IOVEC,
- ITER_KVEC,
ITER_BVEC,
+ ITER_KVEC,
ITER_XARRAY,
ITER_DISCARD,
- ITER_UBUF,
};
#define ITER_SOURCE 1 // == WRITE
@@ -40,14 +40,9 @@ struct iov_iter_state {
struct iov_iter {
u8 iter_type;
- bool copy_mc;
bool nofault;
bool data_source;
- bool user_backed;
- union {
- size_t iov_offset;
- int last_offset;
- };
+ size_t iov_offset;
/*
* Hack alert: overlay ubuf_iovec with iovec + count, so
* that the members resolve correctly regardless of the type
@@ -143,7 +138,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
static inline bool user_backed_iter(const struct iov_iter *i)
{
- return i->user_backed;
+ return iter_is_ubuf(i) || iter_is_iovec(i);
}
/*
@@ -163,7 +158,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
return ret;
}
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +179,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
{
return copy_page_to_iter(&folio->page, offset, bytes, i);
}
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+ size_t offset, size_t bytes, struct iov_iter *i)
+{
+ return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
size_t bytes, struct iov_iter *i);
@@ -245,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
-static inline void iov_iter_set_copy_mc(struct iov_iter *i)
-{
- i->copy_mc = true;
-}
-
-static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
-{
- return i->copy_mc;
-}
#else
#define _copy_mc_to_iter _copy_to_iter
-static inline void iov_iter_set_copy_mc(struct iov_iter *i) { }
-static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
-{
- return false;
-}
#endif
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
@@ -335,27 +323,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
return npages;
}
-struct csum_state {
- __wsum csum;
- size_t off;
-};
-
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i);
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
-
-static __always_inline __must_check
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
- __wsum *csum, struct iov_iter *i)
-{
- size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i);
- if (likely(copied == bytes))
- return true;
- iov_iter_revert(i, copied);
- return false;
-}
-size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
- struct iov_iter *i);
-
struct iovec *iovec_from_user(const struct iovec __user *uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_iov, bool compat);
@@ -365,8 +332,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
ssize_t __import_iovec(int type, const struct iovec __user *uvec,
unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
struct iov_iter *i, bool compat);
-int import_single_range(int type, void __user *buf, size_t len,
- struct iovec *iov, struct iov_iter *i);
int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i);
static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
@@ -375,8 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_UBUF,
- .copy_mc = false,
- .user_backed = true,
.data_source = direction,
.ubuf = buf,
.count = count,
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 47c5962b876b..18238dc8bfd3 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -28,19 +28,26 @@ struct uio_map;
* logical, virtual, or physical & phys_addr_t
* should always be large enough to handle any of
* the address types)
+ * @dma_addr: DMA handle set by dma_alloc_coherent, used with
+ * UIO_MEM_DMA_COHERENT only (@addr should be the
+ * void * returned from the same dma_alloc_coherent call)
* @offs: offset of device memory within the page
* @size: size of IO (multiple of page size)
* @memtype: type of memory addr points to
* @internal_addr: ioremap-ped version of addr, for driver internal use
+ * @dma_device: device struct that was passed to dma_alloc_coherent,
+ * used with UIO_MEM_DMA_COHERENT only
* @map: for use by the UIO core only.
*/
struct uio_mem {
const char *name;
phys_addr_t addr;
+ dma_addr_t dma_addr;
unsigned long offs;
resource_size_t size;
int memtype;
void __iomem *internal_addr;
+ struct device *dma_device;
struct uio_map *map;
};
@@ -158,6 +165,12 @@ extern int __must_check
#define UIO_MEM_LOGICAL 2
#define UIO_MEM_VIRTUAL 3
#define UIO_MEM_IOVA 4
+/*
+ * UIO_MEM_DMA_COHERENT exists for legacy drivers that had been getting by with
+ * improperly mapping DMA coherent allocations through the other modes.
+ * Do not use in new drivers.
+ */
+#define UIO_MEM_DMA_COHERENT 5
/* defines for uio_port->porttype */
#define UIO_PORT_NONE 0
diff --git a/include/linux/units.h b/include/linux/units.h
index 2793a41e73a2..00e15de33eca 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_UNITS_H
#define _LINUX_UNITS_H
+#include <linux/bits.h>
#include <linux/math.h>
/* Metric prefixes in accordance with Système international (d'unités) */
@@ -23,14 +24,21 @@
#define NANOHZ_PER_HZ 1000000000UL
#define MICROHZ_PER_HZ 1000000UL
#define MILLIHZ_PER_HZ 1000UL
+
#define HZ_PER_KHZ 1000UL
-#define KHZ_PER_MHZ 1000UL
#define HZ_PER_MHZ 1000000UL
+#define KHZ_PER_MHZ 1000UL
+#define KHZ_PER_GHZ 1000000UL
+
#define MILLIWATT_PER_WATT 1000UL
#define MICROWATT_PER_MILLIWATT 1000UL
#define MICROWATT_PER_WATT 1000000UL
+#define BYTES_PER_KBIT (KILO / BITS_PER_BYTE)
+#define BYTES_PER_MBIT (MEGA / BITS_PER_BYTE)
+#define BYTES_PER_GBIT (GIGA / BITS_PER_BYTE)
+
#define ABSOLUTE_ZERO_MILLICELSIUS -273150
static inline long milli_kelvin_to_millicelsius(long t)
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 25f8e62a30ec..9e52179872a5 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -25,7 +25,6 @@
struct usb_device;
struct usb_driver;
-struct wusb_dev;
/*-------------------------------------------------------------------------*/
@@ -425,7 +424,6 @@ struct usb_host_config {
struct usb_host_bos {
struct usb_bos_descriptor *desc;
- /* wireless cap descriptor is handled by wusb */
struct usb_ext_cap_descriptor *ext_cap;
struct usb_ss_cap_descriptor *ss_cap;
struct usb_ssp_cap_descriptor *ssp_cap;
@@ -612,7 +610,6 @@ struct usb3_lpm_parameters {
* WUSB devices are not, until we authorize them from user space.
* FIXME -- complete doc
* @authenticated: Crypto authentication passed
- * @wusb: device is Wireless USB
* @lpm_capable: device supports LPM
* @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range
* @usb2_hw_lpm_capable: device can perform USB2 hardware LPM
@@ -634,10 +631,7 @@ struct usb3_lpm_parameters {
* @do_remote_wakeup: remote wakeup should be enabled
* @reset_resume: needs reset instead of resume
* @port_is_suspended: the upstream port is suspended (L2 or U3)
- * @wusb_dev: if this is a Wireless USB device, link to the WUSB
- * specific data for the device.
* @slot_id: Slot ID assigned by xHCI
- * @removable: Device can be physically removed from this port
* @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout.
* @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout.
* @u2_params: exit latencies for USB3 U2 LPM state, and hub-initiated timeout.
@@ -696,7 +690,6 @@ struct usb_device {
unsigned have_langid:1;
unsigned authorized:1;
unsigned authenticated:1;
- unsigned wusb:1;
unsigned lpm_capable:1;
unsigned lpm_devinit_allow:1;
unsigned usb2_hw_lpm_capable:1;
@@ -727,7 +720,6 @@ struct usb_device {
unsigned reset_resume:1;
unsigned port_is_suspended:1;
- struct wusb_dev *wusb_dev;
int slot_id;
struct usb2_lpm_parameters l1_params;
struct usb3_lpm_parameters u1_params;
@@ -1152,16 +1144,6 @@ extern ssize_t usb_store_new_id(struct usb_dynids *dynids,
extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf);
/**
- * struct usbdrv_wrap - wrapper for driver-model structure
- * @driver: The driver-model core driver structure.
- * @for_devices: Non-zero for device drivers, 0 for interface drivers.
- */
-struct usbdrv_wrap {
- struct device_driver driver;
- int for_devices;
-};
-
-/**
* struct usb_driver - identifies USB interface driver to usbcore
* @name: The driver name should be unique among USB drivers,
* and should normally be the same as the module name.
@@ -1201,7 +1183,7 @@ struct usbdrv_wrap {
* is bound to the driver.
* @dynids: used internally to hold the list of dynamically added device
* ids for this driver.
- * @drvwrap: Driver-model core structure wrapper.
+ * @driver: The driver-model core driver structure.
* @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
* added to this driver by preventing the sysfs file from being created.
* @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
@@ -1249,13 +1231,13 @@ struct usb_driver {
const struct attribute_group **dev_groups;
struct usb_dynids dynids;
- struct usbdrv_wrap drvwrap;
+ struct device_driver driver;
unsigned int no_dynamic_id:1;
unsigned int supports_autosuspend:1;
unsigned int disable_hub_initiated_lpm:1;
unsigned int soft_unbind:1;
};
-#define to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver)
+#define to_usb_driver(d) container_of(d, struct usb_driver, driver)
/**
* struct usb_device_driver - identifies USB device driver to usbcore
@@ -1271,9 +1253,12 @@ struct usb_driver {
* module is being unloaded.
* @suspend: Called when the device is going to be suspended by the system.
* @resume: Called when the device is being resumed by the system.
+ * @choose_configuration: If non-NULL, called instead of the default
+ * usb_choose_configuration(). If this returns an error then we'll go
+ * on to call the normal usb_choose_configuration().
* @dev_groups: Attributes attached to the device that will be created once it
* is bound to the driver.
- * @drvwrap: Driver-model core structure wrapper.
+ * @driver: The driver-model core driver structure.
* @id_table: used with @match() to select better matching driver at
* probe() time.
* @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
@@ -1282,7 +1267,7 @@ struct usb_driver {
* resume and suspend functions will be called in addition to the driver's
* own, so this part of the setup does not need to be replicated.
*
- * USB drivers must provide all the fields listed above except drvwrap,
+ * USB drivers must provide all the fields listed above except driver,
* match, and id_table.
*/
struct usb_device_driver {
@@ -1294,14 +1279,17 @@ struct usb_device_driver {
int (*suspend) (struct usb_device *udev, pm_message_t message);
int (*resume) (struct usb_device *udev, pm_message_t message);
+
+ int (*choose_configuration) (struct usb_device *udev);
+
const struct attribute_group **dev_groups;
- struct usbdrv_wrap drvwrap;
+ struct device_driver driver;
const struct usb_device_id *id_table;
unsigned int supports_autosuspend:1;
unsigned int generic_subclass:1;
};
#define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
- drvwrap.driver)
+ driver)
/**
* struct usb_class_driver - identifies a USB driver that wants to use the USB major number
@@ -1742,11 +1730,6 @@ static inline void usb_fill_bulk_urb(struct urb *urb,
* encoding of the endpoint interval, and express polling intervals in
* microframes (eight per millisecond) rather than in frames (one per
* millisecond).
- *
- * Wireless USB also uses the logarithmic encoding, but specifies it in units of
- * 128us instead of 125us. For Wireless USB devices, the interval is passed
- * through to the host controller, rather than being translated into microframe
- * units.
*/
static inline void usb_fill_int_urb(struct urb *urb,
struct usb_device *dev,
@@ -1835,22 +1818,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size,
void usb_free_coherent(struct usb_device *dev, size_t size,
void *addr, dma_addr_t dma);
-#if 0
-struct urb *usb_buffer_map(struct urb *urb);
-void usb_buffer_dmasync(struct urb *urb);
-void usb_buffer_unmap(struct urb *urb);
-#endif
-
-struct scatterlist;
-int usb_buffer_map_sg(const struct usb_device *dev, int is_in,
- struct scatterlist *sg, int nents);
-#if 0
-void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in,
- struct scatterlist *sg, int n_hw_ents);
-#endif
-void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in,
- struct scatterlist *sg, int n_hw_ents);
-
/*-------------------------------------------------------------------*
* SYNCHRONOUS CALL SUPPORT *
*-------------------------------------------------------------------*/
diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index ca796dc1a984..6e5555610010 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -82,7 +82,7 @@ struct uac_clock_source_descriptor {
#define UAC_CLOCK_SOURCE_TYPE_INT_PROG 0x3
#define UAC_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 2)
-/* 4.7.2.2 Clock Source Descriptor */
+/* 4.7.2.2 Clock Selector Descriptor */
struct uac_clock_selector_descriptor {
__u8 bLength;
@@ -91,7 +91,7 @@ struct uac_clock_selector_descriptor {
__u8 bClockID;
__u8 bNrInPins;
__u8 baCSourceID[];
- /* bmControls and iClockSource omitted */
+ /* bmControls and iClockSelector omitted */
} __attribute__((packed));
/* 4.7.2.3 Clock Multiplier Descriptor */
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 969e7dba6358..c93b410b314a 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -3,7 +3,7 @@
* This file holds USB constants and structures that are needed for
* USB device APIs. These are used by the USB device model, which is
* defined in chapter 9 of the USB 2.0 specification and in the
- * Wireless USB 1.0 (spread around). Linux has several APIs in C that
+ * Wireless USB 1.0 spec (now defunct). Linux has several APIs in C that
* need these:
*
* - the host side Linux-USB kernel driver API;
@@ -14,9 +14,6 @@
* act either as a USB host or as a USB device. That means the host and
* device side APIs benefit from working well together.
*
- * There's also "Wireless USB", using low power short range radios for
- * peripheral interconnection but otherwise building on the USB framework.
- *
* Note all descriptors are declared '__attribute__((packed))' so that:
*
* [a] they never get padded, either internally (USB spec writers
diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index ee38835ed77c..5a7f96684ea2 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -63,6 +63,8 @@ struct ci_hdrc_platform_data {
#define CI_HDRC_IMX_IS_HSIC BIT(14)
#define CI_HDRC_PMQOS BIT(15)
#define CI_HDRC_PHY_VBUS_CONTROL BIT(16)
+#define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17)
+#define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18)
enum usb_dr_mode dr_mode;
#define CI_HDRC_CONTROLLER_RESET_EVENT 0
#define CI_HDRC_CONTROLLER_STOPPED_EVENT 1
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 07531c4f4350..af3cd2aae4bc 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -35,6 +35,14 @@
* are ready. The control transfer will then be kept from completing till
* all the function drivers that requested for USB_GADGET_DELAYED_STAUS
* invoke usb_composite_setup_continue().
+ *
+ * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they
+ * must delay completing the status stage for 0-length control transfers
+ * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from
+ * the gadget driver's setup() callback.
+ * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS,
+ * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS
+ * must be contained within the composite framework.
*/
#define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */
@@ -450,29 +458,6 @@ static inline struct usb_composite_driver *to_cdriver(
*
* One of these devices is allocated and initialized before the
* associated device driver's bind() is called.
- *
- * OPEN ISSUE: it appears that some WUSB devices will need to be
- * built by combining a normal (wired) gadget with a wireless one.
- * This revision of the gadget framework should probably try to make
- * sure doing that won't hurt too much.
- *
- * One notion for how to handle Wireless USB devices involves:
- *
- * (a) a second gadget here, discovery mechanism TBD, but likely
- * needing separate "register/unregister WUSB gadget" calls;
- * (b) updates to usb_gadget to include flags "is it wireless",
- * "is it wired", plus (presumably in a wrapper structure)
- * bandgroup and PHY info;
- * (c) presumably a wireless_ep wrapping a usb_ep, and reporting
- * wireless-specific parameters like maxburst and maxsequence;
- * (d) configurations that are specific to wireless links;
- * (e) function drivers that understand wireless configs and will
- * support wireless for (additional) function instances;
- * (f) a function to support association setup (like CBAF), not
- * necessarily requiring a wireless adapter;
- * (g) composite device setup that can create one or more wireless
- * configs, including appropriate association setup support;
- * (h) more, TBD.
*/
struct usb_composite_dev {
struct usb_gadget *gadget;
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 75bda0783395..56dda8e1562d 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -52,6 +52,7 @@ struct usb_ep;
* @short_not_ok: When reading data, makes short packets be
* treated as errors (queue stops advancing till cleanup).
* @dma_mapped: Indicates if request has been mapped to DMA (internal)
+ * @sg_was_mapped: Set if the scatterlist has been mapped before the request
* @complete: Function called when request completes, so this request and
* its buffer may be re-used. The function will always be called with
* interrupts disabled, and it must not sleep.
@@ -111,6 +112,7 @@ struct usb_request {
unsigned zero:1;
unsigned short_not_ok:1;
unsigned dma_mapped:1;
+ unsigned sg_was_mapped:1;
void (*complete)(struct usb_ep *ep,
struct usb_request *req);
@@ -711,6 +713,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget)
* get_interface. Setting a configuration (or interface) is where
* endpoints should be activated or (config 0) shut down.
*
+ * The gadget driver's setup() callback does not have to queue a response to
+ * ep0 within the setup() call, the driver can do it after setup() returns.
+ * The UDC driver must wait until such a response is queued before proceeding
+ * with the data/status stages of the control transfer.
+ *
+ * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS
+ * being returned from the setup() callback, which is a bug. See the comment
+ * next to USB_GADGET_DELAYED_STATUS for details.
+ *
* (Note that only the default control endpoint is supported. Neither
* hosts nor devices generally support control traffic except to ep0.)
*
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 4e9623e8492b..ac95e7c89df5 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -55,7 +55,7 @@ struct giveback_urb_bh {
bool high_prio;
spinlock_t lock;
struct list_head head;
- struct tasklet_struct bh;
+ struct work_struct bh;
struct usb_host_endpoint *completing_ep;
};
@@ -154,7 +154,6 @@ struct usb_hcd {
/* The next flag is a stopgap, to be removed when all the HCDs
* support the new root-hub polling mechanism. */
unsigned uses_new_polling:1;
- unsigned wireless:1; /* Wireless USB HCD */
unsigned has_tt:1; /* Integrated TT in root hub */
unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */
unsigned can_do_streams:1; /* HC supports streams */
@@ -249,7 +248,6 @@ struct hc_driver {
#define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */
#define HCD_USB11 0x0010 /* USB 1.1 */
#define HCD_USB2 0x0020 /* USB 2.0 */
-#define HCD_USB25 0x0030 /* Wireless USB 1.0 (USB 2.5)*/
#define HCD_USB3 0x0040 /* USB 3.0 */
#define HCD_USB31 0x0050 /* USB 3.1 */
#define HCD_USB32 0x0060 /* USB 3.2 */
@@ -374,8 +372,9 @@ struct hc_driver {
* or bandwidth constraints.
*/
void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
- /* Returns the hardware-chosen device address */
- int (*address_device)(struct usb_hcd *, struct usb_device *udev);
+ /* Set the hardware-chosen device address */
+ int (*address_device)(struct usb_hcd *, struct usb_device *udev,
+ unsigned int timeout_ms);
/* prepares the hardware to send commands to the device */
int (*enable_device)(struct usb_hcd *, struct usb_device *udev);
/* Notifies the HCD after a hub descriptor is fetched.
@@ -486,8 +485,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev,
extern void usb_hcd_pci_remove(struct pci_dev *dev);
extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
+#ifdef CONFIG_USB_PCI_AMD
extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev);
+static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev,
+ const struct hc_driver *driver)
+{
+ if (!usb_hcd_amd_remote_wakeup_quirk(dev))
+ return false;
+ if (driver->flags & (HCD_USB11 | HCD_USB3))
+ return true;
+ return false;
+}
+#else /* CONFIG_USB_PCI_AMD */
+static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev,
+ const struct hc_driver *driver)
+{
+ return false;
+}
+#endif
extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
#endif /* CONFIG_USB_PCI */
diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h
new file mode 100644
index 000000000000..47661feda96c
--- /dev/null
+++ b/include/linux/usb/ljca.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023, Intel Corporation. All rights reserved.
+ */
+#ifndef _LINUX_USB_LJCA_H_
+#define _LINUX_USB_LJCA_H_
+
+#include <linux/auxiliary_bus.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define LJCA_MAX_GPIO_NUM 64
+
+#define auxiliary_dev_to_ljca_client(auxiliary_dev) \
+ container_of(auxiliary_dev, struct ljca_client, auxdev)
+
+struct ljca_adapter;
+
+/**
+ * typedef ljca_event_cb_t - event callback function signature
+ *
+ * @context: the execution context of who registered this callback
+ * @cmd: the command from device for this event
+ * @evt_data: the event data payload
+ * @len: the event data payload length
+ *
+ * The callback function is called in interrupt context and the data payload is
+ * only valid during the call. If the user needs later access of the data, it
+ * must copy it.
+ */
+typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len);
+
+/**
+ * struct ljca_client - represent a ljca client device
+ *
+ * @type: ljca client type
+ * @id: ljca client id within same client type
+ * @link: ljca client on the same ljca adapter
+ * @auxdev: auxiliary device object
+ * @adapter: ljca adapter the ljca client sit on
+ * @context: the execution context of the event callback
+ * @event_cb: ljca client driver register this callback to get
+ * firmware asynchronous rx buffer pending notifications
+ * @event_cb_lock: spinlock to protect event callback
+ */
+struct ljca_client {
+ u8 type;
+ u8 id;
+ struct list_head link;
+ struct auxiliary_device auxdev;
+ struct ljca_adapter *adapter;
+
+ void *context;
+ ljca_event_cb_t event_cb;
+ /* lock to protect event_cb */
+ spinlock_t event_cb_lock;
+};
+
+/**
+ * struct ljca_gpio_info - ljca gpio client device info
+ *
+ * @num: ljca gpio client device pin number
+ * @valid_pin_map: ljca gpio client device valid pin mapping
+ */
+struct ljca_gpio_info {
+ unsigned int num;
+ DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM);
+};
+
+/**
+ * struct ljca_i2c_info - ljca i2c client device info
+ *
+ * @id: ljca i2c client device identification number
+ * @capacity: ljca i2c client device capacity
+ * @intr_pin: ljca i2c client device interrupt pin number if exists
+ */
+struct ljca_i2c_info {
+ u8 id;
+ u8 capacity;
+ u8 intr_pin;
+};
+
+/**
+ * struct ljca_spi_info - ljca spi client device info
+ *
+ * @id: ljca spi client device identification number
+ * @capacity: ljca spi client device capacity
+ */
+struct ljca_spi_info {
+ u8 id;
+ u8 capacity;
+};
+
+/**
+ * ljca_register_event_cb - register a callback function to receive events
+ *
+ * @client: ljca client device
+ * @event_cb: callback function
+ * @context: execution context of event callback
+ *
+ * Return: 0 in case of success, negative value in case of error
+ */
+int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, void *context);
+
+/**
+ * ljca_unregister_event_cb - unregister the callback function for an event
+ *
+ * @client: ljca client device
+ */
+void ljca_unregister_event_cb(struct ljca_client *client);
+
+/**
+ * ljca_transfer - issue a LJCA command and wait for a response
+ *
+ * @client: ljca client device
+ * @cmd: the command to be sent to the device
+ * @obuf: the buffer to be sent to the device; it can be NULL if the user
+ * doesn't need to transmit data with this command
+ * @obuf_len: the size of the buffer to be sent to the device; it should
+ * be 0 when obuf is NULL
+ * @ibuf: any data associated with the response will be copied here; it can be
+ * NULL if the user doesn't need the response data
+ * @ibuf_len: must be initialized to the input buffer size
+ *
+ * Return: the actual length of response data for success, negative value for errors
+ */
+int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf,
+ u8 obuf_len, u8 *ibuf, u8 ibuf_len);
+
+/**
+ * ljca_transfer_noack - issue a LJCA command without a response
+ *
+ * @client: ljca client device
+ * @cmd: the command to be sent to the device
+ * @obuf: the buffer to be sent to the device; it can be NULL if the user
+ * doesn't need to transmit data with this command
+ * @obuf_len: the size of the buffer to be sent to the device
+ *
+ * Return: 0 for success, negative value for errors
+ */
+int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf,
+ u8 obuf_len);
+
+#endif
diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h
index 98487fd7ab11..de42f14bd280 100644
--- a/include/linux/usb/of.h
+++ b/include/linux/usb/of.h
@@ -6,6 +6,7 @@
#ifndef __LINUX_USB_OF_H
#define __LINUX_USB_OF_H
+#include <linux/usb.h>
#include <linux/usb/ch9.h>
#include <linux/usb/otg.h>
#include <linux/usb/phy.h>
@@ -17,6 +18,7 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0);
bool of_usb_host_tpl_support(struct device_node *np);
int of_usb_update_otg_caps(struct device_node *np,
struct usb_otg_caps *otg_caps);
+enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1);
struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1);
bool usb_of_has_combined_node(struct usb_device *udev);
struct device_node *usb_of_get_interface_node(struct usb_device *udev,
@@ -37,6 +39,11 @@ static inline int of_usb_update_otg_caps(struct device_node *np,
{
return 0;
}
+static inline enum usb_port_connect_type
+usb_of_get_connect_type(const struct usb_device *hub, int port1)
+{
+ return USB_PORT_CONNECT_TYPE_UNKNOWN;
+}
static inline struct device_node *
usb_of_get_device_node(struct usb_device *hub, int port1)
{
diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index c59fb79a42e8..d50098fb16b5 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -228,6 +228,7 @@ enum pd_pdo_type {
#define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */
#define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */
#define PDO_FIXED_FRS_CURR_SHIFT 23
+#define PDO_FIXED_PEAK_CURR_SHIFT 20
#define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */
#define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */
@@ -482,6 +483,7 @@ static inline unsigned int rdo_max_power(u32 rdo)
#define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */
#define PD_T_SINK_TX 16 /* 16 - 20 ms */
#define PD_T_CHUNK_NOT_SUPP 42 /* 40 - 50 ms */
+#define PD_T_VCONN_STABLE 50
#define PD_T_DRP_TRY 100 /* 75 - 150 ms */
#define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */
diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h
index b057250704e8..5c48e8a81403 100644
--- a/include/linux/usb/pd_vdo.h
+++ b/include/linux/usb/pd_vdo.h
@@ -7,6 +7,7 @@
#define __LINUX_USB_PD_VDO_H
#include "pd.h"
+#include <linux/bitfield.h>
/*
* VDO : Vendor Defined Message Object
@@ -86,12 +87,15 @@
*
* Request is simply properly formatted SVDM header
*
- * Response is 4 data objects:
+ * Response is 4 data objects for Power Delivery 2.0 and Passive Cables for
+ * Power Delivery 3.0. Active Cables in Power Delivery 3.0 have 5 data objects.
* [0] :: SVDM header
* [1] :: Identitiy header
* [2] :: Cert Stat VDO
* [3] :: (Product | Cable) VDO
+ * [4] :: Cable VDO 1
* [4] :: AMA VDO
+ * [5] :: Cable VDO 2
*
*/
#define VDO_INDEX_HDR 0
@@ -100,6 +104,8 @@
#define VDO_INDEX_CABLE 3
#define VDO_INDEX_PRODUCT 3
#define VDO_INDEX_AMA 4
+#define VDO_INDEX_CABLE_1 4
+#define VDO_INDEX_CABLE_2 5
/*
* SVDM Identity Header
@@ -150,6 +156,7 @@
#define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26))
#define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7)
#define PD_IDH_CONN_TYPE(vdo) (((vdo) >> 21) & 0x3)
+#define PD_IDH_HOST_SUPP(vdo) ((vdo) & (1 << 31))
/*
* Cert Stat VDO
@@ -182,7 +189,7 @@
* <5:3> :: Alternate modes
* <2:0> :: USB highest speed
*/
-#define PD_VDO_UFP_DEVCAP(vdo) (((vdo) & GENMASK(27, 24)) >> 24)
+#define PD_VDO_UFP_DEVCAP(vdo) FIELD_GET(GENMASK(27, 24), vdo)
/* UFP VDO Version */
#define UFP_VDO_VER1_2 2
@@ -241,7 +248,7 @@
* <21:5> :: Reserved
* <4:0> :: Port number
*/
-#define PD_VDO_DFP_HOSTCAP(vdo) (((vdo) & GENMASK(26, 24)) >> 24)
+#define PD_VDO_DFP_HOSTCAP(vdo) FIELD_GET(GENMASK(26, 24), vdo)
#define DFP_VDO_VER1_1 1
#define HOST_USB2_CAPABLE BIT(0)
@@ -376,6 +383,7 @@
| ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \
| (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7))
+#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7)
#define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3)
/*
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index eeb7c2157c72..59409c1fc3de 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -72,4 +72,7 @@
/* device has endpoints that should be ignored */
#define USB_QUIRK_ENDPOINT_IGNORE BIT(15)
+/* short SET_ADDRESS request timeout */
+#define USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT BIT(16)
+
#endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 20d88b1defc3..33a4c146dc19 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -29,6 +29,8 @@
#define VENDOR_ID_LINKSYS 0x13b1
#define VENDOR_ID_NVIDIA 0x0955
#define VENDOR_ID_TPLINK 0x2357
+#define VENDOR_ID_DLINK 0x2001
+#define VENDOR_ID_ASUS 0x0b05
#if IS_REACHABLE(CONFIG_USB_RTL8152)
extern u8 rtl8152_get_version(struct usb_interface *intf);
diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h
index d418c55523a7..372898d9eeb0 100644
--- a/include/linux/usb/renesas_usbhs.h
+++ b/include/linux/usb/renesas_usbhs.h
@@ -5,16 +5,6 @@
* Copyright (C) 2011 Renesas Solutions Corp.
* Copyright (C) 2019 Renesas Electronics Corporation
* Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
*/
#ifndef RENESAS_USB_H
#define RENESAS_USB_H
diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h
index 85e95a3251d3..47a86b8a4a50 100644
--- a/include/linux/usb/tcpci.h
+++ b/include/linux/usb/tcpci.h
@@ -36,7 +36,9 @@
#define TCPC_ALERT_MASK 0x12
#define TCPC_POWER_STATUS_MASK 0x14
-#define TCPC_FAULT_STATUS_MASK 0x15
+
+#define TCPC_FAULT_STATUS_MASK 0x15
+#define TCPC_FAULT_STATUS_MASK_VCONN_OC BIT(1)
#define TCPC_EXTENDED_STATUS_MASK 0x16
#define TCPC_EXTENDED_STATUS_MASK_VSAFE0V BIT(0)
@@ -103,6 +105,8 @@
#define TCPC_POWER_STATUS_SINKING_VBUS BIT(0)
#define TCPC_FAULT_STATUS 0x1f
+#define TCPC_FAULT_STATUS_ALL_REG_RST_TO_DEFAULT BIT(7)
+#define TCPC_FAULT_STATUS_VCONN_OC BIT(1)
#define TCPC_ALERT_EXTENDED 0x21
@@ -141,6 +145,7 @@
#define TCPC_RX_BYTE_CNT 0x30
#define TCPC_RX_BUF_FRAME_TYPE 0x31
#define TCPC_RX_BUF_FRAME_TYPE_SOP 0
+#define TCPC_RX_BUF_FRAME_TYPE_SOP1 1
#define TCPC_RX_HDR 0x32
#define TCPC_RX_DATA 0x34 /* through 0x4f */
@@ -194,12 +199,23 @@ struct tcpci;
* Chip level drivers are expected to check for contaminant and call
* tcpm_clean_port when the port is clean to put the port back into
* toggling state.
+ * @cable_comm_capable
+ * optional; Set when TCPC can communicate with cable plugs over SOP'
+ * @attempt_vconn_swap_discovery:
+ * Optional; The callback is called by the TCPM when the result of
+ * a Discover Identity request indicates that the port partner is
+ * a receptacle capable of modal operation. Chip level TCPCI drivers
+ * can implement their own policy to determine if and when a Vconn
+ * swap following Discover Identity on SOP' occurs.
+ * Return true when the TCPM is allowed to request a Vconn swap
+ * after Discovery Identity on SOP.
*/
struct tcpci_data {
struct regmap *regmap;
unsigned char TX_BUF_BYTE_x_hidden:1;
unsigned char auto_discharge_disconnect:1;
unsigned char vbus_vsafe0v:1;
+ unsigned char cable_comm_capable:1;
int (*init)(struct tcpci *tcpci, struct tcpci_data *data);
int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data,
@@ -211,6 +227,7 @@ struct tcpci_data {
void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data,
bool capable);
void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data);
+ bool (*attempt_vconn_swap_discovery)(struct tcpci *tcpci, struct tcpci_data *data);
};
struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data);
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index ab7ca872950b..061da9546a81 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -119,6 +119,17 @@ enum tcpm_transmit_type {
* at the end of the deboumce period or when the port is still
* toggling. Chip level drivers are expected to check for contaminant
* and call tcpm_clean_port when the port is clean.
+ * @cable_comm_capable
+ * Optional; Returns whether cable communication over SOP' is supported
+ * by the tcpc
+ * @attempt_vconn_swap_discovery:
+ * Optional; The callback is called by the TCPM when the result of
+ * a Discover Identity request indicates that the port partner is
+ * a receptacle capable of modal operation. Chip level TCPCI drivers
+ * can implement their own policy to determine if and when a Vconn
+ * swap following Discover Identity on SOP' occurs.
+ * Return true when the TCPM is allowed to request a Vconn swap
+ * after Discovery Identity on SOP.
*/
struct tcpc_dev {
struct fwnode_handle *fwnode;
@@ -133,6 +144,8 @@ struct tcpc_dev {
enum typec_cc_status *cc2);
int (*set_polarity)(struct tcpc_dev *dev,
enum typec_cc_polarity polarity);
+ int (*set_orientation)(struct tcpc_dev *dev,
+ enum typec_orientation orientation);
int (*set_vconn)(struct tcpc_dev *dev, bool on);
int (*set_vbus)(struct tcpc_dev *dev, bool on, bool charge);
int (*set_current_limit)(struct tcpc_dev *dev, u32 max_ma, u32 mv);
@@ -154,6 +167,8 @@ struct tcpc_dev {
bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev);
void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable);
void (*check_contaminant)(struct tcpc_dev *dev);
+ bool (*cable_comm_capable)(struct tcpc_dev *dev);
+ bool (*attempt_vconn_swap_discovery)(struct tcpc_dev *dev);
};
struct tcpm_port;
@@ -166,12 +181,14 @@ void tcpm_cc_change(struct tcpm_port *port);
void tcpm_sink_frs(struct tcpm_port *port);
void tcpm_sourcing_vbus(struct tcpm_port *port);
void tcpm_pd_receive(struct tcpm_port *port,
- const struct pd_message *msg);
+ const struct pd_message *msg,
+ enum tcpm_transmit_type rx_sop_type);
void tcpm_pd_transmit_complete(struct tcpm_port *port,
enum tcpm_transmit_status status);
void tcpm_pd_hard_reset(struct tcpm_port *port);
void tcpm_tcpc_reset(struct tcpm_port *port);
void tcpm_port_clean(struct tcpm_port *port);
bool tcpm_port_is_toggling(struct tcpm_port *port);
+void tcpm_port_error_recovery(struct tcpm_port *port);
#endif /* __LINUX_USB_TCPM_H */
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 8fa781207970..b35b427561ab 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -18,6 +18,7 @@ struct typec_cable;
struct typec_plug;
struct typec_port;
struct typec_altmode_ops;
+struct typec_cable_ops;
struct fwnode_handle;
struct device;
@@ -157,6 +158,9 @@ void typec_port_register_altmodes(struct typec_port *port,
const struct typec_altmode_ops *ops, void *drvdata,
struct typec_altmode **altmodes, size_t n);
+void typec_port_register_cable_ops(struct typec_altmode **altmodes, int max_altmodes,
+ const struct typec_cable_ops *ops);
+
void typec_unregister_altmode(struct typec_altmode *altmode);
struct typec_port *typec_altmode2port(struct typec_altmode *alt);
@@ -202,6 +206,8 @@ struct typec_cable_desc {
* @accessory: Audio, Debug or none.
* @identity: Discover Identity command data
* @pd_revision: USB Power Delivery Specification Revision if supported
+ * @attach: Notification about attached USB device
+ * @deattach: Notification about removed USB device
*
* Details about a partner that is attached to USB Type-C port. If @identity
* member exists when partner is registered, a directory named "identity" is
@@ -217,6 +223,9 @@ struct typec_partner_desc {
enum typec_accessory accessory;
struct usb_pd_identity *identity;
u16 pd_revision; /* 0300H = "3.0" */
+
+ void (*attach)(struct typec_partner *partner, struct device *dev);
+ void (*deattach)(struct typec_partner *partner, struct device *dev);
};
/**
@@ -328,6 +337,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner,
enum usb_pd_svdm_ver svdm_version);
int typec_get_negotiated_svdm_version(struct typec_port *port);
+int typec_get_cable_svdm_version(struct typec_port *port);
+void typec_cable_set_svdm_version(struct typec_cable *cable, enum usb_pd_svdm_ver svdm_version);
+
struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner,
struct usb_power_delivery_desc *desc);
@@ -335,4 +347,36 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_
int typec_partner_set_usb_power_delivery(struct typec_partner *partner,
struct usb_power_delivery *pd);
+/**
+ * struct typec_connector - Representation of Type-C port for external drivers
+ * @attach: notification about device removal
+ * @deattach: notification about device removal
+ *
+ * Drivers that control the USB and other ports (DisplayPorts, etc.), that are
+ * connected to the Type-C connectors, can use these callbacks to inform the
+ * Type-C connector class about connections and disconnections. That information
+ * can then be used by the typec-port drivers to power on or off parts that are
+ * needed or not needed - as an example, in USB mode if USB2 device is
+ * enumerated, USB3 components (retimers, phys, and what have you) do not need
+ * to be powered on.
+ *
+ * The attached (enumerated) devices will be liked with the typec-partner device.
+ */
+struct typec_connector {
+ void (*attach)(struct typec_connector *con, struct device *dev);
+ void (*deattach)(struct typec_connector *con, struct device *dev);
+};
+
+static inline void typec_attach(struct typec_connector *con, struct device *dev)
+{
+ if (con && con->attach)
+ con->attach(con, dev);
+}
+
+static inline void typec_deattach(struct typec_connector *con, struct device *dev)
+{
+ if (con && con->deattach)
+ con->deattach(con, dev);
+}
+
#endif /* __LINUX_USB_TYPEC_H */
diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
index 350d49012659..b3c0866ea70f 100644
--- a/include/linux/usb/typec_altmode.h
+++ b/include/linux/usb/typec_altmode.h
@@ -20,6 +20,7 @@ struct typec_altmode_ops;
* @active: Tells has the mode been entered or not
* @desc: Optional human readable description of the mode
* @ops: Operations vector from the driver
+ * @cable_ops: Cable operations vector from the driver.
*/
struct typec_altmode {
struct device dev;
@@ -30,6 +31,7 @@ struct typec_altmode {
char *desc;
const struct typec_altmode_ops *ops;
+ const struct typec_cable_ops *cable_ops;
};
#define to_typec_altmode(d) container_of(d, struct typec_altmode, dev)
@@ -67,7 +69,7 @@ struct typec_altmode_ops {
int typec_altmode_enter(struct typec_altmode *altmode, u32 *vdo);
int typec_altmode_exit(struct typec_altmode *altmode);
-void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
+int typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
int typec_altmode_vdm(struct typec_altmode *altmode,
const u32 header, const u32 *vdo, int count);
int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
@@ -75,6 +77,34 @@ int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
const struct typec_altmode *
typec_altmode_get_partner(struct typec_altmode *altmode);
+/**
+ * struct typec_cable_ops - Cable alternate mode operations vector
+ * @enter: Operations to be executed with Enter Mode Command
+ * @exit: Operations to be executed with Exit Mode Command
+ * @vdm: Callback for SVID specific commands
+ */
+struct typec_cable_ops {
+ int (*enter)(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo);
+ int (*exit)(struct typec_altmode *altmode, enum typec_plug_index sop);
+ int (*vdm)(struct typec_altmode *altmode, enum typec_plug_index sop,
+ const u32 hdr, const u32 *vdo, int cnt);
+};
+
+int typec_cable_altmode_enter(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo);
+int typec_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop);
+int typec_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop,
+ const u32 header, const u32 *vdo, int count);
+
+/**
+ * typec_altmode_get_cable_svdm_version - Get negotiated SVDM version for cable plug
+ * @altmode: Handle to the alternate mode
+ */
+static inline int
+typec_altmode_get_cable_svdm_version(struct typec_altmode *altmode)
+{
+ return typec_get_cable_svdm_version(typec_altmode2port(altmode));
+}
+
/*
* These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C
* Specification. SVID specific connector states are expected to follow and
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
index 8d09c2f0a9b8..f2da264d9c14 100644
--- a/include/linux/usb/typec_dp.h
+++ b/include/linux/usb/typec_dp.h
@@ -3,6 +3,7 @@
#define __USB_TYPEC_DP_H
#include <linux/usb/typec_altmode.h>
+#include <linux/bitfield.h>
#define USB_TYPEC_DP_SID 0xff01
/* USB IF has not assigned a Standard ID (SID) for VirtualLink,
@@ -67,17 +68,26 @@ enum {
#define DP_CAP_UFP_D 1
#define DP_CAP_DFP_D 2
#define DP_CAP_DFP_D_AND_UFP_D 3
-#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */
-#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */
+#define DP_CAP_DP_SIGNALLING(_cap_) FIELD_GET(GENMASK(5, 2), _cap_)
+#define DP_CAP_SIGNALLING_HBR3 1
+#define DP_CAP_SIGNALLING_UHBR10 2
+#define DP_CAP_SIGNALLING_UHBR20 3
#define DP_CAP_RECEPTACLE BIT(6)
#define DP_CAP_USB BIT(7)
-#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8)
-#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16)
+#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(15, 8), _cap_)
+#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(23, 16), _cap_)
/* Get pin assignment taking plug & receptacle into consideration */
#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_))
#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_))
+#define DP_CAP_UHBR_13_5_SUPPORT BIT(26)
+#define DP_CAP_CABLE_TYPE(_cap_) FIELD_GET(GENMASK(29, 28), _cap_)
+#define DP_CAP_CABLE_TYPE_PASSIVE 0
+#define DP_CAP_CABLE_TYPE_RE_TIMER 1
+#define DP_CAP_CABLE_TYPE_RE_DRIVER 2
+#define DP_CAP_CABLE_TYPE_OPTICAL 3
+#define DP_CAP_DPAM_VERSION BIT(30)
/* DisplayPort Status Update VDO bits */
#define DP_STATUS_CONNECTION(_status_) ((_status_) & 3)
@@ -97,13 +107,24 @@ enum {
#define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3)
#define DP_CONF_UFP_U_AS_DFP_D BIT(0)
#define DP_CONF_UFP_U_AS_UFP_D BIT(1)
-#define DP_CONF_SIGNALING_DP BIT(2)
-#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */
+#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2)
+#define DP_CONF_SIGNALLING_SHIFT 2
+#define DP_CONF_SIGNALLING_HBR3 1
+#define DP_CONF_SIGNALLING_UHBR10 2
+#define DP_CONF_SIGNALLING_UHBR20 3
#define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8
#define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8)
/* Helper for setting/getting the pin assignment value to the configuration */
#define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8)
-#define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8)
+#define DP_CONF_GET_PIN_ASSIGN(_conf_) FIELD_GET(GENMASK(15, 8), _conf_)
+#define DP_CONF_UHBR13_5_SUPPORT BIT(26)
+#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28)
+#define DP_CONF_CABLE_TYPE_SHIFT 28
+#define DP_CONF_CABLE_TYPE_PASSIVE 0
+#define DP_CONF_CABLE_TYPE_RE_TIMER 1
+#define DP_CONF_CABLE_TYPE_RE_DRIVER 2
+#define DP_CONF_CABLE_TYPE_OPTICAL 3
+#define DP_CONF_DPAM_VERSION BIT(30)
#endif /* __USB_TYPEC_DP_H */
diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h
index 63dd44b72e0c..fa97d7e00f5c 100644
--- a/include/linux/usb/typec_tbt.h
+++ b/include/linux/usb/typec_tbt.h
@@ -3,6 +3,7 @@
#define __USB_TYPEC_TBT_H
#include <linux/usb/typec_altmode.h>
+#include <linux/bitfield.h>
#define USB_TYPEC_VENDOR_INTEL 0x8087
/* Alias for convenience */
@@ -25,7 +26,7 @@ struct typec_thunderbolt_data {
/* TBT3 Device Discover Mode VDO bits */
#define TBT_MODE BIT(0)
-#define TBT_ADAPTER(_vdo_) (((_vdo_) & BIT(16)) >> 16)
+#define TBT_ADAPTER(_vdo_) FIELD_GET(BIT(16), _vdo_)
#define TBT_ADAPTER_LEGACY 0
#define TBT_ADAPTER_TBT3 1
#define TBT_INTEL_SPECIFIC_B0 BIT(26)
@@ -35,17 +36,18 @@ struct typec_thunderbolt_data {
#define TBT_SET_ADAPTER(a) (((a) & 1) << 16)
/* TBT3 Cable Discover Mode VDO bits */
-#define TBT_CABLE_SPEED(_vdo_) (((_vdo_) & GENMASK(18, 16)) >> 16)
+#define TBT_CABLE_SPEED(_vdo_) FIELD_GET(GENMASK(18, 16), _vdo_)
#define TBT_CABLE_USB3_GEN1 1
#define TBT_CABLE_USB3_PASSIVE 2
#define TBT_CABLE_10_AND_20GBPS 3
-#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \
- (((_vdo_) & GENMASK(20, 19)) >> 19)
+#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) FIELD_GET(GENMASK(20, 19), _vdo_)
+
#define TBT_GEN3_NON_ROUNDED 0
#define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1
#define TBT_CABLE_OPTICAL BIT(21)
#define TBT_CABLE_RETIMER BIT(22)
#define TBT_CABLE_LINK_TRAINING BIT(23)
+#define TBT_CABLE_ACTIVE_PASSIVE BIT(25)
#define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16)
#define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c4..6030a8235617 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -65,6 +65,10 @@ enum rlimit_type {
UCOUNT_RLIMIT_COUNTS,
};
+#if IS_ENABLED(CONFIG_BINFMT_MISC)
+struct binfmt_misc;
+#endif
+
struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
@@ -102,6 +106,10 @@ struct user_namespace {
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
+
+#if IS_ENABLED(CONFIG_BINFMT_MISC)
+ struct binfmt_misc *binfmt_misc;
+#endif
} __randomize_layout;
struct ucounts {
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index ac7b0c96d351..05d59f74fc88 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -36,6 +36,52 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
+/*
+ * Start with fault_pending_wqh and fault_wqh so they're more likely
+ * to be in the same cacheline.
+ *
+ * Locking order:
+ * fd_wqh.lock
+ * fault_pending_wqh.lock
+ * fault_wqh.lock
+ * event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
+ */
+struct userfaultfd_ctx {
+ /* waitqueue head for the pending (i.e. not read) userfaults */
+ wait_queue_head_t fault_pending_wqh;
+ /* waitqueue head for the userfaults */
+ wait_queue_head_t fault_wqh;
+ /* waitqueue head for the pseudo fd to wakeup poll/read */
+ wait_queue_head_t fd_wqh;
+ /* waitqueue head for events */
+ wait_queue_head_t event_wqh;
+ /* a refile sequence protected by fault_pending_wqh lock */
+ seqcount_spinlock_t refile_seq;
+ /* pseudo fd refcounting */
+ refcount_t refcount;
+ /* userfaultfd syscall flags */
+ unsigned int flags;
+ /* features requested from the userspace */
+ unsigned int features;
+ /* released */
+ bool released;
+ /*
+ * Prevents userfaultfd operations (fill/move/wp) from happening while
+ * some non-cooperative event(s) is taking place. Increments are done
+ * in write-mode. Whereas, userfaultfd operations, which includes
+ * reading mmap_changing, is done under read-mode.
+ */
+ struct rw_semaphore map_changing_lock;
+ /* memory mappings are changing because of non-cooperative event */
+ atomic_t mmap_changing;
+ /* mm with one ore more vmas attached to this userfaultfd_ctx */
+ struct mm_struct *mm;
+};
+
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
/* A combined operation mode + behavior flags. */
@@ -46,6 +92,7 @@ enum mfill_atomic_mode {
MFILL_ATOMIC_COPY,
MFILL_ATOMIC_ZEROPAGE,
MFILL_ATOMIC_CONTINUE,
+ MFILL_ATOMIC_POISON,
NR_MFILL_ATOMIC_MODES,
};
@@ -73,22 +120,31 @@ extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
unsigned long dst_addr, struct page *page,
bool newly_allocated, uffd_flags_t flags);
-extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
+extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
unsigned long src_start, unsigned long len,
- atomic_t *mmap_changing, uffd_flags_t flags);
-extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
+ uffd_flags_t flags);
+extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx,
unsigned long dst_start,
- unsigned long len,
- atomic_t *mmap_changing);
-extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start,
- unsigned long len, atomic_t *mmap_changing,
- uffd_flags_t flags);
-extern int mwriteprotect_range(struct mm_struct *dst_mm,
- unsigned long start, unsigned long len,
- bool enable_wp, atomic_t *mmap_changing);
+ unsigned long len);
+extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start,
+ unsigned long len, uffd_flags_t flags);
+extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start,
+ unsigned long len, uffd_flags_t flags);
+extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
+ unsigned long len, bool enable_wp);
extern long uffd_wp_range(struct vm_area_struct *vma,
unsigned long start, unsigned long len, bool enable_wp);
+/* move_pages */
+void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2);
+void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2);
+ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
+ unsigned long src_start, unsigned long len, __u64 flags);
+int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval,
+ struct vm_area_struct *dst_vma,
+ struct vm_area_struct *src_vma,
+ unsigned long dst_addr, unsigned long src_addr);
+
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
@@ -157,11 +213,22 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
}
static inline bool vma_can_userfault(struct vm_area_struct *vma,
- unsigned long vm_flags)
+ unsigned long vm_flags,
+ bool wp_async)
{
+ vm_flags &= __VM_UFFD_FLAGS;
+
if ((vm_flags & VM_UFFD_MINOR) &&
(!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
return false;
+
+ /*
+ * If wp async enabled, and WP is the only mode enabled, allow any
+ * memory type.
+ */
+ if (wp_async && (vm_flags == VM_UFFD_WP))
+ return true;
+
#ifndef CONFIG_PTE_MARKER_UFFD_WP
/*
* If user requested uffd-wp but not enabled pte markers for
@@ -171,6 +238,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
return false;
#endif
+
+ /* By default, allow any of anon|shmem|hugetlb */
return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
vma_is_shmem(vma);
}
@@ -193,6 +262,7 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
+extern bool userfaultfd_wp_async(struct vm_area_struct *vma);
#else /* CONFIG_USERFAULTFD */
@@ -203,6 +273,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
return VM_FAULT_SIGBUS;
}
+static inline long uffd_wp_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long len,
+ bool enable_wp)
+{
+ return false;
+}
+
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
@@ -293,6 +370,11 @@ static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
return false;
}
+static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
+{
+ return false;
+}
+
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index db1b0eaef4eb..7977ca03ac7a 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -7,6 +7,7 @@
#include <linux/interrupt.h>
#include <linux/vhost_iotlb.h>
#include <linux/virtio_net.h>
+#include <linux/virtio_blk.h>
#include <linux/if_ether.h>
/**
@@ -195,6 +196,10 @@ struct vdpa_map_file {
* @idx: virtqueue index
* Returns int: irq number of a virtqueue,
* negative number if no irq assigned.
+ * @get_vq_size: Get the size of a specific virtqueue (optional)
+ * @vdev: vdpa device
+ * @idx: virtqueue index
+ * Return u16: the size of the virtqueue
* @get_vq_align: Get the virtqueue align requirement
* for the device
* @vdev: vdpa device
@@ -204,10 +209,23 @@ struct vdpa_map_file {
* @vdev: vdpa device
* @idx: virtqueue index
* Returns u32: group id for this virtqueue
+ * @get_vq_desc_group: Get the group id for the descriptor table of
+ * a specific virtqueue (optional)
+ * @vdev: vdpa device
+ * @idx: virtqueue index
+ * Returns u32: group id for the descriptor table
+ * portion of this virtqueue. Could be different
+ * than the one from @get_vq_group, in which case
+ * the access to the descriptor table can be
+ * confined to a separate asid, isolating from
+ * the virtqueue's buffer address access.
* @get_device_features: Get virtio features supported by the device
* @vdev: vdpa device
* Returns the virtio features support by the
* device
+ * @get_backend_features: Get parent-specific backend features (optional)
+ * Returns the vdpa features supported by the
+ * device.
* @set_driver_features: Set virtio features supported by the driver
* @vdev: vdpa device
* @features: feature support by the driver
@@ -239,6 +257,17 @@ struct vdpa_map_file {
* @reset: Reset device
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
+ * @compat_reset: Reset device with compatibility quirks to
+ * accommodate older userspace. Only needed by
+ * parent driver which used to have bogus reset
+ * behaviour, and has to maintain such behaviour
+ * for compatibility with older userspace.
+ * Historically compliant driver only has to
+ * implement .reset, Historically non-compliant
+ * driver should implement both.
+ * @vdev: vdpa device
+ * @flags: compatibility quirks for reset
+ * Returns integer: success (0) or error (< 0)
* @suspend: Suspend the device (optional)
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
@@ -314,6 +343,15 @@ struct vdpa_map_file {
* @iova: iova to be unmapped
* @size: size of the area
* Returns integer: success (0) or error (< 0)
+ * @reset_map: Reset device memory mapping to the default
+ * state (optional)
+ * Needed for devices that are using device
+ * specific DMA translation and prefer mapping
+ * to be decoupled from the virtio life cycle,
+ * i.e. device .reset op does not reset mapping
+ * @vdev: vdpa device
+ * @asid: address space identifier
+ * Returns integer: success (0) or error (< 0)
* @get_vq_dma_dev: Get the dma device for a specific
* virtqueue (optional)
* @vdev: vdpa device
@@ -353,11 +391,14 @@ struct vdpa_config_ops {
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
/* vq irq is not expected to be changed once DRIVER_OK is set */
int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx);
+ u16 (*get_vq_size)(struct vdpa_device *vdev, u16 idx);
/* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev);
u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx);
+ u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx);
u64 (*get_device_features)(struct vdpa_device *vdev);
+ u64 (*get_backend_features)(const struct vdpa_device *vdev);
int (*set_driver_features)(struct vdpa_device *vdev, u64 features);
u64 (*get_driver_features)(struct vdpa_device *vdev);
void (*set_config_cb)(struct vdpa_device *vdev,
@@ -369,6 +410,8 @@ struct vdpa_config_ops {
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
int (*reset)(struct vdpa_device *vdev);
+ int (*compat_reset)(struct vdpa_device *vdev, u32 flags);
+#define VDPA_RESET_F_CLEAN_MAP 1
int (*suspend)(struct vdpa_device *vdev);
int (*resume)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev);
@@ -390,6 +433,7 @@ struct vdpa_config_ops {
u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
u64 iova, u64 size);
+ int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
unsigned int asid);
struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
@@ -481,14 +525,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
return vdev->dma_dev;
}
-static inline int vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags)
{
const struct vdpa_config_ops *ops = vdev->config;
int ret;
down_write(&vdev->cf_lock);
vdev->features_valid = false;
- ret = ops->reset(vdev);
+ if (ops->compat_reset && flags)
+ ret = ops->compat_reset(vdev, flags);
+ else
+ ret = ops->reset(vdev);
up_write(&vdev->cf_lock);
return ret;
}
diff --git a/include/linux/verification.h b/include/linux/verification.h
index f34e50ebcf60..cb2d47f28091 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -8,6 +8,7 @@
#ifndef _LINUX_VERIFICATION_H
#define _LINUX_VERIFICATION_H
+#include <linux/errno.h>
#include <linux/types.h>
/*
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 2c137ea94a3e..8b1a29820409 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include <linux/workqueue.h>
#include <linux/poll.h>
+#include <linux/cdev.h>
#include <uapi/linux/vfio.h>
#include <linux/iova_bitmap.h>
@@ -42,7 +43,11 @@ struct vfio_device {
*/
const struct vfio_migration_ops *mig_ops;
const struct vfio_log_ops *log_ops;
+#if IS_ENABLED(CONFIG_VFIO_GROUP)
struct vfio_group *group;
+ struct list_head group_next;
+ struct list_head iommu_entry;
+#endif
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
@@ -51,16 +56,25 @@ struct vfio_device {
/* Members below here are private, not for driver use */
unsigned int index;
struct device device; /* device.kref covers object life circle */
+#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
+ struct cdev cdev;
+#endif
refcount_t refcount; /* user count on registered device*/
unsigned int open_count;
struct completion comp;
- struct list_head group_next;
- struct list_head iommu_entry;
struct iommufd_access *iommufd_access;
void (*put_kvm)(struct kvm *kvm);
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_device *iommufd_device;
- bool iommufd_attached;
+ u8 iommufd_attached:1;
+#endif
+ u8 cdev_opened:1;
+#ifdef CONFIG_DEBUG_FS
+ /*
+ * debug_root is a static property of the vfio_device
+ * which must be set prior to registering the vfio_device.
+ */
+ struct dentry *debug_root;
#endif
};
@@ -73,7 +87,9 @@ struct vfio_device {
* @bind_iommufd: Called when binding the device to an iommufd
* @unbind_iommufd: Opposite of bind_iommufd
* @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the
- * bound iommufd. Undo in unbind_iommufd.
+ * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not
+ * called.
+ * @detach_ioas: Opposite of attach_ioas
* @open_device: Called when the first file descriptor is opened for this device
* @close_device: Opposite of open_device
* @read: Perform read(2) on device file descriptor
@@ -97,6 +113,7 @@ struct vfio_device_ops {
struct iommufd_ctx *ictx, u32 *out_device_id);
void (*unbind_iommufd)(struct vfio_device *vdev);
int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
+ void (*detach_ioas)(struct vfio_device *vdev);
int (*open_device)(struct vfio_device *vdev);
void (*close_device)(struct vfio_device *vdev);
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -114,15 +131,31 @@ struct vfio_device_ops {
};
#if IS_ENABLED(CONFIG_IOMMUFD)
+struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev);
+int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx);
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev);
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
struct iommufd_ctx *ictx, u32 *out_device_id);
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev);
#else
+static inline struct iommufd_ctx *
+vfio_iommufd_device_ictx(struct vfio_device *vdev)
+{
+ return NULL;
+}
+
+static inline int
+vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+ return VFIO_PCI_DEVID_NOT_OWNED;
+}
+
#define vfio_iommufd_physical_bind \
((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
u32 *out_device_id)) NULL)
@@ -130,6 +163,8 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_physical_attach_ioas \
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_physical_detach_ioas \
+ ((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_bind \
((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \
u32 *out_device_id)) NULL)
@@ -137,8 +172,15 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_attach_ioas \
((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_emulated_detach_ioas \
+ ((void (*)(struct vfio_device *vdev)) NULL)
#endif
+static inline bool vfio_device_cdev_opened(struct vfio_device *device)
+{
+ return device->cdev_opened;
+}
+
/**
* struct vfio_migration_ops - VFIO bus device driver migration callbacks
*
@@ -239,20 +281,40 @@ void vfio_unregister_group_dev(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set);
+struct vfio_device *
+vfio_find_device_in_devset(struct vfio_device_set *dev_set,
+ struct device *dev);
int vfio_mig_get_next_state(struct vfio_device *device,
enum vfio_device_mig_state cur_fsm,
enum vfio_device_mig_state new_fsm,
enum vfio_device_mig_state *next_fsm);
+void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
+ u32 req_nodes);
+
/*
* External user API
*/
struct iommu_group *vfio_file_iommu_group(struct file *file);
+
+#if IS_ENABLED(CONFIG_VFIO_GROUP)
bool vfio_file_is_group(struct file *file);
+bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
+#else
+static inline bool vfio_file_is_group(struct file *file)
+{
+ return false;
+}
+
+static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
+{
+ return false;
+}
+#endif
+bool vfio_file_is_valid(struct file *file);
bool vfio_file_enforced_coherent(struct file *file);
void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
-bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long))
@@ -294,6 +356,7 @@ struct virqfd {
wait_queue_entry_t wait;
poll_table pt;
struct work_struct shutdown;
+ struct work_struct flush_inject;
struct virqfd **pvirqfd;
};
@@ -301,5 +364,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *),
void (*thread)(void *, void *), void *data,
struct virqfd **pvirqfd, int fd);
void vfio_virqfd_disable(struct virqfd **pvirqfd);
+void vfio_virqfd_flush_thread(struct virqfd **pvirqfd);
#endif /* VFIO_H */
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 562e8754869d..a2c8b8bba711 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -127,7 +127,35 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
+int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
+ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
+ void __iomem *io, char __user *buf,
+ loff_t off, size_t count, size_t x_start,
+ size_t x_end, bool iswrite);
+bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
+ loff_t reg_start, size_t reg_cnt,
+ loff_t *buf_offset,
+ size_t *intersect_count,
+ size_t *register_offset);
+#define VFIO_IOWRITE_DECLATION(size) \
+int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \
+ bool test_mem, u##size val, void __iomem *io);
+
+VFIO_IOWRITE_DECLATION(8)
+VFIO_IOWRITE_DECLATION(16)
+VFIO_IOWRITE_DECLATION(32)
+#ifdef iowrite64
+VFIO_IOWRITE_DECLATION(64)
+#endif
+
+#define VFIO_IOREAD_DECLATION(size) \
+int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \
+ bool test_mem, u##size *val, void __iomem *io);
+
+VFIO_IOREAD_DECLATION(8)
+VFIO_IOREAD_DECLATION(16)
+VFIO_IOREAD_DECLATION(32)
#endif /* VFIO_PCI_CORE_H */
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index b4b9137f9792..97129a1bbb7d 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: MIT */
+
/*
* The VGA aribiter manages VGA space routing and VGA resource decode to
* allow multiple VGA devices to be used in a system in a safe way.
@@ -5,27 +7,6 @@
* (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
* (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
* (C) Copyright 2007, 2009 Tiago Vignatti <vignatti@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS
- * IN THE SOFTWARE.
- *
*/
#ifndef LINUX_VGA_H
@@ -96,7 +77,7 @@ static inline int vga_client_register(struct pci_dev *pdev,
static inline int vga_get_interruptible(struct pci_dev *pdev,
unsigned int rsrc)
{
- return vga_get(pdev, rsrc, 1);
+ return vga_get(pdev, rsrc, 1);
}
/**
@@ -111,7 +92,7 @@ static inline int vga_get_interruptible(struct pci_dev *pdev,
static inline int vga_get_uninterruptible(struct pci_dev *pdev,
unsigned int rsrc)
{
- return vga_get(pdev, rsrc, 0);
+ return vga_get(pdev, rsrc, 0);
}
static inline void vga_client_unregister(struct pci_dev *pdev)
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index de6041deee37..26c4325aa373 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/mod_devicetable.h>
#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
/**
* struct virtqueue - a queue to register buffers for sending or receiving.
@@ -61,6 +62,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
void *data,
gfp_t gfp);
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
bool virtqueue_kick(struct virtqueue *vq);
bool virtqueue_kick_prepare(struct virtqueue *vq);
@@ -78,6 +81,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq);
unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);
+int virtqueue_set_dma_premapped(struct virtqueue *_vq);
+
bool virtqueue_poll(struct virtqueue *vq, unsigned);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
@@ -95,6 +100,16 @@ dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+ void (*recycle)(struct virtqueue *vq, void *buf));
+
+struct virtio_admin_cmd {
+ __le16 opcode;
+ __le16 group_type;
+ __le64 group_member_id;
+ struct scatterlist *data_sg;
+ struct scatterlist *result_sg;
+};
/**
* struct virtio_device - representation of a device using virtio
@@ -155,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
/**
* struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
* @feature_table_size: number of entries in the feature table array.
@@ -184,10 +199,8 @@ struct virtio_driver {
void (*scan)(struct virtio_device *dev);
void (*remove)(struct virtio_device *dev);
void (*config_changed)(struct virtio_device *dev);
-#ifdef CONFIG_PM
int (*freeze)(struct virtio_device *dev);
int (*restore)(struct virtio_device *dev);
-#endif
};
static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
@@ -195,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
return container_of(drv, struct virtio_driver, driver);
}
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+ __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
void unregister_virtio_driver(struct virtio_driver *drv);
/* module_virtio_driver() - Helper macro for drivers that don't do
@@ -206,4 +222,19 @@ void unregister_virtio_driver(struct virtio_driver *drv);
#define module_virtio_driver(__virtio_driver) \
module_driver(__virtio_driver, register_virtio_driver, \
unregister_virtio_driver)
+
+dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr);
+
+bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr);
+void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir);
+void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir);
#endif /* _LINUX_VIRTIO_H */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 2b3438de2c4d..da9b271b54db 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -93,6 +93,8 @@ typedef void vq_callback_t(struct virtqueue *);
* Returns 0 on success or error status
* If disable_vq_and_reset is set, then enable_vq_after_reset must also be
* set.
+ * @create_avq: create admin virtqueue resource.
+ * @destroy_avq: destroy admin virtqueue resource.
*/
struct virtio_config_ops {
void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -120,6 +122,8 @@ struct virtio_config_ops {
struct virtio_shm_region *region, u8 id);
int (*disable_vq_and_reset)(struct virtqueue *vq);
int (*enable_vq_after_reset)(struct virtqueue *vq);
+ int (*create_avq)(struct virtio_device *vdev);
+ void (*destroy_avq)(struct virtio_device *vdev);
};
/* If driver didn't advertise the feature, it will never appear. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
deleted file mode 100644
index d2e2785af602..000000000000
--- a/include/linux/virtio_console.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers:
- *
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of IBM nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Copyright (C) Red Hat, Inc., 2009, 2010, 2011
- * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011
- */
-#ifndef _LINUX_VIRTIO_CONSOLE_H
-#define _LINUX_VIRTIO_CONSOLE_H
-
-#include <uapi/linux/virtio_console.h>
-
-int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
-#endif /* _LINUX_VIRTIO_CONSOLE_H */
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 7b4dd69555e4..4dfa9b69ca8d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -3,8 +3,10 @@
#define _LINUX_VIRTIO_NET_H
#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
#include <uapi/linux/virtio_net.h>
static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
@@ -49,6 +51,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
const struct virtio_net_hdr *hdr,
bool little_endian)
{
+ unsigned int nh_min_len = sizeof(struct iphdr);
unsigned int gso_type = 0;
unsigned int thlen = 0;
unsigned int p_off = 0;
@@ -65,6 +68,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
gso_type = SKB_GSO_TCPV6;
ip_proto = IPPROTO_TCP;
thlen = sizeof(struct tcphdr);
+ nh_min_len = sizeof(struct ipv6hdr);
break;
case VIRTIO_NET_HDR_GSO_UDP:
gso_type = SKB_GSO_UDP;
@@ -100,7 +104,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (!skb_partial_csum_set(skb, start, off))
return -EINVAL;
- p_off = skb_transport_offset(skb) + thlen;
+ nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb));
+ p_off = nh_min_len + thlen;
if (!pskb_may_pull(skb, p_off))
return -EINVAL;
} else {
@@ -140,7 +145,7 @@ retry:
skb_set_transport_header(skb, keys.control.thoff);
} else if (gso_type) {
- p_off = thlen;
+ p_off = nh_min_len + thlen;
if (!pskb_may_pull(skb, p_off))
return -EINVAL;
}
@@ -151,9 +156,22 @@ retry:
unsigned int nh_off = p_off;
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* UFO may not include transport header in gso_size. */
- if (gso_type & SKB_GSO_UDP)
+ switch (gso_type & ~SKB_GSO_TCP_ECN) {
+ case SKB_GSO_UDP:
+ /* UFO may not include transport header in gso_size. */
nh_off -= thlen;
+ break;
+ case SKB_GSO_UDP_L4:
+ if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+ return -EINVAL;
+ if (skb->csum_offset != offsetof(struct udphdr, check))
+ return -EINVAL;
+ if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (gso_type != SKB_GSO_UDP_L4)
+ return -EINVAL;
+ break;
+ }
/* Kernel has a special handling for GSO_BY_FRAGS. */
if (gso_size == GSO_BY_FRAGS)
diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h
new file mode 100644
index 000000000000..f4a100a0fe2e
--- /dev/null
+++ b/include/linux/virtio_pci_admin.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_PCI_ADMIN_H
+#define _LINUX_VIRTIO_PCI_ADMIN_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#ifdef CONFIG_VIRTIO_PCI_ADMIN_LEGACY
+bool virtio_pci_admin_has_legacy_io(struct pci_dev *pdev);
+int virtio_pci_admin_legacy_common_io_write(struct pci_dev *pdev, u8 offset,
+ u8 size, u8 *buf);
+int virtio_pci_admin_legacy_common_io_read(struct pci_dev *pdev, u8 offset,
+ u8 size, u8 *buf);
+int virtio_pci_admin_legacy_device_io_write(struct pci_dev *pdev, u8 offset,
+ u8 size, u8 *buf);
+int virtio_pci_admin_legacy_device_io_read(struct pci_dev *pdev, u8 offset,
+ u8 size, u8 *buf);
+int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev,
+ u8 req_bar_flags, u8 *bar,
+ u64 *bar_offset);
+#endif
+
+#endif /* _LINUX_VIRTIO_PCI_ADMIN_H */
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index 067ac1d789bc..c0b1b1ca1163 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -5,44 +5,48 @@
#include <linux/pci.h>
#include <linux/virtio_pci.h>
-struct virtio_pci_modern_common_cfg {
- struct virtio_pci_common_cfg cfg;
-
- __le16 queue_notify_data; /* read-write */
- __le16 queue_reset; /* read-write */
-};
-
+/**
+ * struct virtio_pci_modern_device - info for modern PCI virtio
+ * @pci_dev: Ptr to the PCI device struct
+ * @common: Position of the common capability in the PCI config
+ * @device: Device-specific data (non-legacy mode)
+ * @notify_base: Base of vq notifications (non-legacy mode)
+ * @notify_pa: Physical base of vq notifications
+ * @isr: Where to read and clear interrupt
+ * @notify_len: So we can sanity-check accesses
+ * @device_len: So we can sanity-check accesses
+ * @notify_map_cap: Capability for when we need to map notifications per-vq
+ * @notify_offset_multiplier: Multiply queue_notify_off by this value
+ * (non-legacy mode).
+ * @modern_bars: Bitmask of BARs
+ * @id: Device and vendor id
+ * @device_id_check: Callback defined before vp_modern_probe() to be used to
+ * verify the PCI device is a vendor's expected device rather
+ * than the standard virtio PCI device
+ * Returns the found device id or ERRNO
+ * @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64),
+ * for vendor devices with DMA space address limitations
+ */
struct virtio_pci_modern_device {
struct pci_dev *pci_dev;
struct virtio_pci_common_cfg __iomem *common;
- /* Device-specific data (non-legacy mode) */
void __iomem *device;
- /* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
- /* Physical base of vq notifications */
resource_size_t notify_pa;
- /* Where to read and clear interrupt */
u8 __iomem *isr;
- /* So we can sanity-check accesses. */
size_t notify_len;
size_t device_len;
+ size_t common_len;
- /* Capability for when we need to map notifications per-vq. */
int notify_map_cap;
- /* Multiply queue_notify_off by this value. (non-legacy mode). */
u32 notify_offset_multiplier;
-
int modern_bars;
-
struct virtio_device_id id;
- /* optional check for vendor virtio device, returns dev_id or -ERRNO */
int (*device_id_check)(struct pci_dev *pdev);
-
- /* optional mask for devices with limited DMA space */
u64 dma_mask;
};
@@ -121,4 +125,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
+u16 vp_modern_avq_num(struct virtio_pci_modern_device *mdev);
+u16 vp_modern_avq_index(struct virtio_pci_modern_device *mdev);
#endif
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index c58453699ee9..c82089dee0c8 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -12,6 +12,7 @@
struct virtio_vsock_skb_cb {
bool reply;
bool tap_delivered;
+ u32 offset;
};
#define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb))
@@ -159,6 +160,15 @@ struct virtio_transport {
/* Takes ownership of the packet */
int (*send_pkt)(struct sk_buff *skb);
+
+ /* Used in MSG_ZEROCOPY mode. Checks, that provided data
+ * (number of buffers) could be transmitted with zerocopy
+ * mode. If this callback is not implemented for the current
+ * transport - this means that this transport doesn't need
+ * extra checks and can perform zerocopy transmission by
+ * default.
+ */
+ bool (*can_msgzerocopy)(int bufs_num);
};
ssize_t
@@ -246,4 +256,5 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
void virtio_transport_deliver_tap_pkt(struct sk_buff *skb);
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list);
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor);
+int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val);
#endif /* _LINUX_VIRTIO_VSOCK_H */
diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h
deleted file mode 100644
index e9c0cd36c48a..000000000000
--- a/include/linux/vlynq.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org>
- */
-
-#ifndef __VLYNQ_H__
-#define __VLYNQ_H__
-
-#include <linux/device.h>
-#include <linux/types.h>
-
-struct module;
-
-#define VLYNQ_NUM_IRQS 32
-
-struct vlynq_mapping {
- u32 size;
- u32 offset;
-};
-
-enum vlynq_divisor {
- vlynq_div_auto = 0,
- vlynq_ldiv1,
- vlynq_ldiv2,
- vlynq_ldiv3,
- vlynq_ldiv4,
- vlynq_ldiv5,
- vlynq_ldiv6,
- vlynq_ldiv7,
- vlynq_ldiv8,
- vlynq_rdiv1,
- vlynq_rdiv2,
- vlynq_rdiv3,
- vlynq_rdiv4,
- vlynq_rdiv5,
- vlynq_rdiv6,
- vlynq_rdiv7,
- vlynq_rdiv8,
- vlynq_div_external
-};
-
-struct vlynq_device_id {
- u32 id;
- enum vlynq_divisor divisor;
- unsigned long driver_data;
-};
-
-struct vlynq_regs;
-struct vlynq_device {
- u32 id, dev_id;
- int local_irq;
- int remote_irq;
- enum vlynq_divisor divisor;
- u32 regs_start, regs_end;
- u32 mem_start, mem_end;
- u32 irq_start, irq_end;
- int irq;
- int enabled;
- struct vlynq_regs *local;
- struct vlynq_regs *remote;
- struct device dev;
-};
-
-struct vlynq_driver {
- char *name;
- struct vlynq_device_id *id_table;
- int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id);
- void (*remove)(struct vlynq_device *dev);
- struct device_driver driver;
-};
-
-struct plat_vlynq_ops {
- int (*on)(struct vlynq_device *dev);
- void (*off)(struct vlynq_device *dev);
-};
-
-static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv)
-{
- return container_of(drv, struct vlynq_driver, driver);
-}
-
-static inline struct vlynq_device *to_vlynq_device(struct device *device)
-{
- return container_of(device, struct vlynq_device, dev);
-}
-
-extern struct bus_type vlynq_bus_type;
-
-extern int __vlynq_register_driver(struct vlynq_driver *driver,
- struct module *owner);
-
-static inline int vlynq_register_driver(struct vlynq_driver *driver)
-{
- return __vlynq_register_driver(driver, THIS_MODULE);
-}
-
-static inline void *vlynq_get_drvdata(struct vlynq_device *dev)
-{
- return dev_get_drvdata(&dev->dev);
-}
-
-static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data)
-{
- dev_set_drvdata(&dev->dev, data);
-}
-
-static inline u32 vlynq_mem_start(struct vlynq_device *dev)
-{
- return dev->mem_start;
-}
-
-static inline u32 vlynq_mem_end(struct vlynq_device *dev)
-{
- return dev->mem_end;
-}
-
-static inline u32 vlynq_mem_len(struct vlynq_device *dev)
-{
- return dev->mem_end - dev->mem_start + 1;
-}
-
-static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq)
-{
- int irq = dev->irq_start + virq;
- if ((irq < dev->irq_start) || (irq > dev->irq_end))
- return -EINVAL;
-
- return irq;
-}
-
-static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq)
-{
- if ((irq < dev->irq_start) || (irq > dev->irq_end))
- return -EINVAL;
-
- return irq - dev->irq_start;
-}
-
-extern void vlynq_unregister_driver(struct vlynq_driver *driver);
-extern int vlynq_enable_device(struct vlynq_device *dev);
-extern void vlynq_disable_device(struct vlynq_device *dev);
-extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset,
- struct vlynq_mapping *mapping);
-extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset,
- struct vlynq_mapping *mapping);
-extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq);
-extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq);
-
-#endif /* __VLYNQ_H__ */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8abfa1240040..747943bc8cc2 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,9 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSTEAL_KSWAPD,
PGSTEAL_DIRECT,
PGSTEAL_KHUGEPAGED,
- PGDEMOTE_KSWAPD,
- PGDEMOTE_DIRECT,
- PGDEMOTE_KHUGEPAGED,
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
@@ -145,6 +142,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_ZSWAP
ZSWPIN,
ZSWPOUT,
+ ZSWPWB,
#endif
#ifdef CONFIG_X86
DIRECT_MAP_LEVEL2_SPLIT,
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c720be70c8dd..98ea90e90439 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -35,6 +35,7 @@ struct iov_iter; /* in uio.h */
#else
#define VM_DEFER_KMEMLEAK 0
#endif
+#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
/* bits [20..32] reserved for arch specific ioremap internals */
@@ -232,6 +233,10 @@ static inline bool is_vm_area_hugepages(const void *addr)
}
#ifdef CONFIG_MMU
+int vm_area_map_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end, struct page **pages);
+void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end);
void vunmap_range(unsigned long addr, unsigned long end);
static inline void set_vm_flush_reset_perms(void *addr)
{
@@ -253,7 +258,6 @@ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count);
/*
* Internals. Don't use..
*/
-extern struct list_head vmap_area_list;
extern __init void vm_area_add_early(struct vm_struct *vm);
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h
new file mode 100644
index 000000000000..e1dec1a6a749
--- /dev/null
+++ b/include/linux/vmcore_info.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VMCORE_INFO_H
+#define LINUX_VMCORE_INFO_H
+
+#include <linux/linkage.h>
+#include <linux/elfcore.h>
+#include <linux/elf.h>
+
+#define CRASH_CORE_NOTE_NAME "CORE"
+#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
+#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
+#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+
+/*
+ * The per-cpu notes area is a list of notes terminated by a "NULL"
+ * note header. For kdump, the code in vmcore.c runs in the context
+ * of the second kernel to combine them into one note.
+ */
+#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
+ CRASH_CORE_NOTE_NAME_BYTES + \
+ CRASH_CORE_NOTE_DESC_BYTES)
+
+#define VMCOREINFO_BYTES PAGE_SIZE
+#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
+#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
+#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
+ VMCOREINFO_NOTE_NAME_BYTES + \
+ VMCOREINFO_BYTES)
+
+typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+/* Per cpu memory for storing cpu states in case of system crash. */
+extern note_buf_t __percpu *crash_notes;
+
+void crash_update_vmcoreinfo_safecopy(void *ptr);
+void crash_save_vmcoreinfo(void);
+void arch_crash_save_vmcoreinfo(void);
+__printf(1, 2)
+void vmcoreinfo_append_str(const char *fmt, ...);
+phys_addr_t paddr_vmcoreinfo_note(void);
+
+#define VMCOREINFO_OSRELEASE(value) \
+ vmcoreinfo_append_str("OSRELEASE=%s\n", value)
+#define VMCOREINFO_BUILD_ID() \
+ ({ \
+ static_assert(sizeof(vmlinux_build_id) == 20); \
+ vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \
+ })
+
+#define VMCOREINFO_PAGESIZE(value) \
+ vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
+#define VMCOREINFO_SIZE(name) \
+ vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(name))
+#define VMCOREINFO_STRUCT_SIZE(name) \
+ vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_TYPE_OFFSET(name, field) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(name, field))
+#define VMCOREINFO_LENGTH(name, value) \
+ vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
+#define VMCOREINFO_NUMBER(name) \
+ vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
+#define VMCOREINFO_CONFIG(name) \
+ vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
+extern u32 *vmcoreinfo_note;
+
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+ void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+#endif /* LINUX_VMCORE_INFO_H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fed855bae6d8..343906a98d6e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -556,19 +556,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec,
local_irq_restore(flags);
}
-void __mod_lruvec_page_state(struct page *page,
+void __lruvec_stat_mod_folio(struct folio *folio,
enum node_stat_item idx, int val);
-static inline void mod_lruvec_page_state(struct page *page,
+static inline void lruvec_stat_mod_folio(struct folio *folio,
enum node_stat_item idx, int val)
{
unsigned long flags;
local_irq_save(flags);
- __mod_lruvec_page_state(page, idx, val);
+ __lruvec_stat_mod_folio(folio, idx, val);
local_irq_restore(flags);
}
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ lruvec_stat_mod_folio(page_folio(page), idx, val);
+}
+
#else
static inline void __mod_lruvec_state(struct lruvec *lruvec,
@@ -583,37 +589,25 @@ static inline void mod_lruvec_state(struct lruvec *lruvec,
mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}
-static inline void __mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- __mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
+static inline void __lruvec_stat_mod_folio(struct folio *folio,
enum node_stat_item idx, int val)
{
- mod_node_page_state(page_pgdat(page), idx, val);
+ __mod_node_page_state(folio_pgdat(folio), idx, val);
}
-#endif /* CONFIG_MEMCG */
-
-static inline void __inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
+static inline void lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
{
- __mod_lruvec_page_state(page, idx, 1);
+ mod_node_page_state(folio_pgdat(folio), idx, val);
}
-static inline void __dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
{
- __mod_lruvec_page_state(page, idx, -1);
+ mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline void __lruvec_stat_mod_folio(struct folio *folio,
- enum node_stat_item idx, int val)
-{
- __mod_lruvec_page_state(&folio->page, idx, val);
-}
+#endif /* CONFIG_MEMCG */
static inline void __lruvec_stat_add_folio(struct folio *folio,
enum node_stat_item idx)
@@ -627,24 +621,6 @@ static inline void __lruvec_stat_sub_folio(struct folio *folio,
__lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
}
-static inline void inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, -1);
-}
-
-static inline void lruvec_stat_mod_folio(struct folio *folio,
- enum node_stat_item idx, int val)
-{
- mod_lruvec_page_state(&folio->page, idx, val);
-}
-
static inline void lruvec_stat_add_folio(struct folio *folio,
enum node_stat_item idx)
{
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index c1f5aebef170..d008c3d0a9bb 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -25,7 +25,8 @@ extern int fg_console, last_console, want_console;
int vc_allocate(unsigned int console);
int vc_cons_allocated(unsigned int console);
-int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines);
+int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines,
+ bool from_user);
struct vc_data *vc_deallocate(unsigned int console);
void reset_palette(struct vc_data *vc);
void do_blank_screen(int entering_gfx);
@@ -42,6 +43,12 @@ void redraw_screen(struct vc_data *vc, int is_switch);
#define update_screen(x) redraw_screen(x, 0)
#define switch_screen(x) redraw_screen(x, 1)
+static inline int vc_resize(struct vc_data *vc, unsigned int cols,
+ unsigned int lines)
+{
+ return __vc_resize(vc, cols, lines, false);
+}
+
struct tty_struct;
int tioclinux(struct tty_struct *tty, unsigned long arg);
@@ -168,7 +175,4 @@ void vt_set_led_state(unsigned int console, int leds);
void vt_kbd_con_start(unsigned int console);
void vt_kbd_con_stop(unsigned int console);
-void vc_scrolldelta_helper(struct vc_data *c, int lines,
- unsigned int rolled_over, void *_base, unsigned int size);
-
#endif /* _VT_KERN_H */
diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h
deleted file mode 100644
index 3495fd0dc790..000000000000
--- a/include/linux/w1-gpio.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * w1-gpio interface to platform code
- *
- * Copyright (C) 2007 Ville Syrjala <syrjala@sci.fi>
- */
-#ifndef _LINUX_W1_GPIO_H
-#define _LINUX_W1_GPIO_H
-
-struct gpio_desc;
-
-/**
- * struct w1_gpio_platform_data - Platform-dependent data for w1-gpio
- */
-struct w1_gpio_platform_data {
- struct gpio_desc *gpiod;
- struct gpio_desc *pullup_gpiod;
- void (*enable_external_pullup)(int enable);
- unsigned int pullup_duration;
-};
-
-#endif /* _LINUX_W1_GPIO_H */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..8aa3372f21a0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -9,7 +9,6 @@
#include <linux/spinlock.h>
#include <asm/current.h>
-#include <uapi/linux/wait.h>
typedef struct wait_queue_entry wait_queue_entry_t;
@@ -19,10 +18,9 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
/* wait_queue_entry::flags */
#define WQ_FLAG_EXCLUSIVE 0x01
#define WQ_FLAG_WOKEN 0x02
-#define WQ_FLAG_BOOKMARK 0x04
-#define WQ_FLAG_CUSTOM 0x08
-#define WQ_FLAG_DONE 0x10
-#define WQ_FLAG_PRIORITY 0x20
+#define WQ_FLAG_CUSTOM 0x04
+#define WQ_FLAG_DONE 0x08
+#define WQ_FLAG_PRIORITY 0x10
/*
* A single wait-queue entry structure:
@@ -210,9 +208,8 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
}
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
-void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
- unsigned int mode, void *key, wait_queue_entry_t *bookmark);
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
@@ -237,6 +234,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m) \
+ __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
#define wake_up_interruptible_poll(x, m) \
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index 45cd42f55d49..429c7b6afead 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -32,7 +32,7 @@ struct watch_filter {
DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
};
u32 nr_filters; /* Number of filters */
- struct watch_type_filter filters[];
+ struct watch_type_filter filters[] __counted_by(nr_filters);
};
struct watch_queue {
diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h
index 4ca2842d2842..6a5bb052fcc2 100644
--- a/include/linux/win_minmax.h
+++ b/include/linux/win_minmax.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/**
- * lib/minmax.c: windowed min/max tracker by Kathleen Nichols.
+/*
+ * win_minmax.h: windowed min/max tracker by Kathleen Nichols.
*
*/
#ifndef MINMAX_H
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 763bd382cf2d..63cca3b58d6d 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -11,7 +11,6 @@
#include <linux/device.h>
#include <linux/acpi.h>
#include <linux/mod_devicetable.h>
-#include <uapi/linux/wmi.h>
/**
* struct wmi_device - WMI device structure
@@ -22,11 +21,17 @@
*/
struct wmi_device {
struct device dev;
-
- /* private: used by the WMI driver core */
bool setable;
};
+/**
+ * to_wmi_device() - Helper macro to cast a device to a wmi_device
+ * @device: device struct
+ *
+ * Cast a struct device to a struct wmi_device.
+ */
+#define to_wmi_device(device) container_of(device, struct wmi_device, dev)
+
extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev,
u8 instance, u32 method_id,
const struct acpi_buffer *in,
@@ -35,34 +40,31 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev,
extern union acpi_object *wmidev_block_query(struct wmi_device *wdev,
u8 instance);
-u8 wmidev_instance_count(struct wmi_device *wdev);
+acpi_status wmidev_block_set(struct wmi_device *wdev, u8 instance, const struct acpi_buffer *in);
-extern int set_required_buffer_size(struct wmi_device *wdev, u64 length);
+u8 wmidev_instance_count(struct wmi_device *wdev);
/**
* struct wmi_driver - WMI driver structure
* @driver: Driver model structure
* @id_table: List of WMI GUIDs supported by this driver
- * @no_notify_data: WMI events provide no event data
+ * @no_notify_data: Driver supports WMI events which provide no event data
+ * @no_singleton: Driver can be instantiated multiple times
* @probe: Callback for device binding
* @remove: Callback for device unbinding
* @notify: Callback for receiving WMI events
- * @filter_callback: Callback for filtering device IOCTLs
*
* This represents WMI drivers which handle WMI devices.
- * @filter_callback is only necessary for drivers which
- * want to set up a WMI IOCTL interface.
*/
struct wmi_driver {
struct device_driver driver;
const struct wmi_device_id *id_table;
bool no_notify_data;
+ bool no_singleton;
int (*probe)(struct wmi_device *wdev, const void *context);
void (*remove)(struct wmi_device *wdev);
void (*notify)(struct wmi_device *device, union acpi_object *data);
- long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd,
- struct wmi_ioctl_buffer *arg);
};
extern int __must_check __wmi_driver_register(struct wmi_driver *driver,
diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h
new file mode 100644
index 000000000000..f6f8f83b15b0
--- /dev/null
+++ b/include/linux/wordpart.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_WORDPART_H
+#define _LINUX_WORDPART_H
+
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((u32)((n) & 0xffffffff))
+
+/**
+ * upper_16_bits - return bits 16-31 of a number
+ * @n: the number we're accessing
+ */
+#define upper_16_bits(n) ((u16)((n) >> 16))
+
+/**
+ * lower_16_bits - return bits 0-15 of a number
+ * @n: the number we're accessing
+ */
+#define lower_16_bits(n) ((u16)((n) & 0xffff))
+
+/**
+ * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
+ * @x: value to repeat
+ *
+ * NOTE: @x is not checked for > 0xff; larger values produce odd results.
+ */
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+#endif // _LINUX_WORDPART_H
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 683efe29fa69..158784dd189a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -14,12 +14,7 @@
#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/rcupdate.h>
-
-struct workqueue_struct;
-
-struct work_struct;
-typedef void (*work_func_t)(struct work_struct *work);
-void delayed_work_timer_fn(struct timer_list *t);
+#include <linux/workqueue_types.h>
/*
* The first word is the work queue pointer and the flags rolled into
@@ -27,20 +22,54 @@ void delayed_work_timer_fn(struct timer_list *t);
*/
#define work_data_bits(work) ((unsigned long *)(&(work)->data))
-enum {
+enum work_bits {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
- WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */
- WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
- WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
+ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */
+ WORK_STRUCT_PWQ_BIT, /* data points to pwq */
+ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
- WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
- WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */
-#else
- WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */
+ WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */
#endif
+ WORK_STRUCT_FLAG_BITS,
+ /* color for workqueue flushing */
+ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS,
WORK_STRUCT_COLOR_BITS = 4,
+ /*
+ * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/
+ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512
+ * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors.
+ *
+ * MSB
+ * [ pwq pointer ] [ flush color ] [ STRUCT flags ]
+ * 4 bits 4 or 5 bits
+ */
+ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS,
+
+ /*
+ * data contains off-queue information when !WORK_STRUCT_PWQ.
+ *
+ * MSB
+ * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ]
+ * 1 bit 4 or 5 bits
+ */
+ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS,
+ WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT,
+ WORK_OFFQ_FLAG_END,
+ WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT,
+
+ /*
+ * When a work item is off queue, the high bits encode off-queue flags
+ * and the last pool it was on. Cap pool ID to 31 bits and use the
+ * highest number to indicate that no pool is associated.
+ */
+ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+ WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+};
+
+enum work_flags {
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT,
WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
@@ -50,35 +79,14 @@ enum {
#else
WORK_STRUCT_STATIC = 0,
#endif
+};
+enum wq_misc_consts {
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS),
/* not bound to any CPU, prefer the local CPU */
WORK_CPU_UNBOUND = NR_CPUS,
- /*
- * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
- * This makes pwqs aligned to 256 bytes and allows 16 workqueue
- * flush colors.
- */
- WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
- WORK_STRUCT_COLOR_BITS,
-
- /* data contains off-queue information when !WORK_STRUCT_PWQ */
- WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
-
- __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE,
-
- /*
- * When a work item is off queue, its high bits point to the last
- * pool it was on. Cap at 31 bits and use the highest number to
- * indicate that no pool is associated.
- */
- WORK_OFFQ_FLAG_BITS = 1,
- WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
- WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
- WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
-
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
WORK_BUSY_RUNNING = 1 << 1,
@@ -88,21 +96,10 @@ enum {
};
/* Convenience constants - of type 'unsigned long', not 'enum'! */
-#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING)
+#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT)
#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
-
-#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1)
-#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
-
-struct work_struct {
- atomic_long_t data;
- struct list_head entry;
- work_func_t func;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
-};
+#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1))
#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT() \
@@ -125,6 +122,17 @@ struct rcu_work {
struct workqueue_struct *wq;
};
+enum wq_affn_scope {
+ WQ_AFFN_DFL, /* use system default */
+ WQ_AFFN_CPU, /* one pod per CPU */
+ WQ_AFFN_SMT, /* one pod poer SMT */
+ WQ_AFFN_CACHE, /* one pod per LLC */
+ WQ_AFFN_NUMA, /* one pod per NUMA node */
+ WQ_AFFN_SYSTEM, /* one pod across the whole system */
+
+ WQ_AFFN_NR_TYPES,
+};
+
/**
* struct workqueue_attrs - A struct for workqueue attributes.
*
@@ -138,17 +146,58 @@ struct workqueue_attrs {
/**
* @cpumask: allowed CPUs
+ *
+ * Work items in this workqueue are affine to these CPUs and not allowed
+ * to execute on other CPUs. A pool serving a workqueue must have the
+ * same @cpumask.
*/
cpumask_var_t cpumask;
/**
- * @no_numa: disable NUMA affinity
+ * @__pod_cpumask: internal attribute used to create per-pod pools
+ *
+ * Internal use only.
+ *
+ * Per-pod unbound worker pools are used to improve locality. Always a
+ * subset of ->cpumask. A workqueue can be associated with multiple
+ * worker pools with disjoint @__pod_cpumask's. Whether the enforcement
+ * of a pool's @__pod_cpumask is strict depends on @affn_strict.
+ */
+ cpumask_var_t __pod_cpumask;
+
+ /**
+ * @affn_strict: affinity scope is strict
+ *
+ * If clear, workqueue will make a best-effort attempt at starting the
+ * worker inside @__pod_cpumask but the scheduler is free to migrate it
+ * outside.
+ *
+ * If set, workers are only allowed to run inside @__pod_cpumask.
+ */
+ bool affn_strict;
+
+ /*
+ * Below fields aren't properties of a worker_pool. They only modify how
+ * :c:func:`apply_workqueue_attrs` select pools and thus don't
+ * participate in pool hash calculations or equality comparisons.
+ */
+
+ /**
+ * @affn_scope: unbound CPU affinity scope
*
- * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It
- * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus
- * doesn't participate in pool hash calculations or equality comparisons.
+ * CPU pods are used to improve execution locality of unbound work
+ * items. There are multiple pod types, one for each wq_affn_scope, and
+ * every CPU in the system belongs to one pod in every pod type. CPUs
+ * that belong to the same pod share the worker pool. For example,
+ * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker
+ * pool for each NUMA node.
+ */
+ enum wq_affn_scope affn_scope;
+
+ /**
+ * @ordered: work items must be executed one by one in queueing order
*/
- bool no_numa;
+ bool ordered;
};
static inline struct delayed_work *to_delayed_work(struct work_struct *work)
@@ -222,18 +271,16 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
* to generate better code.
*/
#ifdef CONFIG_LOCKDEP
-#define __INIT_WORK(_work, _func, _onstack) \
+#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \
do { \
- static struct lock_class_key __key; \
- \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
- lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \
+ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \
INIT_LIST_HEAD(&(_work)->entry); \
(_work)->func = (_func); \
} while (0)
#else
-#define __INIT_WORK(_work, _func, _onstack) \
+#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \
do { \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
@@ -242,12 +289,22 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
} while (0)
#endif
+#define __INIT_WORK(_work, _func, _onstack) \
+ do { \
+ static __maybe_unused struct lock_class_key __key; \
+ \
+ __INIT_WORK_KEY(_work, _func, _onstack, &__key); \
+ } while (0)
+
#define INIT_WORK(_work, _func) \
__INIT_WORK((_work), (_func), 0)
#define INIT_WORK_ONSTACK(_work, _func) \
__INIT_WORK((_work), (_func), 1)
+#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \
+ __INIT_WORK_KEY((_work), (_func), 1, _key)
+
#define __INIT_DELAYED_WORK(_work, _func, _tflags) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
@@ -301,7 +358,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
* Workqueue flags and constants. For details, please refer to
* Documentation/core-api/workqueue.rst.
*/
-enum {
+enum wq_flags {
+ WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */
WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
@@ -340,16 +398,23 @@ enum {
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
__WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */
- __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */
+ /* BH wq only allows the following flags */
+ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI,
+};
+
+enum wq_consts {
WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
- WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
+ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE,
WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
-};
-/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
-#define WQ_UNBOUND_MAX_ACTIVE \
- max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU)
+ /*
+ * Per-node default cap on min_active. Unless explicitly set, min_active
+ * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see
+ * workqueue_struct->min_active definition.
+ */
+ WQ_DFL_MIN_ACTIVE = 8,
+};
/*
* System-wide workqueues which are always present.
@@ -378,6 +443,9 @@ enum {
* they are same as their non-power-efficient counterparts - e.g.
* system_power_efficient_wq is identical to system_wq if
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
+ *
+ * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
+ * are executed in the queueing CPU's BH context in the queueing order.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_highpri_wq;
@@ -386,6 +454,11 @@ extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
+extern struct workqueue_struct *system_bh_wq;
+extern struct workqueue_struct *system_bh_highpri_wq;
+
+void workqueue_softirq_action(bool highpri);
+void workqueue_softirq_dead(unsigned int cpu);
/**
* alloc_workqueue - allocate a workqueue
@@ -394,8 +467,30 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq;
* @max_active: max in-flight work items, 0 for default
* remaining args: args for @fmt
*
- * Allocate a workqueue with the specified parameters. For detailed
- * information on WQ_* flags, please refer to
+ * For a per-cpu workqueue, @max_active limits the number of in-flight work
+ * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be
+ * executing at most one work item for the workqueue.
+ *
+ * For unbound workqueues, @max_active limits the number of in-flight work items
+ * for the whole system. e.g. @max_active of 16 indicates that that there can be
+ * at most 16 work items executing for the workqueue in the whole system.
+ *
+ * As sharing the same active counter for an unbound workqueue across multiple
+ * NUMA nodes can be expensive, @max_active is distributed to each NUMA node
+ * according to the proportion of the number of online CPUs and enforced
+ * independently.
+ *
+ * Depending on online CPU distribution, a node may end up with per-node
+ * max_active which is significantly lower than @max_active, which can lead to
+ * deadlocks if the per-node concurrency limit is lower than the maximum number
+ * of interdependent work items for the workqueue.
+ *
+ * To guarantee forward progress regardless of online CPU distribution, the
+ * concurrency limit on every node is guaranteed to be equal to or greater than
+ * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means
+ * that the sum of per-node max_active's may be larger than @max_active.
+ *
+ * For detailed information on %WQ_* flags, please refer to
* Documentation/core-api/workqueue.rst.
*
* RETURNS:
@@ -418,8 +513,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
#define alloc_ordered_workqueue(fmt, flags, args...) \
- alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \
- __WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
+ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
#define create_workqueue(name) \
alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
@@ -429,13 +523,16 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
#define create_singlethread_workqueue(name) \
alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
+#define from_work(var, callback_work, work_fieldname) \
+ container_of(callback_work, typeof(*var), work_fieldname)
+
extern void destroy_workqueue(struct workqueue_struct *wq);
struct workqueue_attrs *alloc_workqueue_attrs(void);
void free_workqueue_attrs(struct workqueue_attrs *attrs);
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs);
-int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
+extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask);
extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work);
@@ -466,6 +563,8 @@ extern bool flush_rcu_work(struct rcu_work *rwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
+extern void workqueue_set_min_active(struct workqueue_struct *wq,
+ int min_active);
extern struct work_struct *current_work(void);
extern bool current_is_workqueue_rescuer(void);
extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
@@ -569,6 +668,7 @@ static inline bool schedule_work(struct work_struct *work)
/*
* Detect attempt to flush system-wide workqueues at compile time when possible.
+ * Warn attempt to flush system-wide workqueues at runtime.
*
* See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
* for reasons and steps for converting system-wide workqueues into local workqueues.
@@ -576,52 +676,13 @@ static inline bool schedule_work(struct work_struct *work)
extern void __warn_flushing_systemwide_wq(void)
__compiletime_warning("Please avoid flushing system-wide workqueues.");
-/**
- * flush_scheduled_work - ensure that any scheduled work has run to completion.
- *
- * Forces execution of the kernel-global workqueue and blocks until its
- * completion.
- *
- * It's very easy to get into trouble if you don't take great care.
- * Either of the following situations will lead to deadlock:
- *
- * One of the work items currently on the workqueue needs to acquire
- * a lock held by your code or its caller.
- *
- * Your code is running in the context of a work routine.
- *
- * They will be detected by lockdep when they occur, but the first might not
- * occur very often. It depends on what work items are on the workqueue and
- * what locks they need, which you have no control over.
- *
- * In most situations flushing the entire workqueue is overkill; you merely
- * need to know that a particular work item isn't queued and isn't running.
- * In such cases you should use cancel_delayed_work_sync() or
- * cancel_work_sync() instead.
- *
- * Please stop calling this function! A conversion to stop flushing system-wide
- * workqueues is in progress. This function will be removed after all in-tree
- * users stopped calling this function.
- */
-/*
- * The background of commit 771c035372a036f8 ("deprecate the
- * '__deprecated' attribute warnings entirely and for good") is that,
- * since Linus builds all modules between every single pull he does,
- * the standard kernel build needs to be _clean_ in order to be able to
- * notice when new problems happen. Therefore, don't emit warning while
- * there are in-tree users.
- */
+/* Please stop using this function, for this function will be removed in near future. */
#define flush_scheduled_work() \
({ \
- if (0) \
- __warn_flushing_systemwide_wq(); \
+ __warn_flushing_systemwide_wq(); \
__flush_workqueue(system_wq); \
})
-/*
- * Although there is no longer in-tree caller, for now just emit warning
- * in order to give out-of-tree callers time to update.
- */
#define flush_workqueue(wq) \
({ \
struct workqueue_struct *_wq = (wq); \
@@ -683,8 +744,32 @@ static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
return fn(arg);
}
#else
-long work_on_cpu(int cpu, long (*fn)(void *), void *arg);
-long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg);
+long work_on_cpu_key(int cpu, long (*fn)(void *),
+ void *arg, struct lock_class_key *key);
+/*
+ * A new key is defined for each caller to make sure the work
+ * associated with the function doesn't share its locking class.
+ */
+#define work_on_cpu(_cpu, _fn, _arg) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ work_on_cpu_key(_cpu, _fn, _arg, &__key); \
+})
+
+long work_on_cpu_safe_key(int cpu, long (*fn)(void *),
+ void *arg, struct lock_class_key *key);
+
+/*
+ * A new key is defined for each caller to make sure the work
+ * associated with the function doesn't share its locking class.
+ */
+#define work_on_cpu_safe(_cpu, _fn, _arg) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \
+})
#endif /* CONFIG_SMP */
#ifdef CONFIG_FREEZER
@@ -714,5 +799,6 @@ int workqueue_offline_cpu(unsigned int cpu);
void __init workqueue_init_early(void);
void __init workqueue_init(void);
+void __init workqueue_init_topology(void);
#endif
diff --git a/include/linux/workqueue_types.h b/include/linux/workqueue_types.h
new file mode 100644
index 000000000000..4c38824f3ab4
--- /dev/null
+++ b/include/linux/workqueue_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_WORKQUEUE_TYPES_H
+#define _LINUX_WORKQUEUE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/lockdep_types.h>
+#include <linux/timer_types.h>
+#include <linux/types.h>
+
+struct workqueue_struct;
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(struct timer_list *t);
+
+struct work_struct {
+ atomic_long_t data;
+ struct list_head entry;
+ work_func_t func;
+#ifdef CONFIG_LOCKDEP
+ struct lockdep_map lockdep_map;
+#endif
+};
+
+#endif /* _LINUX_WORKQUEUE_TYPES_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fba937999fbf..9845cb62e40b 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,6 +11,7 @@
#include <linux/flex_proportions.h>
#include <linux/backing-dev-defs.h>
#include <linux/blk_types.h>
+#include <linux/pagevec.h>
struct bio;
@@ -40,6 +41,7 @@ enum writeback_sync_modes {
* in a manner such that unspecified fields are set to zero.
*/
struct writeback_control {
+ /* public fields that can be set and/or consumed by the caller: */
long nr_to_write; /* Write this many pages, and decrement
this for each page written */
long pages_skipped; /* Pages which were not written */
@@ -60,7 +62,7 @@ struct writeback_control {
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
- unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */
+ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */
/*
* When writeback IOs are bounced through async layers, only the
@@ -77,6 +79,11 @@ struct writeback_control {
*/
struct swap_iocb **swap_plug;
+ /* internal fields used by the ->writepages implementation: */
+ struct folio_batch fbatch;
+ pgoff_t index;
+ int saved_err;
+
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *wb; /* wb this writeback is issued under */
struct inode *inode; /* inode being written out */
@@ -193,7 +200,6 @@ void inode_io_list_del(struct inode *inode);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
{
- might_sleep();
wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
}
@@ -361,11 +367,12 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
bool wb_over_bg_thresh(struct bdi_writeback *wb);
+struct folio *writeback_iter(struct address_space *mapping,
+ struct writeback_control *wbc, struct folio *folio, int *error);
+
typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
void *data);
-void tag_pages_for_writeback(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);
@@ -375,11 +382,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void folio_account_redirty(struct folio *folio);
-static inline void account_page_redirty(struct page *page)
-{
- folio_account_redirty(page_folio(page));
-}
bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
bool redirty_page_for_writepage(struct writeback_control *, struct page *);
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 01fa15506286..170fdee6339c 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -16,6 +16,7 @@
* @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
* @WWAN_PORT_FIREHOSE: XML based command protocol
* @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems
+ * @WWAN_PORT_FASTBOOT: Fastboot protocol control
*
* @WWAN_PORT_MAX: Highest supported port types
* @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type
@@ -28,6 +29,7 @@ enum wwan_port_type {
WWAN_PORT_QCDM,
WWAN_PORT_FIREHOSE,
WWAN_PORT_XMMRPC,
+ WWAN_PORT_FASTBOOT,
/* Add new port types above this line */
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 741703b45f61..cb571dfcf4b1 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa,
* stores the index into the @id pointer, then stores the entry at
* that index. A concurrent lookup will not see an uninitialised @id.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Any context. Takes and releases the xa_lock. May sleep if
* the @gfp flags permit.
* Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id,
* stores the index into the @id pointer, then stores the entry at
* that index. A concurrent lookup will not see an uninitialised @id.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Any context. Takes and releases the xa_lock while
* disabling softirqs. May sleep if the @gfp flags permit.
* Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id,
* stores the index into the @id pointer, then stores the entry at
* that index. A concurrent lookup will not see an uninitialised @id.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Process context. Takes and releases the xa_lock while
* disabling interrupts. May sleep if the @gfp flags permit.
* Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
* The search for an empty entry will start at @next and will wrap
* around if necessary.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Any context. Takes and releases the xa_lock. May sleep if
* the @gfp flags permit.
* Return: 0 if the allocation succeeded without wrapping. 1 if the
@@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
* The search for an empty entry will start at @next and will wrap
* around if necessary.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Any context. Takes and releases the xa_lock while
* disabling softirqs. May sleep if the @gfp flags permit.
* Return: 0 if the allocation succeeded without wrapping. 1 if the
@@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
* The search for an empty entry will start at @next and will wrap
* around if necessary.
*
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
* Context: Process context. Takes and releases the xa_lock while
* disabling interrupts. May sleep if the @gfp flags permit.
* Return: 0 if the allocation succeeded without wrapping. 1 if the
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d591ef59aa98..d20051865800 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -114,13 +114,15 @@ struct simple_xattr {
};
void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags,
- ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
void simple_xattr_add(struct simple_xattrs *xattrs,
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
new file mode 100644
index 000000000000..341aea490070
--- /dev/null
+++ b/include/linux/zswap.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ZSWAP_H
+#define _LINUX_ZSWAP_H
+
+#include <linux/types.h>
+#include <linux/mm_types.h>
+
+struct lruvec;
+
+extern u64 zswap_pool_total_size;
+extern atomic_t zswap_stored_pages;
+
+#ifdef CONFIG_ZSWAP
+
+struct zswap_lruvec_state {
+ /*
+ * Number of pages in zswap that should be protected from the shrinker.
+ * This number is an estimate of the following counts:
+ *
+ * a) Recent page faults.
+ * b) Recent insertion to the zswap LRU. This includes new zswap stores,
+ * as well as recent zswap LRU rotations.
+ *
+ * These pages are likely to be warm, and might incur IO if the are written
+ * to swap.
+ */
+ atomic_long_t nr_zswap_protected;
+};
+
+bool zswap_store(struct folio *folio);
+bool zswap_load(struct folio *folio);
+void zswap_invalidate(swp_entry_t swp);
+int zswap_swapon(int type, unsigned long nr_pages);
+void zswap_swapoff(int type);
+void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
+void zswap_lruvec_state_init(struct lruvec *lruvec);
+void zswap_folio_swapin(struct folio *folio);
+bool is_zswap_enabled(void);
+#else
+
+struct zswap_lruvec_state {};
+
+static inline bool zswap_store(struct folio *folio)
+{
+ return false;
+}
+
+static inline bool zswap_load(struct folio *folio)
+{
+ return false;
+}
+
+static inline void zswap_invalidate(swp_entry_t swp) {}
+static inline int zswap_swapon(int type, unsigned long nr_pages)
+{
+ return 0;
+}
+static inline void zswap_swapoff(int type) {}
+static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
+static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
+static inline void zswap_folio_swapin(struct folio *folio) {}
+
+static inline bool is_zswap_enabled(void)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _LINUX_ZSWAP_H */