Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/acpi.h | 7
-rw-r--r--  include/linux/acpi_iort.h | 20
-rw-r--r--  include/linux/arm-smccc.h | 44
-rw-r--r--  include/linux/backing-dev-defs.h | 43
-rw-r--r--  include/linux/backing-dev.h | 22
-rw-r--r--  include/linux/bio.h | 12
-rw-r--r--  include/linux/blk-cgroup.h | 107
-rw-r--r--  include/linux/blk-mq.h | 67
-rw-r--r--  include/linux/blk_types.h | 37
-rw-r--r--  include/linux/blkdev.h | 166
-rw-r--r--  include/linux/buffer_head.h | 1
-rw-r--r--  include/linux/cdrom.h | 2
-rw-r--r--  include/linux/compiler.h | 187
-rw-r--r--  include/linux/compiler_types.h | 41
-rw-r--r--  include/linux/context_tracking.h | 2
-rw-r--r--  include/linux/crypto.h | 41
-rw-r--r--  include/linux/dasd_mod.h | 2
-rw-r--r--  include/linux/device-mapper.h | 12
-rw-r--r--  include/linux/efi.h | 1
-rw-r--r--  include/linux/fs.h | 196
-rw-r--r--  include/linux/fscrypt.h | 111
-rw-r--r--  include/linux/fsverity.h | 9
-rw-r--r--  include/linux/ftrace.h | 12
-rw-r--r--  include/linux/genhd.h | 40
-rw-r--r--  include/linux/hardirq.h | 28
-rw-r--r--  include/linux/i2c.h | 4
-rw-r--r--  include/linux/instrumentation.h | 57
-rw-r--r--  include/linux/io-mapping.h | 5
-rw-r--r--  include/linux/irq.h | 13
-rw-r--r--  include/linux/irqflags.h | 36
-rw-r--r--  include/linux/jbd2.h | 1
-rw-r--r--  include/linux/kprobes.h | 15
-rw-r--r--  include/linux/lightnvm.h | 3
-rw-r--r--  include/linux/list.h | 20
-rw-r--r--  include/linux/lockdep.h | 230
-rw-r--r--  include/linux/lockdep_types.h | 194
-rw-r--r--  include/linux/memblock.h | 28
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 1
-rw-r--r--  include/linux/mpi.h | 3
-rw-r--r--  include/linux/nospec.h | 2
-rw-r--r--  include/linux/of.h | 4
-rw-r--r--  include/linux/of_device.h | 16
-rw-r--r--  include/linux/of_iommu.h | 6
-rw-r--r--  include/linux/of_irq.h | 13
-rw-r--r--  include/linux/padata.h | 21
-rw-r--r--  include/linux/pagemap.h | 43
-rw-r--r--  include/linux/percpu-refcount.h | 2
-rw-r--r--  include/linux/perf_event.h | 15
-rw-r--r--  include/linux/ptr_ring.h | 2
-rw-r--r--  include/linux/random.h | 3
-rw-r--r--  include/linux/rculist.h | 4
-rw-r--r--  include/linux/rculist_nulls.h | 2
-rw-r--r--  include/linux/rcupdate.h | 53
-rw-r--r--  include/linux/rcupdate_trace.h | 4
-rw-r--r--  include/linux/rcutiny.h | 20
-rw-r--r--  include/linux/rcutree.h | 2
-rw-r--r--  include/linux/rhashtable.h | 71
-rw-r--r--  include/linux/rwsem.h | 20
-rw-r--r--  include/linux/sched.h | 17
-rw-r--r--  include/linux/sched/task.h | 6
-rw-r--r--  include/linux/sched_clock.h | 28
-rw-r--r--  include/linux/seqlock.h | 756
-rw-r--r--  include/linux/spinlock.h | 1
-rw-r--r--  include/linux/spinlock_types.h | 2
-rw-r--r--  include/linux/tcp.h | 6
-rw-r--r--  include/linux/torture.h | 5
-rw-r--r--  include/linux/tpm.h | 1
-rw-r--r--  include/linux/tpm_eventlog.h | 11
-rw-r--r--  include/linux/types.h | 2
-rw-r--r--  include/linux/xattr.h | 3
70 files changed, 1679 insertions, 1282 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d661cd0ee64d..6d2c47489d90 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -905,6 +905,13 @@ static inline int acpi_dma_configure(struct device *dev,
return 0;
}
+static inline int acpi_dma_configure_id(struct device *dev,
+ enum dev_dma_attr attr,
+ const u32 *input_id)
+{
+ return 0;
+}
+
#define ACPI_PTR(_ptr) (NULL)
static inline void acpi_device_set_enumerated(struct acpi_device *adev)
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 8e7e2ec37f1b..20a32120bb88 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -28,27 +28,29 @@ void iort_deregister_domain_token(int trans_id);
struct fwnode_handle *iort_find_domain_token(int trans_id);
#ifdef CONFIG_ACPI_IORT
void acpi_iort_init(void);
-u32 iort_msi_map_rid(struct device *dev, u32 req_id);
-struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+u32 iort_msi_map_id(struct device *dev, u32 id);
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
+ enum irq_domain_bus_token bus_token);
void acpi_configure_pmsi_domain(struct device *dev);
int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
/* IOMMU interface */
void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
-const struct iommu_ops *iort_iommu_configure(struct device *dev);
+const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
+ const u32 *id_in);
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
#else
static inline void acpi_iort_init(void) { }
-static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
-{ return req_id; }
-static inline struct irq_domain *iort_get_device_domain(struct device *dev,
- u32 req_id)
+static inline u32 iort_msi_map_id(struct device *dev, u32 id)
+{ return id; }
+static inline struct irq_domain *iort_get_device_domain(
+ struct device *dev, u32 id, enum irq_domain_bus_token bus_token)
{ return NULL; }
static inline void acpi_configure_pmsi_domain(struct device *dev) { }
/* IOMMU interface */
static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
u64 *size) { }
-static inline const struct iommu_ops *iort_iommu_configure(
- struct device *dev)
+static inline const struct iommu_ops *iort_iommu_configure_id(
+ struct device *dev, const u32 *id_in)
{ return NULL; }
static inline
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 56d6a5c6e353..efcbde731f03 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -81,6 +81,28 @@
ARM_SMCCC_SMC_32, \
0, 0x7fff)
+/* Paravirtualised time calls (defined by ARM DEN0057A) */
+#define ARM_SMCCC_HV_PV_TIME_FEATURES \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD_HYP, \
+ 0x20)
+
+#define ARM_SMCCC_HV_PV_TIME_ST \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD_HYP, \
+ 0x21)
+
+/*
+ * Return codes defined in ARM DEN 0070A
+ * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
+ */
+#define SMCCC_RET_SUCCESS 0
+#define SMCCC_RET_NOT_SUPPORTED -1
+#define SMCCC_RET_NOT_REQUIRED -2
+#define SMCCC_RET_INVALID_PARAMETER -3
+
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
@@ -332,15 +354,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
/*
- * Return codes defined in ARM DEN 0070A
- * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C
- */
-#define SMCCC_RET_SUCCESS 0
-#define SMCCC_RET_NOT_SUPPORTED -1
-#define SMCCC_RET_NOT_REQUIRED -2
-#define SMCCC_RET_INVALID_PARAMETER -3
-
-/*
* Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
* Used when the SMCCC conduit is not defined. The empty asm statement
* avoids compiler warnings about unused variables.
@@ -385,18 +398,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
method; \
})
-/* Paravirtualised time calls (defined by ARM DEN0057A) */
-#define ARM_SMCCC_HV_PV_TIME_FEATURES \
- ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
- ARM_SMCCC_SMC_64, \
- ARM_SMCCC_OWNER_STANDARD_HYP, \
- 0x20)
-
-#define ARM_SMCCC_HV_PV_TIME_ST \
- ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
- ARM_SMCCC_SMC_64, \
- ARM_SMCCC_OWNER_STANDARD_HYP, \
- 0x21)
-
#endif /*__ASSEMBLY__*/
#endif /*__LINUX_ARM_SMCCC_H*/
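
The hunks above move the SMCCC_RET_* return codes and the DEN0057A paravirtualised-time call IDs out of the !__ASSEMBLY__ region, ahead of the #ifndef __ASSEMBLY__ guard. As an illustration of how a C caller combines the two groups of definitions, here is a minimal sketch of a steal-time feature probe, loosely modelled on the arm64 paravirt code; it assumes arm_smccc_1_1_invoke() and struct arm_smccc_res from elsewhere in this header.

#include <linux/arm-smccc.h>

static bool pv_steal_time_supported(void)
{
	struct arm_smccc_res res;

	/* Ask the hypervisor whether the PV_TIME_ST call is implemented. */
	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
			     ARM_SMCCC_HV_PV_TIME_ST, &res);

	return res.a0 == SMCCC_RET_SUCCESS;
}
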
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 90a7e844a098..fff9367a6348 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -33,8 +33,6 @@ enum wb_congested_state {
WB_sync_congested, /* The sync queue is getting full */
};
-typedef int (congested_fn)(void *, int);
-
enum wb_stat_item {
WB_RECLAIMABLE,
WB_WRITEBACK,
@@ -88,26 +86,6 @@ struct wb_completion {
struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
/*
- * For cgroup writeback, multiple wb's may map to the same blkcg. Those
- * wb's can operate mostly independently but should share the congested
- * state. To facilitate such sharing, the congested state is tracked using
- * the following struct which is created on demand, indexed by blkcg ID on
- * its bdi, and refcounted.
- */
-struct bdi_writeback_congested {
- unsigned long state; /* WB_[a]sync_congested flags */
- refcount_t refcnt; /* nr of attached wb's and blkg */
-
-#ifdef CONFIG_CGROUP_WRITEBACK
- struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
- * on bdi unregistration. For memcg-wb
- * internal use only! */
- int blkcg_id; /* ID of the associated blkcg */
- struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */
-#endif
-};
-
-/*
* Each wb (bdi_writeback) can perform writeback operations, is measured
* and throttled, independently. Without cgroup writeback, each bdi
* (bdi_writeback) is served by its embedded bdi->wb.
@@ -140,7 +118,7 @@ struct bdi_writeback {
struct percpu_counter stat[NR_WB_STAT_ITEMS];
- struct bdi_writeback_congested *congested;
+ unsigned long congested; /* WB_[a]sync_congested flags */
unsigned long bw_time_stamp; /* last time write bw is updated */
unsigned long dirtied_stamp;
@@ -190,8 +168,6 @@ struct backing_dev_info {
struct list_head bdi_list;
unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
unsigned long io_pages; /* max allowed IO size */
- congested_fn *congested_fn; /* Function pointer if device is md/dm */
- void *congested_data; /* Pointer to aux data for congested func */
struct kref refcnt; /* Reference counter for the structure */
unsigned int capabilities; /* Device capabilities */
@@ -208,11 +184,8 @@ struct backing_dev_info {
struct list_head wb_list; /* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
- struct rb_root cgwb_congested_tree; /* their congested states */
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
-#else
- struct bdi_writeback_congested *wb_congested;
#endif
wait_queue_head_t wb_waitq;
@@ -232,18 +205,8 @@ enum {
BLK_RW_SYNC = 1,
};
-void clear_wb_congested(struct bdi_writeback_congested *congested, int sync);
-void set_wb_congested(struct bdi_writeback_congested *congested, int sync);
-
-static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
- clear_wb_congested(bdi->wb.congested, sync);
-}
-
-static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync)
-{
- set_wb_congested(bdi->wb.congested, sync);
-}
+void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
+void set_bdi_congested(struct backing_dev_info *bdi, int sync);
struct wb_lock_cookie {
bool locked;
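
With the refcounted bdi_writeback_congested object gone, the congested state is just the WB_[a]sync_congested bits stored directly in wb->congested, and set/clear_bdi_congested() become out-of-line helpers. The sketch below shows roughly what such helpers reduce to; it is an illustration only, not the mm/backing-dev.c implementation, which additionally manages the congestion wait queues.

#include <linux/backing-dev-defs.h>
#include <linux/bitops.h>

static void sketch_set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum wb_congested_state bit =
		sync ? WB_sync_congested : WB_async_congested;

	set_bit(bit, &bdi->wb.congested);
}

static void sketch_clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum wb_congested_state bit =
		sync ? WB_sync_congested : WB_async_congested;

	clear_bit(bit, &bdi->wb.congested);
	/* the real helper also wakes tasks sleeping in congestion_wait() */
}
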
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 6b3504bf7a42..0b06b2d26c9a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -169,11 +169,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
- struct backing_dev_info *bdi = wb->bdi;
-
- if (bdi->congested_fn)
- return bdi->congested_fn(bdi->congested_data, cong_bits);
- return wb->congested->state & cong_bits;
+ return wb->congested & cong_bits;
}
long congestion_wait(int sync, long timeout);
@@ -224,9 +220,6 @@ static inline int bdi_sched_wait(void *word)
#ifdef CONFIG_CGROUP_WRITEBACK
-struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
-void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
@@ -404,19 +397,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
return false;
}
-static inline struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
-{
- refcount_inc(&bdi->wb_congested->refcnt);
- return bdi->wb_congested;
-}
-
-static inline void wb_congested_put(struct bdi_writeback_congested *congested)
-{
- if (refcount_dec_and_test(&congested->refcnt))
- kfree(congested);
-}
-
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
return &bdi->wb;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 91676d4b2dfe..c6d765382926 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -8,8 +8,6 @@
#include <linux/highmem.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>
-
-#ifdef CONFIG_BLOCK
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
@@ -491,21 +489,12 @@ do { \
#define bio_dev(bio) \
disk_devt((bio)->bi_disk)
-#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-void bio_associate_blkg_from_page(struct bio *bio, struct page *page);
-#else
-static inline void bio_associate_blkg_from_page(struct bio *bio,
- struct page *page) { }
-#endif
-
#ifdef CONFIG_BLK_CGROUP
-void bio_disassociate_blkg(struct bio *bio);
void bio_associate_blkg(struct bio *bio);
void bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css);
void bio_clone_blkg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
-static inline void bio_disassociate_blkg(struct bio *bio) { }
static inline void bio_associate_blkg(struct bio *bio) { }
static inline void bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css)
@@ -824,5 +813,4 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
bio->bi_opf |= REQ_NOWAIT;
}
-#endif /* CONFIG_BLOCK */
#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a57ebe2f00ab..c8fc9792ac77 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -109,12 +109,6 @@ struct blkcg_gq {
struct hlist_node blkcg_node;
struct blkcg *blkcg;
- /*
- * Each blkg gets congested separately and the congestion state is
- * propagated to the matching bdi_writeback_congested.
- */
- struct bdi_writeback_congested *wb_congested;
-
/* all non-root blkcg_gq's are guaranteed to have access to parent */
struct blkcg_gq *parent;
@@ -183,10 +177,6 @@ extern bool blkcg_debug_stats;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q);
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);
@@ -481,32 +471,6 @@ static inline bool blkg_tryget(struct blkcg_gq *blkg)
}
/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This needs to be called rcu protected. As the failure mode here is to walk
- * up the blkg tree, this ensure that the blkg->parent pointers are always
- * valid. This returns the blkg that it ended up taking a reference on or %NULL
- * if no reference was taken.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
- struct blkcg_gq *ret_blkg = NULL;
-
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- while (blkg) {
- if (blkg_tryget(blkg)) {
- ret_blkg = blkg;
- break;
- }
- blkg = blkg->parent;
- }
-
- return ret_blkg;
-}
-
-/**
* blkg_put - put a blkg reference
* @blkg: blkg to put
*/
@@ -547,14 +511,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
-#ifdef CONFIG_BLK_DEV_THROTTLING
-extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
- struct bio *bio);
-#else
-static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
- struct bio *bio) { return false; }
-#endif
-
bool __blkcg_punt_bio_submit(struct bio *bio);
static inline bool blkcg_punt_bio_submit(struct bio *bio)
@@ -570,65 +526,6 @@ static inline void blkcg_bio_issue_init(struct bio *bio)
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
- struct bio *bio)
-{
- struct blkcg_gq *blkg;
- bool throtl = false;
-
- rcu_read_lock();
-
- if (!bio->bi_blkg) {
- char b[BDEVNAME_SIZE];
-
- WARN_ONCE(1,
- "no blkg associated for bio on block-device: %s\n",
- bio_devname(bio, b));
- bio_associate_blkg(bio);
- }
-
- blkg = bio->bi_blkg;
-
- throtl = blk_throtl_bio(q, blkg, bio);
-
- if (!throtl) {
- struct blkg_iostat_set *bis;
- int rwd, cpu;
-
- if (op_is_discard(bio->bi_opf))
- rwd = BLKG_IOSTAT_DISCARD;
- else if (op_is_write(bio->bi_opf))
- rwd = BLKG_IOSTAT_WRITE;
- else
- rwd = BLKG_IOSTAT_READ;
-
- cpu = get_cpu();
- bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
- u64_stats_update_begin(&bis->sync);
-
- /*
- * If the bio is flagged with BIO_CGROUP_ACCT it means this is a
- * split bio and we would have already accounted for the size of
- * the bio.
- */
- if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
- bio_set_flag(bio, BIO_CGROUP_ACCT);
- bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
- }
- bis->cur.ios[rwd]++;
-
- u64_stats_update_end(&bis->sync);
- if (cgroup_subsys_on_dfl(io_cgrp_subsys))
- cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
- put_cpu();
- }
-
- blkcg_bio_issue_init(bio);
-
- rcu_read_unlock();
- return !throtl;
-}
-
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
@@ -702,6 +599,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
+void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
@@ -755,8 +653,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
- struct bio *bio) { return true; }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
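
The open-coded accounting in blkcg_bio_issue_check() moves behind the new blk_cgroup_bio_start() helper, but the per-cpu blkg_iostat_set counters it updates are read the same way as before. Purely as an illustration of the u64_stats pattern seen in the removed code (hypothetical helper; field names taken from that code):

#include <linux/u64_stats_sync.h>

static u64 sketch_read_bytes(struct blkg_iostat_set *bis, int rwd)
{
	unsigned int seq;
	u64 bytes;

	do {
		seq = u64_stats_fetch_begin(&bis->sync);
		bytes = bis->cur.bytes[rwd];
	} while (u64_stats_fetch_retry(&bis->sync, seq));

	return bytes;
}
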
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d6fcae17da5a..9d2d5ad367a4 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -267,27 +267,9 @@ struct blk_mq_queue_data {
bool last;
};
-typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
- const struct blk_mq_queue_data *);
-typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
-typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
-typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
-typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
-typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
-typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
-typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
- unsigned int, unsigned int);
-typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
- unsigned int);
-
typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
-typedef int (poll_fn)(struct blk_mq_hw_ctx *);
-typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
-typedef bool (busy_fn)(struct request_queue *);
-typedef void (complete_fn)(struct request *);
-typedef void (cleanup_rq_fn)(struct request *);
/**
* struct blk_mq_ops - Callback functions that implements block driver
@@ -297,7 +279,8 @@ struct blk_mq_ops {
/**
* @queue_rq: Queue a new request from block IO.
*/
- queue_rq_fn *queue_rq;
+ blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
+ const struct blk_mq_queue_data *);
/**
* @commit_rqs: If a driver uses bd->last to judge when to submit
@@ -306,7 +289,7 @@ struct blk_mq_ops {
* purpose of kicking the hardware (which the last request otherwise
* would have done).
*/
- commit_rqs_fn *commit_rqs;
+ void (*commit_rqs)(struct blk_mq_hw_ctx *);
/**
* @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -314,37 +297,38 @@ struct blk_mq_ops {
* reserved budget. Also we have to handle failure case
* of .get_budget for avoiding I/O deadlock.
*/
- get_budget_fn *get_budget;
+ bool (*get_budget)(struct request_queue *);
+
/**
* @put_budget: Release the reserved budget.
*/
- put_budget_fn *put_budget;
+ void (*put_budget)(struct request_queue *);
/**
* @timeout: Called on request timeout.
*/
- timeout_fn *timeout;
+ enum blk_eh_timer_return (*timeout)(struct request *, bool);
/**
* @poll: Called to poll for completion of a specific tag.
*/
- poll_fn *poll;
+ int (*poll)(struct blk_mq_hw_ctx *);
/**
* @complete: Mark the request as complete.
*/
- complete_fn *complete;
+ void (*complete)(struct request *);
/**
* @init_hctx: Called when the block layer side of a hardware queue has
* been set up, allowing the driver to allocate/init matching
* structures.
*/
- init_hctx_fn *init_hctx;
+ int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
/**
* @exit_hctx: Ditto for exit/teardown.
*/
- exit_hctx_fn *exit_hctx;
+ void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
/**
* @init_request: Called for every command allocated by the block layer
@@ -353,11 +337,13 @@ struct blk_mq_ops {
* Tag greater than or equal to queue_depth is for setting up
* flush request.
*/
- init_request_fn *init_request;
+ int (*init_request)(struct blk_mq_tag_set *set, struct request *,
+ unsigned int, unsigned int);
/**
* @exit_request: Ditto for exit/teardown.
*/
- exit_request_fn *exit_request;
+ void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
+ unsigned int);
/**
* @initialize_rq_fn: Called from inside blk_get_request().
@@ -368,18 +354,18 @@ struct blk_mq_ops {
* @cleanup_rq: Called before freeing one request which isn't completed
* yet, and usually for freeing the driver private data.
*/
- cleanup_rq_fn *cleanup_rq;
+ void (*cleanup_rq)(struct request *);
/**
* @busy: If set, returns whether or not this queue currently is busy.
*/
- busy_fn *busy;
+ bool (*busy)(struct request_queue *);
/**
* @map_queues: This allows drivers specify their own queue mapping by
* overriding the setup-time function that builds the mq_map.
*/
- map_queues_fn *map_queues;
+ int (*map_queues)(struct blk_mq_tag_set *set);
#ifdef CONFIG_BLK_DEBUG_FS
/**
@@ -447,8 +433,6 @@ enum {
BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0),
/* allocate from reserved pool */
BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1),
- /* allocate internal/sched tag */
- BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2),
/* set RQF_PREEMPT */
BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3),
};
@@ -503,8 +487,8 @@ void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-bool blk_mq_complete_request(struct request *rq);
-void blk_mq_force_complete_rq(struct request *rq);
+void blk_mq_complete_request(struct request *rq);
+bool blk_mq_complete_request_remote(struct request *rq);
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio, unsigned int nr_segs);
bool blk_mq_queue_stopped(struct request_queue *q);
@@ -537,6 +521,15 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
unsigned int blk_mq_rq_cpu(struct request *rq);
+bool __blk_should_fake_timeout(struct request_queue *q);
+static inline bool blk_should_fake_timeout(struct request_queue *q)
+{
+ if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
+ test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+ return __blk_should_fake_timeout(q);
+ return false;
+}
+
/**
* blk_mq_rq_from_pdu - cast a PDU to a request
* @pdu: the PDU (Protocol Data Unit) to be casted
@@ -589,6 +582,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq)
rq->q->mq_ops->cleanup_rq(rq);
}
-blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio);
+blk_qc_t blk_mq_submit_bio(struct bio *bio);
#endif
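
The typedef indirection for blk_mq_ops is dropped in favour of directly prototyped members, and get_budget/put_budget now take a request_queue rather than a hardware context. A hedged sketch of a minimal driver-side ops table against the new prototypes; the "mydrv" names are hypothetical and the request is completed synchronously for brevity.

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);
	/* ... hand the request to hardware here ... */
	blk_mq_end_request(rq, BLK_STS_OK);
	return BLK_STS_OK;
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,
};
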
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ccb895f911b1..4ecf4fed171f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -14,12 +14,39 @@ struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
-struct block_device;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;
+struct block_device {
+ dev_t bd_dev; /* not a kdev_t - it's a search key */
+ int bd_openers;
+ struct inode * bd_inode; /* will die */
+ struct super_block * bd_super;
+ struct mutex bd_mutex; /* open/close mutex */
+ void * bd_claiming;
+ void * bd_holder;
+ int bd_holders;
+ bool bd_write_holder;
+#ifdef CONFIG_SYSFS
+ struct list_head bd_holder_disks;
+#endif
+ struct block_device * bd_contains;
+ u8 bd_partno;
+ struct hd_struct * bd_part;
+ /* number of times partitions within this device have been opened. */
+ unsigned bd_part_count;
+ int bd_invalidated;
+ struct gendisk * bd_disk;
+ struct backing_dev_info *bd_bdi;
+
+ /* The counter of freeze processes */
+ int bd_fsfreeze_count;
+ /* Mutex for freeze */
+ struct mutex bd_fsfreeze_mutex;
+} __randomize_layout;
+
/*
* Block error status values. See block/blk-core:blk_errors for the details.
* Alpha cannot write a byte atomically, so we need to use 32-bit value.
@@ -300,12 +327,8 @@ enum req_opf {
REQ_OP_DISCARD = 3,
/* securely erase sectors */
REQ_OP_SECURE_ERASE = 5,
- /* reset a zone write pointer */
- REQ_OP_ZONE_RESET = 6,
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
- /* reset all the zone present on the device */
- REQ_OP_ZONE_RESET_ALL = 8,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
/* Open a zone */
@@ -316,6 +339,10 @@ enum req_opf {
REQ_OP_ZONE_FINISH = 12,
/* write data at the current zone write pointer */
REQ_OP_ZONE_APPEND = 13,
+ /* reset a zone write pointer */
+ REQ_OP_ZONE_RESET = 15,
+ /* reset all the zone present on the device */
+ REQ_OP_ZONE_RESET_ALL = 17,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
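
One observable effect of the renumbering above: REQ_OP_ZONE_RESET (15) and REQ_OP_ZONE_RESET_ALL (17) are now odd values, so op_is_write() (defined further down in this header as a test of the low opcode bit) classifies them as write operations. Illustration only, using a hypothetical helper:

/* Both checks evaluate to true with the new numbering. */
static inline bool zone_resets_count_as_writes(void)
{
	return op_is_write(REQ_OP_ZONE_RESET) &&
	       op_is_write(REQ_OP_ZONE_RESET_ALL);
}
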
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 57241417ff2f..06ecb2c1492f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -4,9 +4,6 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
-
-#ifdef CONFIG_BLOCK
-
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
@@ -289,8 +286,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
struct blk_queue_ctx;
-typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
-
struct bio_vec;
enum blk_eh_timer_return {
@@ -401,8 +396,6 @@ struct request_queue {
struct blk_queue_stats *stats;
struct rq_qos *rq_qos;
- make_request_fn *make_request_fn;
-
const struct blk_mq_ops *mq_ops;
/* sw queues */
@@ -528,9 +521,9 @@ struct request_queue {
unsigned int sg_timeout;
unsigned int sg_reserved_size;
int node;
+ struct mutex debugfs_mutex;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace;
- struct mutex blk_trace_mutex;
#endif
/*
* for flush operations
@@ -574,8 +567,9 @@ struct request_queue {
struct list_head tag_set_list;
struct bio_set bio_split;
-#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *debugfs_dir;
+
+#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *sched_debugfs_dir;
struct dentry *rqos_debugfs_dir;
#endif
@@ -584,8 +578,6 @@ struct request_queue {
size_t cmd_size;
- struct work_struct release_work;
-
#define BLK_MAX_WRITE_HINTS 5
u64 write_hints[BLK_MAX_WRITE_HINTS];
};
@@ -861,8 +853,7 @@ static inline void rq_flush_dcache_pages(struct request *rq)
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
-extern blk_qc_t generic_make_request(struct bio *bio);
-extern blk_qc_t direct_make_request(struct bio *bio);
+blk_qc_t submit_bio_noacct(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_put_request(struct request *);
extern struct request *blk_get_request(struct request_queue *, unsigned int op,
@@ -876,7 +867,7 @@ extern void blk_rq_unprep_clone(struct request *rq);
extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
-extern void blk_queue_split(struct request_queue *, struct bio **);
+extern void blk_queue_split(struct bio **);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
unsigned int, void __user *);
@@ -1079,7 +1070,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq);
extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
/*
@@ -1166,13 +1156,13 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
return __blk_rq_map_sg(q, rq, sglist, &last_sg);
}
extern void blk_dump_rq_flags(struct request *, char *);
-extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
-struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
+struct request_queue *blk_alloc_queue(int node_id);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
+#ifdef CONFIG_BLOCK
/*
* blk_plug permits building a queue of related requests by holding the I/O
* fragments for a short period. This allows merging of sequential requests
@@ -1190,6 +1180,7 @@ struct blk_plug {
struct list_head cb_list; /* md requires an unplug callback */
unsigned short rq_count;
bool multiple_queues;
+ bool nowait;
};
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
@@ -1232,9 +1223,47 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
!list_empty(&plug->cb_list));
}
+int blkdev_issue_flush(struct block_device *, gfp_t);
+long nr_blockdev_pages(void);
+#else /* CONFIG_BLOCK */
+struct blk_plug {
+};
+
+static inline void blk_start_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_finish_plug(struct blk_plug *plug)
+{
+}
+
+static inline void blk_flush_plug(struct task_struct *task)
+{
+}
+
+static inline void blk_schedule_flush_plug(struct task_struct *task)
+{
+}
+
+
+static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+{
+ return false;
+}
+
+static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
+{
+ return 0;
+}
+
+static inline long nr_blockdev_pages(void)
+{
+ return 0;
+}
+#endif /* CONFIG_BLOCK */
+
extern void blk_io_schedule(void);
-int blkdev_issue_flush(struct block_device *, gfp_t);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
@@ -1516,7 +1545,7 @@ static inline unsigned int blksize_bits(unsigned int size)
static inline unsigned int block_size(struct block_device *bdev)
{
- return bdev->bd_block_size;
+ return 1 << bdev->bd_inode->i_blkbits;
}
int kblockd_schedule_work(struct work_struct *work);
@@ -1746,6 +1775,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { }
struct block_device_operations {
+ blk_qc_t (*submit_bio) (struct bio *bio);
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
@@ -1753,8 +1783,6 @@ struct block_device_operations {
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
- /* ->media_changed() is DEPRECATED, use ->check_events() instead */
- int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
@@ -1834,52 +1862,6 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
}
#endif /* CONFIG_BLK_DEV_ZONED */
-#else /* CONFIG_BLOCK */
-
-struct block_device;
-
-/*
- * stubs for when the block layer is configured out
- */
-#define buffer_heads_over_limit 0
-
-static inline long nr_blockdev_pages(void)
-{
- return 0;
-}
-
-struct blk_plug {
-};
-
-static inline void blk_start_plug(struct blk_plug *plug)
-{
-}
-
-static inline void blk_finish_plug(struct blk_plug *plug)
-{
-}
-
-static inline void blk_flush_plug(struct task_struct *task)
-{
-}
-
-static inline void blk_schedule_flush_plug(struct task_struct *task)
-{
-}
-
-
-static inline bool blk_needs_flush_plug(struct task_struct *tsk)
-{
- return false;
-}
-
-static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
-{
- return 0;
-}
-
-#endif /* CONFIG_BLOCK */
-
static inline void blk_wake_io_task(struct task_struct *waiter)
{
/*
@@ -1893,7 +1875,6 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
-#ifdef CONFIG_BLOCK
unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
unsigned int op);
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
@@ -1919,6 +1900,53 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
{
return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
}
-#endif /* CONFIG_BLOCK */
+int bdev_read_only(struct block_device *bdev);
+int set_blocksize(struct block_device *bdev, int size);
+
+const char *bdevname(struct block_device *bdev, char *buffer);
+struct block_device *lookup_bdev(const char *);
+
+void blkdev_show(struct seq_file *seqf, off_t offset);
+
+#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
+#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_MAX 512
+#else
+#define BLKDEV_MAJOR_MAX 0
+#endif
+
+int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
+int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder);
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+ void *holder);
+void blkdev_put(struct block_device *bdev, fmode_t mode);
+
+struct block_device *I_BDEV(struct inode *inode);
+struct block_device *bdget(dev_t);
+struct block_device *bdgrab(struct block_device *bdev);
+void bdput(struct block_device *);
+
+#ifdef CONFIG_BLOCK
+void invalidate_bdev(struct block_device *bdev);
+int sync_blockdev(struct block_device *bdev);
+#else
+static inline void invalidate_bdev(struct block_device *bdev)
+{
+}
+static inline int sync_blockdev(struct block_device *bdev)
+{
+ return 0;
+}
#endif
+int fsync_bdev(struct block_device *bdev);
+
+struct super_block *freeze_bdev(struct block_device *bdev);
+int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+
+#endif /* _LINUX_BLKDEV_H */
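
generic_make_request()/direct_make_request() are replaced by submit_bio_noacct(), the queue loses its make_request_fn, and, as the block_device_operations hunk shows, bio-based drivers now provide a ->submit_bio method instead. A minimal sketch of the new driver shape, with hypothetical "mydrv" names; error handling and the actual data path are omitted.

static blk_qc_t mydrv_submit_bio(struct bio *bio)
{
	blk_queue_split(&bio);		/* note: no request_queue argument */

	/* ... translate and service the bio ... */
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

static const struct block_device_operations mydrv_fops = {
	.owner		= THIS_MODULE,
	.submit_bio	= mydrv_submit_bio,
};
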
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 22fb11e2d2e0..6b47f94378c5 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -406,6 +406,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+#define buffer_heads_over_limit 0
#endif /* CONFIG_BLOCK */
#endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 8543fa59da72..f48d0a31deae 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -73,7 +73,6 @@ struct cdrom_device_ops {
int (*drive_status) (struct cdrom_device_info *, int);
unsigned int (*check_events) (struct cdrom_device_info *cdi,
unsigned int clearing, int slot);
- int (*media_changed) (struct cdrom_device_info *, int);
int (*tray_move) (struct cdrom_device_info *, int);
int (*lock_door) (struct cdrom_device_info *, int);
int (*select_speed) (struct cdrom_device_info *, int);
@@ -107,7 +106,6 @@ extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
fmode_t mode, unsigned int cmd, unsigned long arg);
extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
unsigned int clearing);
-extern int cdrom_media_changed(struct cdrom_device_info *);
extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi);
extern void unregister_cdrom(struct cdrom_device_info *cdi);
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 204e76856435..6810d80acb0b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -120,65 +120,12 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Annotate a C jump table to allow objtool to follow the code flow */
#define __annotate_jump_table __section(.rodata..c_jump_table)
-#ifdef CONFIG_DEBUG_ENTRY
-/* Begin/end of an instrumentation safe region */
-#define instrumentation_begin() ({ \
- asm volatile("%c0: nop\n\t" \
- ".pushsection .discard.instr_begin\n\t" \
- ".long %c0b - .\n\t" \
- ".popsection\n\t" : : "i" (__COUNTER__)); \
-})
-
-/*
- * Because instrumentation_{begin,end}() can nest, objtool validation considers
- * _begin() a +1 and _end() a -1 and computes a sum over the instructions.
- * When the value is greater than 0, we consider instrumentation allowed.
- *
- * There is a problem with code like:
- *
- * noinstr void foo()
- * {
- * instrumentation_begin();
- * ...
- * if (cond) {
- * instrumentation_begin();
- * ...
- * instrumentation_end();
- * }
- * bar();
- * instrumentation_end();
- * }
- *
- * If instrumentation_end() would be an empty label, like all the other
- * annotations, the inner _end(), which is at the end of a conditional block,
- * would land on the instruction after the block.
- *
- * If we then consider the sum of the !cond path, we'll see that the call to
- * bar() is with a 0-value, even though, we meant it to happen with a positive
- * value.
- *
- * To avoid this, have _end() be a NOP instruction, this ensures it will be
- * part of the condition block and does not escape.
- */
-#define instrumentation_end() ({ \
- asm volatile("%c0: nop\n\t" \
- ".pushsection .discard.instr_end\n\t" \
- ".long %c0b - .\n\t" \
- ".popsection\n\t" : : "i" (__COUNTER__)); \
-})
-#endif /* CONFIG_DEBUG_ENTRY */
-
#else
#define annotate_reachable()
#define annotate_unreachable()
#define __annotate_jump_table
#endif
-#ifndef instrumentation_begin
-#define instrumentation_begin() do { } while(0)
-#define instrumentation_end() do { } while(0)
-#endif
-
#ifndef ASM_UNREACHABLE
# define ASM_UNREACHABLE
#endif
@@ -230,28 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif
-/*
- * Prevent the compiler from merging or refetching reads or writes. The
- * compiler is also forbidden from reordering successive instances of
- * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
- * particular ordering. One way to make the compiler aware of ordering is to
- * put the two invocations of READ_ONCE or WRITE_ONCE in different C
- * statements.
- *
- * These two macros will also work on aggregate data types like structs or
- * unions.
- *
- * Their two major use cases are: (1) Mediating communication between
- * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
- * mutilate accesses that either do not require ordering or that interact
- * with an explicit memory barrier or atomic instruction that provides the
- * required ordering.
- */
-#include <asm/barrier.h>
-#include <linux/kasan-checks.h>
-#include <linux/kcsan-checks.h>
-
/**
* data_race - mark an expression as containing intentional data races
*
@@ -272,65 +197,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__v; \
})
-/*
- * Use __READ_ONCE() instead of READ_ONCE() if you do not require any
- * atomicity or dependency ordering guarantees. Note that this may result
- * in tears!
- */
-#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x))
-
-#define __READ_ONCE_SCALAR(x) \
-({ \
- __unqual_scalar_typeof(x) __x = __READ_ONCE(x); \
- smp_read_barrier_depends(); \
- (typeof(x))__x; \
-})
-
-#define READ_ONCE(x) \
-({ \
- compiletime_assert_rwonce_type(x); \
- __READ_ONCE_SCALAR(x); \
-})
-
-#define __WRITE_ONCE(x, val) \
-do { \
- *(volatile typeof(x) *)&(x) = (val); \
-} while (0)
-
-#define WRITE_ONCE(x, val) \
-do { \
- compiletime_assert_rwonce_type(x); \
- __WRITE_ONCE(x, val); \
-} while (0)
-
-static __no_sanitize_or_inline
-unsigned long __read_once_word_nocheck(const void *addr)
-{
- return __READ_ONCE(*(unsigned long *)addr);
-}
-
-/*
- * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a
- * word from memory atomically but without telling KASAN/KCSAN. This is
- * usually used by unwinding code when walking the stack of a running process.
- */
-#define READ_ONCE_NOCHECK(x) \
-({ \
- unsigned long __x; \
- compiletime_assert(sizeof(x) == sizeof(__x), \
- "Unsupported access size for READ_ONCE_NOCHECK()."); \
- __x = __read_once_word_nocheck(&(x)); \
- smp_read_barrier_depends(); \
- (typeof(x))__x; \
-})
-
-static __no_kasan_or_inline
-unsigned long read_word_at_a_time(const void *addr)
-{
- kasan_check_read(addr, 1);
- return *(unsigned long *)addr;
-}
-
#endif /* __KERNEL__ */
/*
@@ -354,57 +220,6 @@ static inline void *offset_to_ptr(const int *off)
#endif /* __ASSEMBLY__ */
-/* Compile time object size, -1 for unknown */
-#ifndef __compiletime_object_size
-# define __compiletime_object_size(obj) -1
-#endif
-#ifndef __compiletime_warning
-# define __compiletime_warning(message)
-#endif
-#ifndef __compiletime_error
-# define __compiletime_error(message)
-#endif
-
-#ifdef __OPTIMIZE__
-# define __compiletime_assert(condition, msg, prefix, suffix) \
- do { \
- extern void prefix ## suffix(void) __compiletime_error(msg); \
- if (!(condition)) \
- prefix ## suffix(); \
- } while (0)
-#else
-# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
-#endif
-
-#define _compiletime_assert(condition, msg, prefix, suffix) \
- __compiletime_assert(condition, msg, prefix, suffix)
-
-/**
- * compiletime_assert - break build and emit msg if condition is false
- * @condition: a compile-time constant condition to check
- * @msg: a message to emit if condition is false
- *
- * In tradition of POSIX assert, this macro will break the build if the
- * supplied condition is *false*, emitting the supplied error message if the
- * compiler has support to do so.
- */
-#define compiletime_assert(condition, msg) \
- _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
-
-#define compiletime_assert_atomic_type(t) \
- compiletime_assert(__native_word(t), \
- "Need native word sized stores/loads for atomicity.")
-
-/*
- * Yes, this permits 64-bit accesses on 32-bit architectures. These will
- * actually be atomic in some cases (namely Armv7 + LPAE), but for others we
- * rely on the access being split into 2x32-bit accesses for a 32-bit quantity
- * (e.g. a virtual address) and a strong prevailing wind.
- */
-#define compiletime_assert_rwonce_type(t) \
- compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
- "Unsupported access size for {READ,WRITE}_ONCE().")
-
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
@@ -414,4 +229,6 @@ static inline void *offset_to_ptr(const int *off)
*/
#define prevent_tail_call_optimization() mb()
+#include <asm/rwonce.h>
+
#endif /* __LINUX_COMPILER_H */
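
READ_ONCE()/WRITE_ONCE() and their size checks move out of compiler.h; the new <asm/rwonce.h> include at the bottom keeps them visible to existing users, whose code is unchanged. For reference, a tiny usage sketch of the pattern the removed comment described, mediating a flag between process context and an interrupt handler (hypothetical names):

static int stop_requested;	/* set from irq context, polled below */

static void wait_for_stop(void)
{
	/* READ_ONCE() keeps the compiler from hoisting the load out of the loop. */
	while (!READ_ONCE(stop_requested))
		cpu_relax();
}

static void request_stop(void)	/* e.g. called from an irq handler */
{
	WRITE_ONCE(stop_requested, 1);
}
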
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 01dd58c74d80..2e231ba8fe3f 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -275,6 +275,47 @@ struct ftrace_likely_data {
(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+/* Compile time object size, -1 for unknown */
+#ifndef __compiletime_object_size
+# define __compiletime_object_size(obj) -1
+#endif
+#ifndef __compiletime_warning
+# define __compiletime_warning(message)
+#endif
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
+#ifdef __OPTIMIZE__
+# define __compiletime_assert(condition, msg, prefix, suffix) \
+ do { \
+ extern void prefix ## suffix(void) __compiletime_error(msg); \
+ if (!(condition)) \
+ prefix ## suffix(); \
+ } while (0)
+#else
+# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
+#endif
+
+#define _compiletime_assert(condition, msg, prefix, suffix) \
+ __compiletime_assert(condition, msg, prefix, suffix)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg: a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+ _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+
+#define compiletime_assert_atomic_type(t) \
+ compiletime_assert(__native_word(t), \
+ "Need native word sized stores/loads for atomicity.")
+
/* Helpers for emitting diagnostics in pragmas. */
#ifndef __diag
#define __diag(string)
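
compiletime_assert() and its helpers move from compiler.h into compiler_types.h; callers are unaffected. A small usage sketch (the check itself is hypothetical): the macro expands to a statement, so it is used inside a function, and it only breaks the build when the constant condition is false.

static inline void sketch_build_time_checks(void)
{
	compiletime_assert(sizeof(unsigned long) >= sizeof(int),
			   "unsigned long must be at least as wide as int");
}
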
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 981b880d5b60..d53cd331c4dd 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -5,6 +5,8 @@
#include <linux/sched.h>
#include <linux/vtime.h>
#include <linux/context_tracking_state.h>
+#include <linux/instrumentation.h>
+
#include <asm/ptrace.h>
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 763863dbc079..ef90e07c9635 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -16,9 +16,8 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/bug.h>
+#include <linux/refcount.h>
#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
#include <linux/completion.h>
/*
@@ -61,8 +60,8 @@
#define CRYPTO_ALG_ASYNC 0x00000080
/*
- * Set this bit if and only if the algorithm requires another algorithm of
- * the same type to handle corner cases.
+ * Set if the algorithm (or an algorithm which it uses) requires another
+ * algorithm of the same type to handle corner cases.
*/
#define CRYPTO_ALG_NEED_FALLBACK 0x00000100
@@ -102,6 +101,38 @@
#define CRYPTO_NOLOAD 0x00008000
/*
+ * The algorithm may allocate memory during request processing, i.e. during
+ * encryption, decryption, or hashing. Users can request an algorithm with this
+ * flag unset if they can't handle memory allocation failures.
+ *
+ * This flag is currently only implemented for algorithms of type "skcipher",
+ * "aead", "ahash", "shash", and "cipher". Algorithms of other types might not
+ * have this flag set even if they allocate memory.
+ *
+ * In some edge cases, algorithms can allocate memory regardless of this flag.
+ * To avoid these cases, users must obey the following usage constraints:
+ * skcipher:
+ * - The IV buffer and all scatterlist elements must be aligned to the
+ * algorithm's alignmask.
+ * - If the data were to be divided into chunks of size
+ * crypto_skcipher_walksize() (with any remainder going at the end), no
+ * chunk can cross a page boundary or a scatterlist element boundary.
+ * aead:
+ * - The IV buffer and all scatterlist elements must be aligned to the
+ * algorithm's alignmask.
+ * - The first scatterlist element must contain all the associated data,
+ * and its pages must be !PageHighMem.
+ * - If the plaintext/ciphertext were to be divided into chunks of size
+ * crypto_aead_walksize() (with the remainder going at the end), no chunk
+ * can cross a page boundary or a scatterlist element boundary.
+ * ahash:
+ * - The result buffer must be aligned to the algorithm's alignmask.
+ * - crypto_ahash_finup() must not be used unless the algorithm implements
+ * ->finup() natively.
+ */
+#define CRYPTO_ALG_ALLOCATES_MEMORY 0x00010000
+
+/*
* Transform masks and values (for crt_flags).
*/
#define CRYPTO_TFM_NEED_KEY 0x00000001
@@ -595,6 +626,8 @@ int crypto_has_alg(const char *name, u32 type, u32 mask);
struct crypto_tfm {
u32 crt_flags;
+
+ int node;
void (*exit)(struct crypto_tfm *tfm);
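
The new CRYPTO_ALG_ALLOCATES_MEMORY flag is advertised by algorithms that may allocate during request processing; users who cannot tolerate allocation failures request an implementation with the flag clear by putting it in the lookup mask. A hedged sketch, assuming crypto_alloc_skcipher() from <crypto/skcipher.h>; the algorithm name is just an example.

#include <crypto/skcipher.h>

static struct crypto_skcipher *get_nonallocating_cipher(void)
{
	/*
	 * type = 0, mask = CRYPTO_ALG_ALLOCATES_MEMORY: only match
	 * implementations that do not set the flag.
	 */
	return crypto_alloc_skcipher("cbc(aes)", 0,
				     CRYPTO_ALG_ALLOCATES_MEMORY);
}
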
diff --git a/include/linux/dasd_mod.h b/include/linux/dasd_mod.h
index d39abad2ff6e..14e6cf8c6267 100644
--- a/include/linux/dasd_mod.h
+++ b/include/linux/dasd_mod.h
@@ -4,6 +4,8 @@
#include <asm/dasd.h>
+struct gendisk;
+
extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info);
#endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8750f2dc5613..93096e524e43 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -322,12 +322,6 @@ struct dm_target {
bool discards_supported:1;
};
-/* Each target can link one of these into the table */
-struct dm_target_callbacks {
- struct list_head list;
- int (*congested_fn) (struct dm_target_callbacks *, int);
-};
-
void *dm_per_bio_data(struct bio *bio, size_t data_size);
struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size);
unsigned dm_bio_get_target_bio_nr(const struct bio *bio);
@@ -426,6 +420,7 @@ const char *dm_device_name(struct mapped_device *md);
int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
struct gendisk *dm_disk(struct mapped_device *md);
int dm_suspended(struct dm_target *ti);
+int dm_post_suspending(struct dm_target *ti);
int dm_noflush_suspending(struct dm_target *ti);
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
union map_info *dm_get_rq_mapinfo(struct request *rq);
@@ -478,11 +473,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
sector_t start, sector_t len, char *params);
/*
- * Target_ctr should call this if it needs to add any callbacks.
- */
-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);
-
-/*
* Target can use this to set the table's type.
* Can only ever be called from a target's ctr.
* Useful for "hybrid" target (supports both bio-based
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bb35f3305e55..05c47f857383 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -994,6 +994,7 @@ int efivars_register(struct efivars *efivars,
int efivars_unregister(struct efivars *efivars);
struct kobject *efivars_kobject(void);
+int efivar_supports_writes(void);
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
void *data, bool duplicates, struct list_head *head);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba86107d..bd7ec3eaeed0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
@@ -315,6 +318,8 @@ enum rw_hint {
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ (1 << 8)
#define IOCB_NOIO (1 << 9)
struct kiocb {
@@ -329,7 +334,10 @@ struct kiocb {
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- unsigned int ki_cookie; /* for ->iopoll */
+ union {
+ unsigned int ki_cookie; /* for ->iopoll */
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ };
randomized_struct_fields_end
};
@@ -471,45 +479,6 @@ struct address_space {
* must be enforced here for CRIS, to let the least significant bit
* of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
*/
-struct request_queue;
-
-struct block_device {
- dev_t bd_dev; /* not a kdev_t - it's a search key */
- int bd_openers;
- struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
- struct mutex bd_mutex; /* open/close mutex */
- void * bd_claiming;
- void * bd_holder;
- int bd_holders;
- bool bd_write_holder;
-#ifdef CONFIG_SYSFS
- struct list_head bd_holder_disks;
-#endif
- struct block_device * bd_contains;
- unsigned bd_block_size;
- u8 bd_partno;
- struct hd_struct * bd_part;
- /* number of times partitions within this device have been opened. */
- unsigned bd_part_count;
- int bd_invalidated;
- struct gendisk * bd_disk;
- struct request_queue * bd_queue;
- struct backing_dev_info *bd_bdi;
- struct list_head bd_list;
- /*
- * Private data. You must have bd_claim'ed the block_device
- * to use this. NOTE: bd_claim allows an owner to claim
- * the same device multiple times, the owner must take special
- * care to not mess up bd_private for that case.
- */
- unsigned long bd_private;
-
- /* The counter of freeze processes */
- int bd_fsfreeze_count;
- /* Mutex for freeze */
- struct mutex bd_fsfreeze_mutex;
-} __randomize_layout;
/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
#define PAGECACHE_TAG_DIRTY XA_MARK_0
@@ -908,8 +877,6 @@ static inline unsigned imajor(const struct inode *inode)
return MAJOR(inode->i_rdev);
}
-extern struct block_device *I_BDEV(struct inode *inode);
-
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
@@ -1381,6 +1348,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_NODIRATIME 2048 /* Do not update directory access times */
#define SB_SILENT 32768
#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define SB_I_VERSION (1<<23) /* Update inode I_version field */
#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
@@ -1775,14 +1743,6 @@ struct dir_context {
loff_t pos;
};
-struct block_device_operations;
-
-/* These macros are for out of kernel modules to test that
- * the kernel supports the unlocked_ioctl and compat_ioctl
- * fields in struct file_operations. */
-#define HAVE_COMPAT_IOCTL 1
-#define HAVE_UNLOCKED_IOCTL 1
-
/*
* These flags let !MMU mmap() govern direct device mapping vs immediate
* copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@ -2264,18 +2224,9 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
-#ifdef CONFIG_BLOCK
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
-#else
-static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int))
-{
- return ERR_PTR(-ENODEV);
-}
-#endif
extern struct dentry *mount_single(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2284,14 +2235,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type,
int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void generic_shutdown_super(struct super_block *sb);
-#ifdef CONFIG_BLOCK
void kill_block_super(struct super_block *sb);
-#else
-static inline void kill_block_super(struct super_block *sb)
-{
- BUG();
-}
-#endif
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
@@ -2581,93 +2525,16 @@ extern struct kmem_cache *names_cachep;
#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
-#ifdef CONFIG_BLOCK
-extern int register_blkdev(unsigned int, const char *);
-extern void unregister_blkdev(unsigned int, const char *);
-extern struct block_device *bdget(dev_t);
-extern struct block_device *bdgrab(struct block_device *bdev);
-extern void bd_set_size(struct block_device *, loff_t size);
-extern void bd_forget(struct inode *inode);
-extern void bdput(struct block_device *);
-extern void invalidate_bdev(struct block_device *);
-extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
-extern int sync_blockdev(struct block_device *bdev);
-extern struct super_block *freeze_bdev(struct block_device *);
-extern void emergency_thaw_all(void);
-extern void emergency_thaw_bdev(struct super_block *sb);
-extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
-extern int fsync_bdev(struct block_device *);
-
extern struct super_block *blockdev_superblock;
-
static inline bool sb_is_blkdev_sb(struct super_block *sb)
{
- return sb == blockdev_superblock;
-}
-#else
-static inline void bd_forget(struct inode *inode) {}
-static inline int sync_blockdev(struct block_device *bdev) { return 0; }
-static inline void invalidate_bdev(struct block_device *bdev) {}
-
-static inline struct super_block *freeze_bdev(struct block_device *sb)
-{
- return NULL;
-}
-
-static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
- return 0;
+ return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
}
-static inline int emergency_thaw_bdev(struct super_block *sb)
-{
- return 0;
-}
-
-static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
-{
-}
-
-static inline bool sb_is_blkdev_sb(struct super_block *sb)
-{
- return false;
-}
-#endif
+void emergency_thaw_all(void);
extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
-#ifdef CONFIG_BLOCK
-extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
-extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder);
-extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- void *holder);
-extern struct block_device *bd_start_claiming(struct block_device *bdev,
- void *holder);
-extern void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
-extern void bd_abort_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
-extern void blkdev_put(struct block_device *bdev, fmode_t mode);
-
-#ifdef CONFIG_SYSFS
-extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-extern void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk);
-#else
-static inline int bd_link_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
- return 0;
-}
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
-}
-#endif
-#endif
/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX 512
@@ -2698,31 +2565,12 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
__unregister_chrdev(major, 0, 256, name);
}
-/* fs/block_dev.c */
-#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
-#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
-
-#ifdef CONFIG_BLOCK
-#define BLKDEV_MAJOR_MAX 512
-extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
-extern void blkdev_show(struct seq_file *,off_t);
-
-#else
-#define BLKDEV_MAJOR_MAX 0
-#endif
-
extern void init_special_inode(struct inode *, umode_t, dev_t);
/* Invalid inode operations -- fs/bad_inode.c */
extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
-#ifdef CONFIG_BLOCK
-extern int revalidate_disk(struct gendisk *);
-extern int check_disk_change(struct block_device *);
-extern int __invalidate_device(struct block_device *, bool);
-#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
@@ -3123,10 +2971,6 @@ static inline void remove_inode_hash(struct inode *inode)
extern void inode_sb_list_add(struct inode *inode);
-#ifdef CONFIG_BLOCK
-extern int bdev_read_only(struct block_device *);
-#endif
-extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
@@ -3439,22 +3283,28 @@ static inline int iocb_flags(struct file *file)
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
{
+ int kiocb_flags = 0;
+
+ if (!flags)
+ return 0;
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
return -EOPNOTSUPP;
- ki->ki_flags |= IOCB_NOWAIT;
+ kiocb_flags |= IOCB_NOWAIT;
}
if (flags & RWF_HIPRI)
- ki->ki_flags |= IOCB_HIPRI;
+ kiocb_flags |= IOCB_HIPRI;
if (flags & RWF_DSYNC)
- ki->ki_flags |= IOCB_DSYNC;
+ kiocb_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
- ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
if (flags & RWF_APPEND)
- ki->ki_flags |= IOCB_APPEND;
+ kiocb_flags |= IOCB_APPEND;
+
+ ki->ki_flags |= kiocb_flags;
return 0;
}
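
The reworked kiocb_set_rw_flags() above short-circuits the common flags == 0 case and accumulates the translated bits in a local kiocb_flags variable, so ki->ki_flags is read-modify-written only once at the end. A minimal userspace sketch of the same translate-then-commit pattern, using hypothetical FLAG_*/OUT_* names and a plain unsigned int in place of the kiocb:

#include <errno.h>
#include <stdio.h>

#define FLAG_A     0x1                  /* hypothetical input flags */
#define FLAG_B     0x2
#define SUPPORTED  (FLAG_A | FLAG_B)

#define OUT_A      0x10                 /* hypothetical output flags */
#define OUT_B      0x20

/* Translate input flags into *state, touching *state only on success. */
static int apply_flags(unsigned int *state, unsigned int flags)
{
        unsigned int out = 0;

        if (!flags)
                return 0;
        if (flags & ~SUPPORTED)
                return -EOPNOTSUPP;     /* *state is left untouched */
        if (flags & FLAG_A)
                out |= OUT_A;
        if (flags & FLAG_B)
                out |= OUT_B;

        *state |= out;                  /* single commit at the end */
        return 0;
}

int main(void)
{
        unsigned int state = 0;
        int ret;

        ret = apply_flags(&state, FLAG_A);
        printf("%d %#x\n", ret, state);         /* 0 0x10 */
        ret = apply_flags(&state, 0x8);
        printf("%d %#x\n", ret, state);         /* -95 0x10 */
        return 0;
}
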
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 2862ca5fea33..991ff8575d0e 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -69,12 +69,20 @@ struct fscrypt_operations {
bool (*has_stable_inodes)(struct super_block *sb);
void (*get_ino_and_lblk_bits)(struct super_block *sb,
int *ino_bits_ret, int *lblk_bits_ret);
+ int (*get_num_devices)(struct super_block *sb);
+ void (*get_devices)(struct super_block *sb,
+ struct request_queue **devs);
};
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
{
- /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */
- return READ_ONCE(inode->i_crypt_info) != NULL;
+ /*
+ * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info().
+ * I.e., another task may publish ->i_crypt_info concurrently, executing
+ * a RELEASE barrier. We need to use smp_load_acquire() here to safely
+ * ACQUIRE the memory the other task published.
+ */
+ return smp_load_acquire(&inode->i_crypt_info);
}
/**
@@ -231,9 +239,9 @@ static inline void fscrypt_set_ops(struct super_block *sb,
}
#else /* !CONFIG_FS_ENCRYPTION */
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
{
- return false;
+ return NULL;
}
static inline bool fscrypt_needs_contents_encryption(const struct inode *inode)
@@ -537,6 +545,99 @@ static inline void fscrypt_set_ops(struct super_block *sb,
#endif /* !CONFIG_FS_ENCRYPTION */
+/* inline_crypt.c */
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+
+bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode);
+
+void fscrypt_set_bio_crypt_ctx(struct bio *bio,
+ const struct inode *inode, u64 first_lblk,
+ gfp_t gfp_mask);
+
+void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio,
+ const struct buffer_head *first_bh,
+ gfp_t gfp_mask);
+
+bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode,
+ u64 next_lblk);
+
+bool fscrypt_mergeable_bio_bh(struct bio *bio,
+ const struct buffer_head *next_bh);
+
+#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
+static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode)
+{
+ return false;
+}
+
+static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio,
+ const struct inode *inode,
+ u64 first_lblk, gfp_t gfp_mask) { }
+
+static inline void fscrypt_set_bio_crypt_ctx_bh(
+ struct bio *bio,
+ const struct buffer_head *first_bh,
+ gfp_t gfp_mask) { }
+
+static inline bool fscrypt_mergeable_bio(struct bio *bio,
+ const struct inode *inode,
+ u64 next_lblk)
+{
+ return true;
+}
+
+static inline bool fscrypt_mergeable_bio_bh(struct bio *bio,
+ const struct buffer_head *next_bh)
+{
+ return true;
+}
+#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */
+
+/**
+ * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline
+ * encryption
+ * @inode: an inode. If encrypted, its key must be set up.
+ *
+ * Return: true if the inode requires file contents encryption and if the
+ * encryption should be done in the block layer via blk-crypto rather
+ * than in the filesystem layer.
+ */
+static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode)
+{
+ return fscrypt_needs_contents_encryption(inode) &&
+ __fscrypt_inode_uses_inline_crypto(inode);
+}
+
+/**
+ * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer
+ * encryption
+ * @inode: an inode. If encrypted, its key must be set up.
+ *
+ * Return: true if the inode requires file contents encryption and if the
+ * encryption should be done in the filesystem layer rather than in the
+ * block layer via blk-crypto.
+ */
+static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode)
+{
+ return fscrypt_needs_contents_encryption(inode) &&
+ !__fscrypt_inode_uses_inline_crypto(inode);
+}
+
+/**
+ * fscrypt_has_encryption_key() - check whether an inode has had its key set up
+ * @inode: the inode to check
+ *
+ * Return: %true if the inode has had its encryption key set up, else %false.
+ *
+ * Usually this should be preceded by fscrypt_get_encryption_info() to try to
+ * set up the key first.
+ */
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+ return fscrypt_get_info(inode) != NULL;
+}
+
/**
* fscrypt_require_key() - require an inode's encryption key
* @inode: the inode we need the key for
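
The comment in fscrypt_get_info() above describes a publication pattern: the key-setup path fully initializes the fscrypt_info and then publishes it with cmpxchg_release(), and readers must use smp_load_acquire() so that initialization is visible to them. A minimal userspace analogue with C11 atomics; the crypt_info and inode_like types are hypothetical stand-ins for the kernel structures:

#include <stdatomic.h>
#include <stddef.h>

struct crypt_info {
        int keysize;                            /* initialized before publication */
};

struct inode_like {
        _Atomic(struct crypt_info *) info;      /* stands in for ->i_crypt_info */
};

/* Setup side: initialize the object, then publish it with RELEASE semantics. */
static void publish_info(struct inode_like *ino, struct crypt_info *ci)
{
        struct crypt_info *expected = NULL;

        ci->keysize = 64;
        /* only the first publisher wins, like the cmpxchg_release() in fscrypt */
        atomic_compare_exchange_strong_explicit(&ino->info, &expected, ci,
                                                memory_order_release,
                                                memory_order_relaxed);
}

/* Reader side: the ACQUIRE load pairs with the release above, so a non-NULL
 * result guarantees the initialized fields are visible. */
static struct crypt_info *get_info(const struct inode_like *ino)
{
        return atomic_load_explicit(&ino->info, memory_order_acquire);
}

int main(void)
{
        static struct crypt_info ci;
        struct inode_like ino = { NULL };

        publish_info(&ino, &ci);
        return get_info(&ino) ? 0 : 1;
}
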
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 78201a6d35f6..c1144a450392 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -115,8 +115,13 @@ struct fsverity_operations {
static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
- /* pairs with the cmpxchg() in fsverity_set_info() */
- return READ_ONCE(inode->i_verity_info);
+ /*
+ * Pairs with the cmpxchg_release() in fsverity_set_info().
+ * I.e., another task may publish ->i_verity_info concurrently,
+ * executing a RELEASE barrier. We need to use smp_load_acquire() here
+ * to safely ACQUIRE the memory the other task published.
+ */
+ return smp_load_acquire(&inode->i_verity_info);
}
/* enable.c */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e339dac91ee6..ce2c06f72e86 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -58,9 +58,6 @@ struct ftrace_direct_func;
const char *
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym);
-int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
- char *type, char *name,
- char *module_name, int *exported);
#else
static inline const char *
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
@@ -68,6 +65,13 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
{
return NULL;
}
+#endif
+
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
+ char *type, char *name,
+ char *module_name, int *exported);
+#else
static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
char *type, char *name,
char *module_name, int *exported)
@@ -76,7 +80,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
}
#endif
-
#ifdef CONFIG_FUNCTION_TRACER
extern int ftrace_enabled;
@@ -207,6 +210,7 @@ struct ftrace_ops {
struct ftrace_ops_hash old_hash;
unsigned long trampoline;
unsigned long trampoline_size;
+ struct list_head list;
#endif
};
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 392aad5e29a2..4ab853461dff 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -19,13 +19,12 @@
#include <linux/blk_types.h>
#include <asm/local.h>
-#ifdef CONFIG_BLOCK
-
#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev)
#define dev_to_part(device) container_of((device), struct hd_struct, __dev)
#define disk_to_dev(disk) (&(disk)->part0.__dev)
#define part_to_dev(part) (&((part)->__dev))
+extern const struct device_type disk_type;
extern struct device_type part_type;
extern struct class block_class;
@@ -337,12 +336,9 @@ static inline void set_capacity(struct gendisk *disk, sector_t size)
disk->part0.nr_sects = size;
}
-extern dev_t blk_lookup_devt(const char *name, int partno);
-
int bdev_disk_changed(struct block_device *bdev, bool invalidate);
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
int blk_drop_partitions(struct block_device *bdev);
-extern void printk_all_partitions(void);
extern struct gendisk *__alloc_disk_node(int minors, int node_id);
extern struct kobject *get_disk_and_module(struct gendisk *disk);
@@ -373,10 +369,40 @@ extern void blk_unregister_region(dev_t devt, unsigned long range);
#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
-#else /* CONFIG_BLOCK */
+int register_blkdev(unsigned int major, const char *name);
+void unregister_blkdev(unsigned int major, const char *name);
-static inline void printk_all_partitions(void) { }
+int revalidate_disk(struct gendisk *disk);
+int check_disk_change(struct block_device *bdev);
+int __invalidate_device(struct block_device *bdev, bool kill_dirty);
+void bd_set_size(struct block_device *bdev, loff_t size);
+/* for drivers/char/raw.c: */
+int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
+long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
+
+#ifdef CONFIG_SYSFS
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+#else
+static inline int bd_link_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ return 0;
+}
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+}
+#endif /* CONFIG_SYSFS */
+
+#ifdef CONFIG_BLOCK
+void printk_all_partitions(void);
+dev_t blk_lookup_devt(const char *name, int partno);
+#else /* CONFIG_BLOCK */
+static inline void printk_all_partitions(void)
+{
+}
static inline dev_t blk_lookup_devt(const char *name, int partno)
{
dev_t devt = MKDEV(0, 0);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 03c9fece7d43..754f67ac4326 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define __nmi_enter() \
do { \
+ lockdep_off(); \
arch_nmi_enter(); \
printk_nmi_enter(); \
- lockdep_off(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
- rcu_nmi_enter(); \
+ } while (0)
+
+#define nmi_enter() \
+ do { \
+ __nmi_enter(); \
lockdep_hardirq_enter(); \
+ rcu_nmi_enter(); \
instrumentation_begin(); \
ftrace_nmi_enter(); \
instrumentation_end(); \
} while (0)
+#define __nmi_exit() \
+ do { \
+ BUG_ON(!in_nmi()); \
+ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
+ printk_nmi_exit(); \
+ arch_nmi_exit(); \
+ lockdep_on(); \
+ } while (0)
+
#define nmi_exit() \
do { \
instrumentation_begin(); \
ftrace_nmi_exit(); \
instrumentation_end(); \
- lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
- BUG_ON(!in_nmi()); \
- __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_on(); \
- printk_nmi_exit(); \
- arch_nmi_exit(); \
+ lockdep_hardirq_exit(); \
+ __nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b8b8963f8bb9..4e7714c88f95 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -56,7 +56,7 @@ struct property_entry;
* on a bus (or read from them). Apart from two basic transfer functions to
* transmit one message at a time, a more complex version can be used to
* transmit an arbitrary number of messages without interruption.
- * @count must be be less than 64k since msg.len is u16.
+ * @count must be less than 64k since msg.len is u16.
*/
int i2c_transfer_buffer_flags(const struct i2c_client *client,
char *buf, int count, u16 flags);
@@ -1001,7 +1001,7 @@ static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
int index, struct i2c_board_info *info)
{
- return NULL;
+ return ERR_PTR(-ENODEV);
}
static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
{
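
With the stub above returning ERR_PTR(-ENODEV) instead of NULL, callers of i2c_acpi_new_device() can use a single IS_ERR() check whether or not ACPI support is built in. A hedged kernel-style sketch of such a caller; probe_acpi_child() and its info argument are hypothetical:

static int probe_acpi_child(struct device *dev, struct i2c_board_info *info)
{
        struct i2c_client *client;

        client = i2c_acpi_new_device(dev, 0, info);
        if (IS_ERR(client))
                return PTR_ERR(client); /* -ENODEV when the !ACPI stub is used */
        return 0;
}
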
diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
new file mode 100644
index 000000000000..93e2ad67fc10
--- /dev/null
+++ b/include/linux/instrumentation.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_INSTRUMENTATION_H
+#define __LINUX_INSTRUMENTATION_H
+
+#if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION)
+
+/* Begin/end of an instrumentation safe region */
+#define instrumentation_begin() ({ \
+ asm volatile("%c0: nop\n\t" \
+ ".pushsection .discard.instr_begin\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
+})
+
+/*
+ * Because instrumentation_{begin,end}() can nest, objtool validation considers
+ * _begin() a +1 and _end() a -1 and computes a sum over the instructions.
+ * When the value is greater than 0, we consider instrumentation allowed.
+ *
+ * There is a problem with code like:
+ *
+ * noinstr void foo()
+ * {
+ * instrumentation_begin();
+ * ...
+ * if (cond) {
+ * instrumentation_begin();
+ * ...
+ * instrumentation_end();
+ * }
+ * bar();
+ * instrumentation_end();
+ * }
+ *
+ * If instrumentation_end() would be an empty label, like all the other
+ * annotations, the inner _end(), which is at the end of a conditional block,
+ * would land on the instruction after the block.
+ *
+ * If we then consider the sum over the !cond path, we'll see that the call to
+ * bar() happens with a 0-value, even though we meant it to happen with a
+ * positive value.
+ *
+ * To avoid this, _end() emits a real NOP instruction; this ensures it remains
+ * part of the conditional block and does not escape.
+ */
+#define instrumentation_end() ({ \
+ asm volatile("%c0: nop\n\t" \
+ ".pushsection .discard.instr_end\n\t" \
+ ".long %c0b - .\n\t" \
+ ".popsection\n\t" : : "i" (__COUNTER__)); \
+})
+#else
+# define instrumentation_begin() do { } while(0)
+# define instrumentation_end() do { } while(0)
+#endif
+
+#endif /* __LINUX_INSTRUMENTATION_H */
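
The comment above explains that objtool counts instrumentation_begin() as +1 and instrumentation_end() as -1 and only allows instrumentable calls while the running sum is positive, which is why _end() must emit a real NOP that stays inside its conditional block. A hedged sketch of the intended usage inside a noinstr function; enter_from_user() and trace_something() are hypothetical placeholders for entry code and an instrumentable call:

noinstr void enter_from_user(void)
{
        /* ... work that must not be instrumented ... */

        instrumentation_begin();        /* objtool sum: 0 -> 1 */
        trace_something();              /* instrumentation allowed here */
        instrumentation_end();          /* back to 0 before leaving the region */

        /* ... more non-instrumentable work ... */
}
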
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 0beaa3eba155..c75e4d3d8833 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -107,9 +107,12 @@ io_mapping_init_wc(struct io_mapping *iomap,
resource_size_t base,
unsigned long size)
{
+ iomap->iomem = ioremap_wc(base, size);
+ if (!iomap->iomem)
+ return NULL;
+
iomap->base = base;
iomap->size = size;
- iomap->iomem = ioremap_wc(base, size);
#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
#elif defined(pgprot_writecombine)
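
Doing the ioremap_wc() first and bailing out before the other fields are filled in means a NULL return from io_mapping_init_wc() tells the caller that nothing was mapped. A hedged kernel-style caller sketch; map_device_bar() and its bar_base/bar_size parameters are hypothetical:

static int map_device_bar(struct io_mapping *iomap, resource_size_t bar_base,
                          unsigned long bar_size)
{
        if (!io_mapping_init_wc(iomap, bar_base, bar_size))
                return -ENOMEM;         /* ioremap_wc() failed, nothing to undo */
        return 0;
}
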
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8d5bc2c237d7..1b7f4dfee35b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -213,6 +213,8 @@ struct irq_data {
* required
* IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked
* from actual interrupt context.
+ * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call
+ * irq_chip::irq_set_affinity() when deactivated.
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -237,6 +239,7 @@ enum {
IRQD_CAN_RESERVE = (1 << 26),
IRQD_MSI_NOMASK_QUIRK = (1 << 27),
IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28),
+ IRQD_AFFINITY_ON_ACTIVATE = (1 << 29),
};
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
}
+static inline void irqd_set_affinity_on_activate(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE;
+}
+
+static inline bool irqd_affinity_on_activate(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE;
+}
+
#undef __irqd_to_state
static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 6384d2813ded..bd5c55755447 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@
#include <linux/typecheck.h>
#include <asm/irqflags.h>
+#include <asm/percpu.h>
/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,35 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+
+/* Per-task IRQ trace events information. */
+struct irqtrace_events {
+ unsigned int irq_events;
+ unsigned long hardirq_enable_ip;
+ unsigned long hardirq_disable_ip;
+ unsigned int hardirq_enable_event;
+ unsigned int hardirq_disable_event;
+ unsigned long softirq_disable_ip;
+ unsigned long softirq_enable_ip;
+ unsigned int softirq_disable_event;
+ unsigned int softirq_enable_event;
+};
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p) ((p)->hardirq_context)
+# define lockdep_hardirq_context() (this_cpu_read(hardirq_context))
# define lockdep_softirq_context(p) ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter() \
-do { \
- if (!current->hardirq_context++) \
- current->hardirq_threaded = 0; \
+# define lockdep_hardirq_enter() \
+do { \
+ if (this_cpu_inc_return(hardirq_context) == 1) \
+ current->hardirq_threaded = 0; \
} while (0)
# define lockdep_hardirq_threaded() \
do { \
@@ -50,7 +68,7 @@ do { \
} while (0)
# define lockdep_hardirq_exit() \
do { \
- current->hardirq_context--; \
+ this_cpu_dec(hardirq_context); \
} while (0)
# define lockdep_softirq_enter() \
do { \
@@ -104,9 +122,9 @@ do { \
# define trace_hardirqs_off_finish() do { } while (0)
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
-# define lockdep_hardirq_context(p) 0
+# define lockdep_hardirq_context() 0
# define lockdep_softirq_context(p) 0
-# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_hardirqs_enabled() 0
# define lockdep_softirqs_enabled(p) 0
# define lockdep_hardirq_enter() do { } while (0)
# define lockdep_hardirq_threaded() do { } while (0)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d56128df2aff..4aaa29772bb0 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -27,6 +27,7 @@
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
+#include <linux/blkdev.h>
#include <crypto/hash.h>
#endif
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 6adf90f248d7..45b8cdc9fad7 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -242,6 +242,7 @@ struct kprobe_insn_cache {
struct mutex mutex;
void *(*alloc)(void); /* allocate insn page */
void (*free)(void *); /* free insn page */
+ const char *sym; /* symbol for insn pages */
struct list_head pages; /* list of kprobe_insn_page */
size_t insn_size; /* size of instruction slot */
int nr_garbage;
@@ -272,6 +273,10 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
{ \
return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \
}
+#define KPROBE_INSN_PAGE_SYM "kprobe_insn_page"
+#define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page"
+int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
+ unsigned long *value, char *type, char *sym);
#else /* __ARCH_WANT_KPROBES_INSN_SLOT */
#define DEFINE_INSN_CACHE_OPS(__name) \
static inline bool is_kprobe_##__name##_slot(unsigned long addr) \
@@ -377,6 +382,11 @@ void dump_kprobe(struct kprobe *kp);
void *alloc_insn_page(void);
void free_insn_page(void *page);
+int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *sym);
+
+int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
+ char *type, char *sym);
#else /* !CONFIG_KPROBES: */
static inline int kprobes_built_in(void)
@@ -439,6 +449,11 @@ static inline bool within_kprobe_blacklist(unsigned long addr)
{
return true;
}
+static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value,
+ char *type, char *sym)
+{
+ return -ERANGE;
+}
#endif /* CONFIG_KPROBES */
static inline int disable_kretprobe(struct kretprobe *rp)
{
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index ee8ec2e68055..1db223710b28 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -631,7 +631,6 @@ static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
return last;
}
-typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
int flags);
@@ -650,7 +649,7 @@ struct nvm_tgt_type {
int flags;
/* target entry points */
- nvm_tgt_make_rq_fn *make_rq;
+ const struct block_device_operations *bops;
nvm_tgt_capacity_fn *capacity;
/* module-specific init/teardown */
diff --git a/include/linux/list.h b/include/linux/list.h
index aff44d34f4e4..0d0d17a10d25 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -283,6 +283,24 @@ static inline int list_empty(const struct list_head *head)
}
/**
+ * list_del_init_careful - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ *
+ * This is the same as list_del_init(), except designed to be used
+ * together with list_empty_careful() in a way to guarantee ordering
+ * of other memory operations.
+ *
+ * Any memory operations done before a list_del_init_careful() are
+ * guaranteed to be visible after a list_empty_careful() test.
+ */
+static inline void list_del_init_careful(struct list_head *entry)
+{
+ __list_del_entry(entry);
+ entry->prev = entry;
+ smp_store_release(&entry->next, entry);
+}
+
+/**
* list_empty_careful - tests whether a list is empty and not being modified
* @head: the list to test
*
@@ -297,7 +315,7 @@ static inline int list_empty(const struct list_head *head)
*/
static inline int list_empty_careful(const struct list_head *head)
{
- struct list_head *next = head->next;
+ struct list_head *next = smp_load_acquire(&head->next);
return (next == head) && (next == head->prev);
}
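
list_del_init_careful() pairs an smp_store_release() on entry->next with the smp_load_acquire() in list_empty_careful(), so writes made before the careful delete are guaranteed to be visible to a task that then observes the entry as empty. A hedged kernel-style sketch of that waker/waiter pattern; struct waiter and its result field are hypothetical:

struct waiter {
        struct list_head entry;         /* queued on some wait list */
        int result;                     /* written before the careful delete */
};

/* Waker: publish the result, then detach the entry with RELEASE semantics. */
static void complete_waiter(struct waiter *w)
{
        w->result = 1;
        list_del_init_careful(&w->entry);
}

/* Waiter: if the ACQUIRE test sees the entry as empty, w->result is visible. */
static bool waiter_done(struct waiter *w)
{
        return list_empty_careful(&w->entry);
}
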
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8fce5c98a4b0..39a35699d0d6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -10,33 +10,15 @@
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H
+#include <linux/lockdep_types.h>
+#include <asm/percpu.h>
+
struct task_struct;
-struct lockdep_map;
/* for sysctl */
extern int prove_locking;
extern int lock_stat;
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#include <linux/types.h>
-
-enum lockdep_wait_type {
- LD_WAIT_INV = 0, /* not checked, catch all */
-
- LD_WAIT_FREE, /* wait free, rcu etc.. */
- LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
-
-#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
- LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
-#else
- LD_WAIT_CONFIG = LD_WAIT_SPIN,
-#endif
- LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
-
- LD_WAIT_MAX, /* must be last */
-};
-
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
@@ -44,147 +26,6 @@ enum lockdep_wait_type {
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
-/*
- * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
- * the total number of states... :-(
- */
-#define XXX_LOCK_USAGE_STATES (1+2*4)
-
-/*
- * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
- * cached in the instance of lockdep_map
- *
- * Currently main class (subclass == 0) and signle depth subclass
- * are cached in lockdep_map. This optimization is mainly targeting
- * on rq->lock. double_rq_lock() acquires this highly competitive with
- * single depth.
- */
-#define NR_LOCKDEP_CACHING_CLASSES 2
-
-/*
- * A lockdep key is associated with each lock object. For static locks we use
- * the lock address itself as the key. Dynamically allocated lock objects can
- * have a statically or dynamically allocated key. Dynamically allocated lock
- * keys must be registered before being used and must be unregistered before
- * the key memory is freed.
- */
-struct lockdep_subclass_key {
- char __one_byte;
-} __attribute__ ((__packed__));
-
-/* hash_entry is used to keep track of dynamically allocated keys. */
-struct lock_class_key {
- union {
- struct hlist_node hash_entry;
- struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
- };
-};
-
-extern struct lock_class_key __lockdep_no_validate__;
-
-struct lock_trace;
-
-#define LOCKSTAT_POINTS 4
-
-/*
- * The lock-class itself. The order of the structure members matters.
- * reinit_class() zeroes the key member and all subsequent members.
- */
-struct lock_class {
- /*
- * class-hash:
- */
- struct hlist_node hash_entry;
-
- /*
- * Entry in all_lock_classes when in use. Entry in free_lock_classes
- * when not in use. Instances that are being freed are on one of the
- * zapped_classes lists.
- */
- struct list_head lock_entry;
-
- /*
- * These fields represent a directed graph of lock dependencies,
- * to every node we attach a list of "forward" and a list of
- * "backward" graph nodes.
- */
- struct list_head locks_after, locks_before;
-
- const struct lockdep_subclass_key *key;
- unsigned int subclass;
- unsigned int dep_gen_id;
-
- /*
- * IRQ/softirq usage tracking bits:
- */
- unsigned long usage_mask;
- const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
-
- /*
- * Generation counter, when doing certain classes of graph walking,
- * to ensure that we check one node only once:
- */
- int name_version;
- const char *name;
-
- short wait_type_inner;
- short wait_type_outer;
-
-#ifdef CONFIG_LOCK_STAT
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
-#endif
-} __no_randomize_layout;
-
-#ifdef CONFIG_LOCK_STAT
-struct lock_time {
- s64 min;
- s64 max;
- s64 total;
- unsigned long nr;
-};
-
-enum bounce_type {
- bounce_acquired_write,
- bounce_acquired_read,
- bounce_contended_write,
- bounce_contended_read,
- nr_bounce_types,
-
- bounce_acquired = bounce_acquired_write,
- bounce_contended = bounce_contended_write,
-};
-
-struct lock_class_stats {
- unsigned long contention_point[LOCKSTAT_POINTS];
- unsigned long contending_point[LOCKSTAT_POINTS];
- struct lock_time read_waittime;
- struct lock_time write_waittime;
- struct lock_time read_holdtime;
- struct lock_time write_holdtime;
- unsigned long bounces[nr_bounce_types];
-};
-
-struct lock_class_stats lock_stats(struct lock_class *class);
-void clear_lock_stats(struct lock_class *class);
-#endif
-
-/*
- * Map the lock object (the lock instance) to the lock-class object.
- * This is embedded into specific lock instances:
- */
-struct lockdep_map {
- struct lock_class_key *key;
- struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
- const char *name;
- short wait_type_outer; /* can be taken in this context */
- short wait_type_inner; /* presents this context */
-#ifdef CONFIG_LOCK_STAT
- int cpu;
- unsigned long ip;
-#endif
-};
-
static inline void lockdep_copy_map(struct lockdep_map *to,
struct lockdep_map *from)
{
@@ -440,8 +281,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
-struct pin_cookie { unsigned int val; };
-
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
@@ -520,10 +359,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lockdep_reset() do { debug_locks = 1; } while (0)
# define lockdep_free_key_range(start, size) do { } while (0)
# define lockdep_sys_exit() do { } while (0)
-/*
- * The class key takes no space if lockdep is disabled:
- */
-struct lock_class_key { };
static inline void lockdep_register_key(struct lock_class_key *key)
{
@@ -533,11 +368,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
{
}
-/*
- * The lockdep_map takes no space if lockdep is disabled:
- */
-struct lockdep_map { };
-
#define lockdep_depth(tsk) (0)
#define lockdep_is_held_type(l, r) (1)
@@ -549,8 +379,6 @@ struct lockdep_map { };
#define lockdep_recursing(tsk) (0)
-struct pin_cookie { };
-
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
@@ -703,38 +531,58 @@ do { \
lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)
-#define lockdep_assert_irqs_enabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled, \
- "IRQs not enabled as expected\n"); \
- } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
-#define lockdep_assert_irqs_disabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled, \
- "IRQs not disabled as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_enabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
+} while (0)
-#define lockdep_assert_in_irq() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context, \
- "Not in hardirq as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_disabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
+} while (0)
+
+#define lockdep_assert_in_irq() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \
+} while (0)
+
+#define lockdep_assert_preemption_enabled() \
+do { \
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
+ debug_locks && \
+ (preempt_count() != 0 || \
+ !this_cpu_read(hardirqs_enabled))); \
+} while (0)
+
+#define lockdep_assert_preemption_disabled() \
+do { \
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
+ debug_locks && \
+ (preempt_count() == 0 && \
+ this_cpu_read(hardirqs_enabled))); \
+} while (0)
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)
+
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
+
+# define lockdep_assert_preemption_enabled() do { } while (0)
+# define lockdep_assert_preemption_disabled() do { } while (0)
#endif
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirq_context && \
+ lockdep_hardirq_context() && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
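
With the IRQ state now kept in the per-CPU hardirqs_enabled/hardirq_context variables, the assertions above check this_cpu state, and the new lockdep_assert_preemption_disabled()/enabled() helpers additionally fold in preempt_count(). A hedged sketch of typical usage; the my_stat per-CPU counter and my_stat_add() are hypothetical:

static DEFINE_PER_CPU(u64, my_stat);

static void my_stat_add(u64 delta)
{
        /* per-CPU data is only stable while preemption is disabled */
        lockdep_assert_preemption_disabled();
        __this_cpu_add(my_stat, delta);
}
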
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
new file mode 100644
index 000000000000..bb35b449f533
--- /dev/null
+++ b/include/linux/lockdep_types.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Runtime locking correctness validator
+ *
+ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * see Documentation/locking/lockdep-design.rst for more details.
+ */
+#ifndef __LINUX_LOCKDEP_TYPES_H
+#define __LINUX_LOCKDEP_TYPES_H
+
+#include <linux/types.h>
+
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+enum lockdep_wait_type {
+ LD_WAIT_INV = 0, /* not checked, catch all */
+
+ LD_WAIT_FREE, /* wait free, rcu etc.. */
+ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
+
+#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
+ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+#else
+ LD_WAIT_CONFIG = LD_WAIT_SPIN,
+#endif
+ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */
+
+ LD_WAIT_MAX, /* must be last */
+};
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
+ * the total number of states... :-(
+ */
+#define XXX_LOCK_USAGE_STATES (1+2*4)
+
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently the main class (subclass == 0) and the single depth subclass
+ * are cached in lockdep_map. This optimization mainly targets rq->lock:
+ * double_rq_lock() acquires it with a single-depth subclass while it is
+ * highly contended.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES 2
+
+/*
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
+ */
+struct lockdep_subclass_key {
+ char __one_byte;
+} __attribute__ ((__packed__));
+
+/* hash_entry is used to keep track of dynamically allocated keys. */
+struct lock_class_key {
+ union {
+ struct hlist_node hash_entry;
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+ };
+};
+
+extern struct lock_class_key __lockdep_no_validate__;
+
+struct lock_trace;
+
+#define LOCKSTAT_POINTS 4
+
+/*
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
+ */
+struct lock_class {
+ /*
+ * class-hash:
+ */
+ struct hlist_node hash_entry;
+
+ /*
+ * Entry in all_lock_classes when in use. Entry in free_lock_classes
+ * when not in use. Instances that are being freed are on one of the
+ * zapped_classes lists.
+ */
+ struct list_head lock_entry;
+
+ /*
+ * These fields represent a directed graph of lock dependencies,
+ * to every node we attach a list of "forward" and a list of
+ * "backward" graph nodes.
+ */
+ struct list_head locks_after, locks_before;
+
+ const struct lockdep_subclass_key *key;
+ unsigned int subclass;
+ unsigned int dep_gen_id;
+
+ /*
+ * IRQ/softirq usage tracking bits:
+ */
+ unsigned long usage_mask;
+ const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
+
+ /*
+ * Generation counter, when doing certain classes of graph walking,
+ * to ensure that we check one node only once:
+ */
+ int name_version;
+ const char *name;
+
+ short wait_type_inner;
+ short wait_type_outer;
+
+#ifdef CONFIG_LOCK_STAT
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+#endif
+} __no_randomize_layout;
+
+#ifdef CONFIG_LOCK_STAT
+struct lock_time {
+ s64 min;
+ s64 max;
+ s64 total;
+ unsigned long nr;
+};
+
+enum bounce_type {
+ bounce_acquired_write,
+ bounce_acquired_read,
+ bounce_contended_write,
+ bounce_contended_read,
+ nr_bounce_types,
+
+ bounce_acquired = bounce_acquired_write,
+ bounce_contended = bounce_contended_write,
+};
+
+struct lock_class_stats {
+ unsigned long contention_point[LOCKSTAT_POINTS];
+ unsigned long contending_point[LOCKSTAT_POINTS];
+ struct lock_time read_waittime;
+ struct lock_time write_waittime;
+ struct lock_time read_holdtime;
+ struct lock_time write_holdtime;
+ unsigned long bounces[nr_bounce_types];
+};
+
+struct lock_class_stats lock_stats(struct lock_class *class);
+void clear_lock_stats(struct lock_class *class);
+#endif
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+ short wait_type_outer; /* can be taken in this context */
+ short wait_type_inner; /* presents this context */
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+struct pin_cookie { unsigned int val; };
+
+#else /* !CONFIG_LOCKDEP */
+
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+
+/*
+ * The lockdep_map takes no space if lockdep is disabled:
+ */
+struct lockdep_map { };
+
+struct pin_cookie { };
+
+#endif /* !LOCKDEP */
+
+#endif /* __LINUX_LOCKDEP_TYPES_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..9d925db0d355 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -77,16 +77,12 @@ struct memblock_type {
* @current_limit: physical address of the current allocation limit
* @memory: usable memory regions
* @reserved: reserved memory regions
- * @physmem: all physical memory
*/
struct memblock {
bool bottom_up; /* is bottom up direction? */
phys_addr_t current_limit;
struct memblock_type memory;
struct memblock_type reserved;
-#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
- struct memblock_type physmem;
-#endif
};
extern struct memblock memblock;
@@ -145,6 +141,30 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
+ phys_addr_t *out_start,
+ phys_addr_t *out_end)
+{
+ extern struct memblock_type physmem;
+
+ __next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type,
+ out_start, out_end, NULL);
+}
+
+/**
+ * for_each_physmem_range - iterate through physmem areas not included in type.
+ * @i: u64 used as loop variable
+ * @type: ptr to memblock_type which excludes from the iteration, can be %NULL
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ */
+#define for_each_physmem_range(i, type, p_start, p_end) \
+ for (i = 0, __next_physmem_range(&i, type, p_start, p_end); \
+ i != (u64)ULLONG_MAX; \
+ __next_physmem_range(&i, type, p_start, p_end))
+#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */
+
/**
* for_each_mem_range - iterate through memblock areas from type_a and not
* included in type_b. Or just type_a if type_b is NULL.
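
for_each_physmem_range() walks the physmem list that is kept when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is enabled, skipping ranges present in @type, or covering all of physmem when @type is NULL. A hedged kernel-style sketch that simply logs every recorded physical range:

        phys_addr_t start, end;
        u64 i;

        /* NULL type: iterate every recorded physical memory range */
        for_each_physmem_range(i, NULL, &start, &end)
                pr_info("physmem: [%pa-%pa]\n", &start, &end);
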
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 073b79eacc99..1340e02b14ef 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4381,6 +4381,7 @@ struct mlx5_ifc_query_vport_state_out_bits {
enum {
MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1,
+ MLX5_VPORT_STATE_OP_MOD_UPLINK = 0x2,
};
struct mlx5_ifc_arm_monitor_counter_in_bits {
diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 7bd6d8af0004..5d906dfbf3ed 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -63,6 +63,9 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
int mpi_cmp_ui(MPI u, ulong v);
int mpi_cmp(MPI u, MPI v);
+/*-- mpi-sub-ui.c --*/
+int mpi_sub_ui(MPI w, MPI u, unsigned long vval);
+
/*-- mpi-bit.c --*/
void mpi_normalize(MPI a);
unsigned mpi_get_nbits(MPI a);
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index 0c5ef54fd416..c1e79f72cd89 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -5,6 +5,8 @@
#ifndef _LINUX_NOSPEC_H
#define _LINUX_NOSPEC_H
+
+#include <linux/compiler.h>
#include <asm/barrier.h>
struct task_struct;
diff --git a/include/linux/of.h b/include/linux/of.h
index c669c0a4732f..60abe3f636ad 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -554,7 +554,7 @@ bool of_console_check(struct device_node *dn, char *name, int index);
extern int of_cpu_node_to_id(struct device_node *np);
-int of_map_rid(struct device_node *np, u32 rid,
+int of_map_id(struct device_node *np, u32 id,
const char *map_name, const char *map_mask_name,
struct device_node **target, u32 *id_out);
@@ -978,7 +978,7 @@ static inline int of_cpu_node_to_id(struct device_node *np)
return -ENODEV;
}
-static inline int of_map_rid(struct device_node *np, u32 rid,
+static inline int of_map_id(struct device_node *np, u32 id,
const char *map_name, const char *map_mask_name,
struct device_node **target, u32 *id_out)
{
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 8d31e39dd564..07ca187fc5e4 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -55,9 +55,15 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
return of_node_get(cpu_dev->of_node);
}
-int of_dma_configure(struct device *dev,
+int of_dma_configure_id(struct device *dev,
struct device_node *np,
- bool force_dma);
+ bool force_dma, const u32 *id);
+static inline int of_dma_configure(struct device *dev,
+ struct device_node *np,
+ bool force_dma)
+{
+ return of_dma_configure_id(dev, np, force_dma, NULL);
+}
#else /* CONFIG_OF */
static inline int of_driver_match_device(struct device *dev,
@@ -106,6 +112,12 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
return NULL;
}
+static inline int of_dma_configure_id(struct device *dev,
+ struct device_node *np,
+ bool force_dma, const u32 *id)
+{
+ return 0;
+}
static inline int of_dma_configure(struct device *dev,
struct device_node *np,
bool force_dma)
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index f3d40dd7bb66..16f4b3e87f20 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -13,7 +13,8 @@ extern int of_get_dma_window(struct device_node *dn, const char *prefix,
size_t *size);
extern const struct iommu_ops *of_iommu_configure(struct device *dev,
- struct device_node *master_np);
+ struct device_node *master_np,
+ const u32 *id);
#else
@@ -25,7 +26,8 @@ static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
}
static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
- struct device_node *master_np)
+ struct device_node *master_np,
+ const u32 *id)
{
return NULL;
}
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 1214cabb2247..e8b78139f78c 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -52,9 +52,10 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev,
struct device_node *np,
enum irq_domain_bus_token token);
extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
- u32 rid);
+ u32 id,
+ u32 bus_token);
extern void of_msi_configure(struct device *dev, struct device_node *np);
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
+u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
#else
static inline int of_irq_count(struct device_node *dev)
{
@@ -85,17 +86,17 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev,
return NULL;
}
static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
- u32 rid)
+ u32 id, u32 bus_token)
{
return NULL;
}
static inline void of_msi_configure(struct device *dev, struct device_node *np)
{
}
-static inline u32 of_msi_map_rid(struct device *dev,
- struct device_node *msi_np, u32 rid_in)
+static inline u32 of_msi_map_id(struct device *dev,
+ struct device_node *msi_np, u32 id_in)
{
- return rid_in;
+ return id_in;
}
#endif
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 7302efff5e65..a433f13fc4bf 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -67,17 +67,6 @@ struct padata_serial_queue {
};
/**
- * struct padata_parallel_queue - The percpu padata parallel queue
- *
- * @reorder: List to wait for reordering after parallel processing.
- * @num_obj: Number of objects that are processed by this cpu.
- */
-struct padata_parallel_queue {
- struct padata_list reorder;
- atomic_t num_obj;
-};
-
-/**
* struct padata_cpumask - The cpumasks for the parallel/serial workers
*
* @pcpu: cpumask for the parallel workers.
@@ -93,7 +82,7 @@ struct padata_cpumask {
* that depends on the cpumask in use.
*
* @ps: padata_shell object.
- * @pqueue: percpu padata queues used for parallelization.
+ * @reorder_list: percpu reorder lists
 * @squeue: percpu padata queues used for serialization.
* @refcnt: Number of objects holding a reference on this parallel_data.
* @seq_nr: Sequence number of the parallelized data object.
@@ -105,7 +94,7 @@ struct padata_cpumask {
*/
struct parallel_data {
struct padata_shell *ps;
- struct padata_parallel_queue __percpu *pqueue;
+ struct padata_list __percpu *reorder_list;
struct padata_serial_queue __percpu *squeue;
atomic_t refcnt;
unsigned int seq_nr;
@@ -167,7 +156,6 @@ struct padata_mt_job {
* @serial_wq: The workqueue used for serial work.
* @pslist: List of padata_shell objects attached to this instance.
* @cpumask: User supplied cpumasks for parallel and serial works.
- * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask.
* @kobj: padata instance kernel object.
* @lock: padata instance lock.
* @flags: padata flags.
@@ -179,7 +167,6 @@ struct padata_instance {
struct workqueue_struct *serial_wq;
struct list_head pslist;
struct padata_cpumask cpumask;
- struct padata_cpumask rcpumask;
struct kobject kobj;
struct mutex lock;
u8 flags;
@@ -194,7 +181,7 @@ extern void __init padata_init(void);
static inline void __init padata_init(void) {}
#endif
-extern struct padata_instance *padata_alloc_possible(const char *name);
+extern struct padata_instance *padata_alloc(const char *name);
extern void padata_free(struct padata_instance *pinst);
extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
extern void padata_free_shell(struct padata_shell *ps);
@@ -204,6 +191,4 @@ extern void padata_do_serial(struct padata_priv *padata);
extern void __init padata_do_multithreaded(struct padata_mt_job *job);
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
cpumask_var_t cpumask);
-extern int padata_start(struct padata_instance *pinst);
-extern void padata_stop(struct padata_instance *pinst);
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..d1f4eff605ad 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -496,8 +496,35 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
return pgoff;
}
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+ struct page *page;
+ int bit_nr;
+ int page_match;
+};
+
+struct wait_page_queue {
+ struct page *page;
+ int bit_nr;
+ wait_queue_entry_t wait;
+};
+
+static inline bool wake_page_match(struct wait_page_queue *wait_page,
+ struct wait_page_key *key)
+{
+ if (wait_page->page != key->page)
+ return false;
+ key->page_match = 1;
+
+ if (wait_page->bit_nr != key->bit_nr)
+ return false;
+
+ return true;
+}
+
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags);
extern void unlock_page(struct page *page);
@@ -535,6 +562,22 @@ static inline int lock_page_killable(struct page *page)
}
/*
+ * lock_page_async - Lock the page, unless this would block. If the page
+ * is already locked, then queue a callback when the page becomes unlocked.
+ * This callback can then retry the operation.
+ *
+ * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
+ * was already locked and the callback defined in 'wait' was queued.
+ */
+static inline int lock_page_async(struct page *page,
+ struct wait_page_queue *wait)
+{
+ if (!trylock_page(page))
+ return __lock_page_async(page, wait);
+ return 0;
+}
+
+/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
*
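
lock_page_async() first tries trylock_page(); if the page is already locked it queues the wait_page_queue entry and returns -EIOCBQUEUED, so the caller can retry from its wake callback instead of sleeping. A hedged sketch of that pattern, loosely modeled on an async read path; my_wake_fn(), start_async_read() and the re-submit step are hypothetical:

static int my_wake_fn(struct wait_queue_entry *wait, unsigned mode,
                      int flags, void *key)
{
        struct wait_page_queue *wpq =
                container_of(wait, struct wait_page_queue, wait);

        if (!wake_page_match(wpq, key))
                return 0;
        /* ... re-submit the operation that previously got -EIOCBQUEUED ... */
        return 1;
}

static int start_async_read(struct page *page, struct wait_page_queue *wpq)
{
        wpq->page = page;
        wpq->bit_nr = PG_locked;
        init_waitqueue_func_entry(&wpq->wait, my_wake_fn);

        return lock_page_async(page, wpq);      /* 0 or -EIOCBQUEUED */
}
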
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 22d9d183950d..87d8a38bdea1 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -155,7 +155,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
* between contaminating the pointer value, meaning that
* READ_ONCE() is required when fetching it.
*
- * The smp_read_barrier_depends() implied by READ_ONCE() pairs
+ * The dependency ordering from the READ_ONCE() pairs
* with smp_store_release() in __percpu_ref_switch_to_percpu().
*/
percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b4bb32082342..0edd257a5916 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -366,7 +366,7 @@ struct pmu {
* ->stop() with PERF_EF_UPDATE will read the counter and update
* period/count values like ->read() would.
*
- * ->start() with PERF_EF_RELOAD will reprogram the the counter
+ * ->start() with PERF_EF_RELOAD will reprogram the counter
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
*/
void (*start) (struct perf_event *event, int flags);
@@ -419,10 +419,11 @@ struct pmu {
*/
void (*sched_task) (struct perf_event_context *ctx,
bool sched_in);
+
/*
- * PMU specific data size
+ * Kmem cache of PMU specific data
*/
- size_t task_ctx_size;
+ struct kmem_cache *task_ctx_cache;
/*
* PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
@@ -1232,6 +1233,9 @@ extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
+extern void perf_event_text_poke(const void *addr,
+ const void *old_bytes, size_t old_len,
+ const void *new_bytes, size_t new_len);
/* Callchains */
DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
@@ -1479,6 +1483,11 @@ static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
+static inline void perf_event_text_poke(const void *addr,
+ const void *old_bytes,
+ size_t old_len,
+ const void *new_bytes,
+ size_t new_len) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 417db0a79a62..808f9d3ee546 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -107,7 +107,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
return -ENOSPC;
/* Make sure the pointer we are storing points to a valid data. */
- /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+ /* Pairs with the dependency ordering in __ptr_ring_consume. */
smp_wmb();
WRITE_ONCE(r->queue[r->producer++], ptr);
diff --git a/include/linux/random.h b/include/linux/random.h
index 45e1f8fa742b..9ab7443bd91b 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/once.h>
+#include <asm/percpu.h>
#include <uapi/linux/random.h>
@@ -119,6 +120,8 @@ struct rnd_state {
__u32 s1, s2, s3, s4;
};
+DECLARE_PER_CPU(struct rnd_state, net_rand_state);
+
u32 prandom_u32_state(struct rnd_state *state);
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index df587d181844..7a6fc9956510 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -248,6 +248,8 @@ static inline void __list_splice_init_rcu(struct list_head *list,
*/
sync();
+ ASSERT_EXCLUSIVE_ACCESS(*first);
+ ASSERT_EXCLUSIVE_ACCESS(*last);
/*
* Readers are finished with the source list, so perform splice.
@@ -512,7 +514,7 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
* @right: The hlist head on the right
*
* The lists start out as [@left ][node1 ... ] and
- [@right ][node2 ... ]
+ * [@right ][node2 ... ]
* The lists end up as [@left ][node2 ... ]
* [@right ][node1 ... ]
*/
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 9670b54b484a..ff3e94779e73 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -162,7 +162,7 @@ static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
* The barrier() is needed to make sure compiler doesn't cache first element [1],
* as this loop can be restarted [2]
* [1] Documentation/core-api/atomic_ops.rst around line 114
- * [2] Documentation/RCU/rculist_nulls.txt around line 146
+ * [2] Documentation/RCU/rculist_nulls.rst around line 146
*/
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
for (({barrier();}), \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 659cbfa7581a..d15d46db61f7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -828,17 +828,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/*
* Does the specified offset indicate that the corresponding rcu_head
- * structure can be handled by kfree_rcu()?
+ * structure can be handled by kvfree_rcu()?
*/
-#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
+#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
/*
* Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
*/
-#define __kfree_rcu(head, offset) \
+#define __kvfree_rcu(head, offset) \
do { \
- BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
- kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
+ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offset)); \
+ kvfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
} while (0)
/**
@@ -857,7 +857,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* Because the functions are not allowed in the low-order 4096 bytes of
* kernel virtual memory, offsets up to 4095 bytes can be accommodated.
* If the offset is larger than 4095 bytes, a compile-time error will
- * be generated in __kfree_rcu(). If this error is triggered, you can
+ * be generated in __kvfree_rcu(). If this error is triggered, you can
* either fall back to use of call_rcu() or rearrange the structure to
* position the rcu_head structure into the first 4096 bytes.
*
@@ -872,7 +872,46 @@ do { \
typeof (ptr) ___p = (ptr); \
\
if (___p) \
- __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+ __kvfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+} while (0)
+
+/**
+ * kvfree_rcu() - kvfree an object after a grace period.
+ *
+ * This macro takes one or two arguments, depending on whether
+ * the object to be freed is head-less or not. If the object
+ * contains an rcu_head, the semantics are the same as for
+ * kfree_rcu():
+ *
+ * kvfree_rcu(ptr, rhf);
+ *
+ * where @ptr is the pointer to be freed by kvfree() and @rhf
+ * is the name of the rcu_head structure within the type of @ptr.
+ *
+ * For the head-less variant, only one argument is passed: the
+ * pointer to be freed after a grace period. The semantics are
+ * then
+ *
+ * kvfree_rcu(ptr);
+ *
+ * where @ptr is the pointer to be freed by kvfree().
+ *
+ * Please note, the head-less way of freeing may only be used
+ * from a context that can sleep (see the might_sleep()
+ * annotation). Otherwise, embed an rcu_head structure within
+ * the type of @ptr and use the two-argument form.
+ */
+#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
+ kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
+
+#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
+#define kvfree_rcu_arg_2(ptr, rhf) kfree_rcu(ptr, rhf)
+#define kvfree_rcu_arg_1(ptr) \
+do { \
+ typeof(ptr) ___p = (ptr); \
+ \
+ if (___p) \
+ kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \
} while (0)
/*
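To make the two calling conventions described above concrete, a minimal hedged usage sketch (the structures and the function below are hypothetical):

#include <linux/rcupdate.h>

struct foo {
        int data;
        struct rcu_head rhf;
};

struct bar {
        int data;               /* no rcu_head embedded */
};

static void example_free(struct foo *f, struct bar *b)
{
        /* Two-argument form: @rhf names the rcu_head inside *f;
         * usable from atomic context. */
        kvfree_rcu(f, rhf);

        /* Head-less form: only the pointer is passed; per the comment
         * above, only valid in a context that may sleep. */
        kvfree_rcu(b);
}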
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 4c25a41f8b27..d9015aac78c6 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -36,8 +36,8 @@ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);
/**
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
*
- * When synchronize_rcu_trace() is invoked by one task, then that task
- * is guaranteed to block until all other tasks exit their read-side
+ * When synchronize_rcu_tasks_trace() is invoked by one task, then that
+ * task is guaranteed to block until all other tasks exit their read-side
* critical sections. Similarly, if call_rcu_trace() is invoked on one
* task while other tasks are within RCU read-side critical sections,
* invocation of the corresponding RCU callback is deferred until after
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8512caeb7682..5cc9637cac16 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -34,9 +34,25 @@ static inline void synchronize_rcu_expedited(void)
synchronize_rcu();
}
-static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+/*
+ * Add one more declaration of kvfree() here. It is not
+ * straightforward to simply include <linux/mm.h>, where it is
+ * defined, because that include triggers many compile errors.
+ */
+extern void kvfree(const void *addr);
+
+static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
- call_rcu(head, func);
+ if (head) {
+ call_rcu(head, func);
+ return;
+ }
+
+ // kvfree_rcu(one_arg) call.
+ might_sleep();
+ synchronize_rcu();
+ kvfree((void *) func);
}
void rcu_qs(void);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d5cc9d675987..d2f4064ebd1d 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(int cpu)
}
void synchronize_rcu_expedited(void);
-void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
+void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 70ebef866cc8..68dab3e08aad 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -33,7 +33,7 @@
* of two or more hash tables when the rhashtable is being resized.
* The end of the chain is marked with a special nulls marks which has
* the least significant bit set but otherwise stores the address of
- * the hash bucket. This allows us to be be sure we've found the end
+ * the hash bucket. This allows us to be sure we've found the end
* of the right list.
* The value stored in the hash bucket has BIT(0) used as a lock bit.
* This bit must be atomically set before any changes are made to
@@ -84,7 +84,7 @@ struct bucket_table {
struct lockdep_map dep_map;
- struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
+ struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
/*
@@ -261,13 +261,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void *arg);
void rhashtable_destroy(struct rhashtable *ht);
-struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
- unsigned int hash);
-struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
- unsigned int hash);
-struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
- struct bucket_table *tbl,
- unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **__rht_bucket_nested(
+ const struct bucket_table *tbl, unsigned int hash);
+struct rhash_lock_head __rcu **rht_bucket_nested_insert(
+ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash);
#define rht_dereference(p, ht) \
rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
@@ -284,21 +283,21 @@ struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
#define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; })
-static inline struct rhash_lock_head *const *rht_bucket(
+static inline struct rhash_lock_head __rcu *const *rht_bucket(
const struct bucket_table *tbl, unsigned int hash)
{
return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
&tbl->buckets[hash];
}
-static inline struct rhash_lock_head **rht_bucket_var(
+static inline struct rhash_lock_head __rcu **rht_bucket_var(
struct bucket_table *tbl, unsigned int hash)
{
return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
&tbl->buckets[hash];
}
-static inline struct rhash_lock_head **rht_bucket_insert(
+static inline struct rhash_lock_head __rcu **rht_bucket_insert(
struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
@@ -325,7 +324,7 @@ static inline struct rhash_lock_head **rht_bucket_insert(
*/
static inline void rht_lock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
+ struct rhash_lock_head __rcu **bkt)
{
local_bh_disable();
bit_spin_lock(0, (unsigned long *)bkt);
@@ -333,7 +332,7 @@ static inline void rht_lock(struct bucket_table *tbl,
}
static inline void rht_lock_nested(struct bucket_table *tbl,
- struct rhash_lock_head **bucket,
+ struct rhash_lock_head __rcu **bucket,
unsigned int subclass)
{
local_bh_disable();
@@ -342,18 +341,18 @@ static inline void rht_lock_nested(struct bucket_table *tbl,
}
static inline void rht_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt)
+ struct rhash_lock_head __rcu **bkt)
{
lock_map_release(&tbl->dep_map);
bit_spin_unlock(0, (unsigned long *)bkt);
local_bh_enable();
}
-static inline struct rhash_head __rcu *__rht_ptr(
- struct rhash_lock_head *const *bkt)
+static inline struct rhash_head *__rht_ptr(
+ struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
{
- return (struct rhash_head __rcu *)
- ((unsigned long)*bkt & ~BIT(0) ?:
+ return (struct rhash_head *)
+ ((unsigned long)p & ~BIT(0) ?:
(unsigned long)RHT_NULLS_MARKER(bkt));
}
@@ -365,47 +364,41 @@ static inline struct rhash_head __rcu *__rht_ptr(
* access is guaranteed, such as when destroying the table.
*/
static inline struct rhash_head *rht_ptr_rcu(
- struct rhash_lock_head *const *bkt)
+ struct rhash_lock_head __rcu *const *bkt)
{
- struct rhash_head __rcu *p = __rht_ptr(bkt);
-
- return rcu_dereference(p);
+ return __rht_ptr(rcu_dereference(*bkt), bkt);
}
static inline struct rhash_head *rht_ptr(
- struct rhash_lock_head *const *bkt,
+ struct rhash_lock_head __rcu *const *bkt,
struct bucket_table *tbl,
unsigned int hash)
{
- return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
+ return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
}
static inline struct rhash_head *rht_ptr_exclusive(
- struct rhash_lock_head *const *bkt)
+ struct rhash_lock_head __rcu *const *bkt)
{
- return rcu_dereference_protected(__rht_ptr(bkt), 1);
+ return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt);
}
-static inline void rht_assign_locked(struct rhash_lock_head **bkt,
+static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
struct rhash_head *obj)
{
- struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
-
if (rht_is_a_nulls(obj))
obj = NULL;
- rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
+ rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0)));
}
static inline void rht_assign_unlock(struct bucket_table *tbl,
- struct rhash_lock_head **bkt,
+ struct rhash_lock_head __rcu **bkt,
struct rhash_head *obj)
{
- struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;
-
if (rht_is_a_nulls(obj))
obj = NULL;
lock_map_release(&tbl->dep_map);
- rcu_assign_pointer(*p, obj);
+ rcu_assign_pointer(*bkt, (void *)obj);
preempt_enable();
__release(bitlock);
local_bh_enable();
@@ -593,7 +586,7 @@ static inline struct rhash_head *__rhashtable_lookup(
.ht = ht,
.key = key,
};
- struct rhash_lock_head *const *bkt;
+ struct rhash_lock_head __rcu *const *bkt;
struct bucket_table *tbl;
struct rhash_head *he;
unsigned int hash;
@@ -709,7 +702,7 @@ static inline void *__rhashtable_insert_fast(
.ht = ht,
.key = key,
};
- struct rhash_lock_head **bkt;
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
@@ -995,7 +988,7 @@ static inline int __rhashtable_remove_fast_one(
struct rhash_head *obj, const struct rhashtable_params params,
bool rhlist)
{
- struct rhash_lock_head **bkt;
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
unsigned int hash;
@@ -1147,7 +1140,7 @@ static inline int __rhashtable_replace_fast(
struct rhash_head *obj_old, struct rhash_head *obj_new,
const struct rhashtable_params params)
{
- struct rhash_lock_head **bkt;
+ struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
unsigned int hash;
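As a hedged sketch of how the now __rcu-annotated bucket helpers compose, roughly mirroring the head-insertion fast path of __rhashtable_insert_fast() (the wrapper name is hypothetical, duplicate handling and nested-table details are elided, and the caller is assumed to hold rcu_read_lock()):

#include <linux/rhashtable.h>

static int example_bucket_insert(struct rhashtable *ht,
                                 struct bucket_table *tbl,
                                 unsigned int hash, struct rhash_head *obj)
{
        struct rhash_lock_head __rcu **bkt = rht_bucket_insert(ht, tbl, hash);

        if (!bkt)
                return -ENOMEM;

        rht_lock(tbl, bkt);                     /* take the BIT(0) bucket lock */
        RCU_INIT_POINTER(obj->next, rht_ptr(bkt, tbl, hash));
        rht_assign_unlock(tbl, bkt, obj);       /* publish obj and drop the lock */
        return 0;
}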
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 7e5b2a4eb560..25e3fde85617 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
}
#define RWSEM_UNLOCKED_VALUE 0L
-#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
/* Common initializer macros and functions */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname) \
- , .dep_map = { \
+ .dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SLEEP, \
- }
+ },
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
#ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname
+# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname,
#else
-# define __DEBUG_RWSEM_INITIALIZER(lockname)
+# define __RWSEM_DEBUG_INIT(lockname)
#endif
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
#else
#define __RWSEM_OPT_INIT(lockname)
#endif
#define __RWSEM_INITIALIZER(name) \
- { __RWSEM_INIT_COUNT(name), \
+ { __RWSEM_COUNT_INIT(name), \
.owner = ATOMIC_LONG_INIT(0), \
- .wait_list = LIST_HEAD_INIT((name).wait_list), \
- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
__RWSEM_OPT_INIT(name) \
- __DEBUG_RWSEM_INITIALIZER(name) \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
+ .wait_list = LIST_HEAD_INIT((name).wait_list), \
+ __RWSEM_DEBUG_INIT(name) \
__RWSEM_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(name) \
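A trivial hedged usage sketch of the statically initialized form produced by the reordered initializers above (the semaphore and function names are hypothetical):

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_rwsem);

static void example_reader(void)
{
        down_read(&example_rwsem);
        /* ... read state protected by example_rwsem ... */
        up_read(&example_rwsem);
}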
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a6bf77c34687..6d6683b48c2a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/plist.h>
#include <linux/hrtimer.h>
+#include <linux/irqflags.h>
#include <linux/seccomp.h>
#include <linux/nodemask.h>
#include <linux/rcupdate.h>
@@ -986,19 +987,9 @@ struct task_struct {
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- unsigned int irq_events;
+ struct irqtrace_events irqtrace;
unsigned int hardirq_threaded;
- unsigned long hardirq_enable_ip;
- unsigned long hardirq_disable_ip;
- unsigned int hardirq_enable_event;
- unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
u64 hardirq_chain_key;
- unsigned long softirq_disable_ip;
- unsigned long softirq_enable_ip;
- unsigned int softirq_disable_event;
- unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
int irq_config;
@@ -1199,8 +1190,12 @@ struct task_struct {
#ifdef CONFIG_KASAN
unsigned int kasan_depth;
#endif
+
#ifdef CONFIG_KCSAN
struct kcsan_ctx kcsan_ctx;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ struct irqtrace_events kcsan_save_irqtrace;
+#endif
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
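The new struct irqtrace_events comes from <linux/irqflags.h>, included above; presumably it simply groups the per-task irq-tracing fields deleted from task_struct here. A sketch of the assumed layout, inferred from the removed members and not confirmed by this hunk:

struct irqtrace_events {
        unsigned int    irq_events;
        unsigned long   hardirq_enable_ip;
        unsigned long   hardirq_disable_ip;
        unsigned int    hardirq_enable_event;
        unsigned int    hardirq_disable_event;
        unsigned long   softirq_disable_ip;
        unsigned long   softirq_enable_ip;
        unsigned int    softirq_disable_event;
        unsigned int    softirq_enable_event;
};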
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index e7ddab095baf..27b4fa454c80 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -127,6 +127,12 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
+static inline void put_task_struct_many(struct task_struct *t, int nr)
+{
+ if (refcount_sub_and_test(nr, &t->usage))
+ __put_task_struct(t);
+}
+
void put_task_struct_rcu_user(struct task_struct *task);
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
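A hedged sketch of the batched-reference pattern put_task_struct_many() enables (the function name is hypothetical):

#include <linux/sched/task.h>

static void example_task_ref_batch(struct task_struct *task, int nr)
{
        /* Take nr references at once (equivalent to nr get_task_struct() calls). */
        refcount_add(nr, &task->usage);

        /* ... use the task from nr contexts ... */

        /* Drop all nr references with a single refcount operation. */
        put_task_struct_many(task, nr);
}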
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 0bb04a96a6d4..528718e4ed52 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -6,6 +6,34 @@
#define LINUX_SCHED_CLOCK
#ifdef CONFIG_GENERIC_SCHED_CLOCK
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns: sched_clock() value at last update.
+ * @epoch_cyc: Clock cycle value at last update.
+ * @sched_clock_mask: Bitmask for two's complement subtraction of
+ * non-64-bit clocks.
+ * @read_sched_clock: Current clock source (or dummy source when suspended).
+ * @mult: Multiplier for scaled math conversion.
+ * @shift: Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
+ u64 epoch_ns;
+ u64 epoch_cyc;
+ u64 sched_clock_mask;
+ u64 (*read_sched_clock)(void);
+ u32 mult;
+ u32 shift;
+};
+
+extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq);
+extern int sched_clock_read_retry(unsigned int seq);
+
extern void generic_sched_clock_init(void);
extern void sched_clock_register(u64 (*read)(void), int bits,
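A hedged sketch of how the new sched_clock_read_begin()/sched_clock_read_retry() pair is meant to be consumed (the helper name is hypothetical; the ns computation follows the struct clock_read_data documentation above and uses mul_u64_u32_shr() from <linux/math64.h>):

#include <linux/math64.h>
#include <linux/sched_clock.h>

static u64 example_sched_clock_ns(void)
{
        struct clock_read_data *rd;
        unsigned int seq;
        u64 cyc, ns;

        do {
                rd  = sched_clock_read_begin(&seq);
                cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
                      rd->sched_clock_mask;
                ns  = rd->epoch_ns + mul_u64_u32_shr(cyc, rd->mult, rd->shift);
        } while (sched_clock_read_retry(seq));

        return ns;
}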
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8b97204f35a7..54bc20496392 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1,36 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
+
/*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- * if a writer is in progress by detecting change in sequence number.
- * Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- * is in progress. A locking reader in progress will also block a writer
- * from going forward. Unlike the regular rwlock, the read lock here is
- * exclusive so that only one locking reader can get it.
- *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
- *
- * Expected non-blocking reader usage:
- * do {
- * seq = read_seqbegin(&foo);
- * ...
- * } while (read_seqretry(&foo, seq));
- *
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
*
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
+ * See Documentation/locking/seqlock.rst
*
- * Based on x86_64 vsyscall gettimeofday
- * by Keith Owens and Andrea Arcangeli
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
*/
#include <linux/spinlock.h>
@@ -41,8 +20,8 @@
#include <asm/processor.h>
/*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
* read_seqcount_begin() and read_seqcount_retry(), however, there are more
* esoteric cases which do not follow this pattern.
*
@@ -50,16 +29,30 @@
* via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
* pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
* atomics; if there is a matching read_seqcount_retry() call, no following
- * memory operations are considered atomic. Usage of seqlocks via seqlock_t
- * interface is not affected.
+ * memory operations are considered atomic. Usage of the seqlock_t interface
+ * is not affected.
*/
#define KCSAN_SEQLOCK_REGION_MAX 1000
/*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
*/
typedef struct seqcount {
unsigned sequence;
@@ -82,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
.dep_map = { .name = #lockname } \
+/**
+ * seqcount_init() - runtime initializer for seqcount_t
+ * @s: Pointer to the seqcount_t instance
+ */
# define seqcount_init(s) \
do { \
static struct lock_class_key __key; \
@@ -105,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
# define seqcount_lockdep_reader_access(x)
#endif
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
-
+/**
+ * SEQCNT_ZERO() - static initializer for seqcount_t
+ * @name: Name of the seqcount_t instance
+ */
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
/**
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
*
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -120,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
+ *
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
@@ -136,30 +137,10 @@ repeat:
}
/**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
+ * @s: Pointer to seqcount_t
*
- * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
- */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
-{
- unsigned ret = READ_ONCE(s->sequence);
- smp_rmb();
- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
- return ret;
-}
-
-/**
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
- *
- * raw_read_seqcount_begin opens a read critical section of the given
- * seqcount, but without any lockdep checking. Validity of the critical
- * section is tested by checking read_seqcount_retry function.
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
@@ -169,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
}
/**
- * read_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * read_seqcount_begin() - begin a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
*
- * read_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * Return: count to be passed to read_seqcount_retry()
*/
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
@@ -184,32 +162,54 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
}
/**
- * raw_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
- * Returns: count to be passed to read_seqcount_retry
+ * raw_read_seqcount() - read the raw seqcount_t counter value
+ * @s: Pointer to seqcount_t
*
- * raw_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * raw_read_seqcount opens a read critical section of the given
+ * seqcount_t, without any lockdep checking, and without checking or
+ * masking the sequence counter LSB. Calling code is responsible for
+ * handling that.
*
- * Unlike read_seqcount_begin(), this function will not wait for the count
- * to stabilize. If a writer is active when we begin, we will fail the
- * read_seqcount_retry() instead of stabilizing at the beginning of the
- * critical section.
+ * Return: count to be passed to read_seqcount_retry()
*/
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
unsigned ret = READ_ONCE(s->sequence);
smp_rmb();
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
- return ret & ~1;
+ return ret;
}
/**
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
+ * lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t
+ *
+ * raw_seqcount_begin opens a read critical section of the given
+ * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
+ * for the count to stabilize. If a writer is active when it begins, it
+ * will fail the read_seqcount_retry() at the end of the read critical
+ * section instead of stabilizing at the beginning of it.
+ *
+ * Use this only in special kernel hot paths where the read section is
+ * small and has a high probability of success through other external
+ * means. It will save a single branching instruction.
+ *
+ * Return: count to be passed to read_seqcount_retry()
+ */
+static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+{
+ /*
+ * If the counter is odd, let read_seqcount_retry() fail
+ * by decrementing the counter.
+ */
+ return raw_read_seqcount(s) & ~1;
+}
+
+/**
+ * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
*
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
@@ -218,6 +218,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
+ *
+ * Return: true if a read section retry is required, else false
*/
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -226,14 +228,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
}
/**
- * read_seqcount_retry - end a seq-read critical section
- * @s: pointer to seqcount_t
- * @start: count, from read_seqcount_begin
- * Returns: 1 if retry is required, else 0
+ * read_seqcount_retry() - end a seqcount_t read critical section
+ * @s: Pointer to seqcount_t
+ * @start: count, from read_seqcount_begin()
*
- * read_seqcount_retry closes a read critical section of the given seqcount.
- * If the critical section was invalid, it must be ignored (and typically
- * retried).
+ * read_seqcount_retry closes the read critical section of given
+ * seqcount_t. If the critical section was invalid, it must be ignored
+ * (and typically retried).
+ *
+ * Return: true if a read section retry is required, else false
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
@@ -241,8 +244,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
return __read_seqcount_retry(s, start);
}
-
-
+/**
+ * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
kcsan_nestable_atomic_begin();
@@ -250,6 +255,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s)
smp_wmb();
}
+/**
+ * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
+ * @s: Pointer to seqcount_t
+ */
static inline void raw_write_seqcount_end(seqcount_t *s)
{
smp_wmb();
@@ -257,45 +266,104 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
kcsan_nestable_atomic_end();
}
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+ raw_write_seqcount_begin(s);
+ seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
/**
- * raw_write_seqcount_barrier - do a seq write barrier
- * @s: pointer to seqcount_t
+ * write_seqcount_begin_nested() - start a seqcount_t write section with
+ * custom lockdep nesting level
+ * @s: Pointer to seqcount_t
+ * @subclass: lockdep nesting level
*
- * This can be used to provide an ordering guarantee instead of the
- * usual consistency guarantee. It is one wmb cheaper, because we can
- * collapse the two back-to-back wmb()s.
+ * See Documentation/locking/lockdep-design.rst
+ */
+static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+ lockdep_assert_preemption_disabled();
+ __write_seqcount_begin_nested(s, subclass);
+}
+
+/*
+ * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
+ */
+static inline void __write_seqcount_begin(seqcount_t *s)
+{
+ __write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_begin() - start a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * write_seqcount_begin opens a write side critical section of the given
+ * seqcount_t.
+ *
+ * Context: seqcount_t write side critical sections must be serialized and
+ * non-preemptible. If readers can be invoked from hardirq or softirq
+ * context, interrupts or bottom halves must be respectively disabled.
+ */
+static inline void write_seqcount_begin(seqcount_t *s)
+{
+ write_seqcount_begin_nested(s, 0);
+}
+
+/**
+ * write_seqcount_end() - end a seqcount_t write side critical section
+ * @s: Pointer to seqcount_t
+ *
+ * The write section must've been opened with write_seqcount_begin().
+ */
+static inline void write_seqcount_end(seqcount_t *s)
+{
+ seqcount_release(&s->dep_map, _RET_IP_);
+ raw_write_seqcount_end(s);
+}
+
+/**
+ * raw_write_seqcount_barrier() - do a seqcount_t write barrier
+ * @s: Pointer to seqcount_t
+ *
+ * This can be used to provide an ordering guarantee instead of the usual
+ * consistency guarantee. It is one wmb cheaper, because it can collapse
+ * the two back-to-back wmb()s.
*
* Note that writes surrounding the barrier should be declared atomic (e.g.
* via WRITE_ONCE): a) to ensure the writes become visible to other threads
* atomically, avoiding compiler optimizations; b) to document which writes are
* meant to propagate to the reader critical section. This is necessary because
* neither writes before and after the barrier are enclosed in a seq-writer
- * critical section that would ensure readers are aware of ongoing writes.
+ * critical section that would ensure readers are aware of ongoing writes::
*
- * seqcount_t seq;
- * bool X = true, Y = false;
+ * seqcount_t seq;
+ * bool X = true, Y = false;
*
- * void read(void)
- * {
- * bool x, y;
+ * void read(void)
+ * {
+ * bool x, y;
*
- * do {
- * int s = read_seqcount_begin(&seq);
+ * do {
+ * int s = read_seqcount_begin(&seq);
*
- * x = X; y = Y;
+ * x = X; y = Y;
*
- * } while (read_seqcount_retry(&seq, s));
+ * } while (read_seqcount_retry(&seq, s));
*
- * BUG_ON(!x && !y);
+ * BUG_ON(!x && !y);
* }
*
* void write(void)
* {
- * WRITE_ONCE(Y, true);
+ * WRITE_ONCE(Y, true);
*
- * raw_write_seqcount_barrier(seq);
+ * raw_write_seqcount_barrier(seq);
*
- * WRITE_ONCE(X, false);
+ * WRITE_ONCE(X, false);
* }
*/
static inline void raw_write_seqcount_barrier(seqcount_t *s)
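A minimal hedged sketch tying the seqcount_t read and write side APIs above together (all names are hypothetical; per the write_seqcount_begin() requirements, the writer is assumed to already be serialized and non-preemptible, e.g. by a caller-held spinlock):

static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
static u64 foo_a, foo_b;

/* Writer: caller holds the (hypothetical) foo_lock spinlock. */
static void foo_update(u64 a, u64 b)
{
        write_seqcount_begin(&foo_seq);
        foo_a = a;
        foo_b = b;
        write_seqcount_end(&foo_seq);
}

/* Lockless reader: retries if it raced with foo_update(). */
static u64 foo_read_sum(void)
{
        unsigned int seq;
        u64 sum;

        do {
                seq = read_seqcount_begin(&foo_seq);
                sum = foo_a + foo_b;
        } while (read_seqcount_retry(&foo_seq, seq));

        return sum;
}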
@@ -307,6 +375,37 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
kcsan_nestable_atomic_end();
}
+/**
+ * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
+ * side operations
+ * @s: Pointer to seqcount_t
+ *
+ * After write_seqcount_invalidate, no seqcount_t read side operations
+ * will complete successfully and see data older than this.
+ */
+static inline void write_seqcount_invalidate(seqcount_t *s)
+{
+ smp_wmb();
+ kcsan_nestable_atomic_begin();
+ s->sequence+=2;
+ kcsan_nestable_atomic_end();
+}
+
+/**
+ * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
+ * @s: Pointer to seqcount_t
+ *
+ * Use seqcount_t latching to switch between two storage places protected
+ * by a sequence counter. Doing so allows having interruptible, preemptible,
+ * seqcount_t write side critical sections.
+ *
+ * Check raw_write_seqcount_latch() for more details and a full reader and
+ * writer usage example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter value must then be
+ * checked with read_seqcount_retry().
+ */
static inline int raw_read_seqcount_latch(seqcount_t *s)
{
/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
@@ -315,8 +414,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
}
/**
- * raw_write_seqcount_latch - redirect readers to even/odd copy
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_latch() - redirect readers to even/odd copy
+ * @s: Pointer to seqcount_t
*
* The latch technique is a multiversion concurrency control method that allows
* queries during non-atomic modifications. If you can guarantee queries never
@@ -332,64 +431,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
* Very simply put: we first modify one copy and then the other. This ensures
* there is always one copy in a stable state, ready to give us an answer.
*
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
*
- * struct latch_struct {
- * seqcount_t seq;
- * struct data_struct data[2];
- * };
+ * struct latch_struct {
+ * seqcount_t seq;
+ * struct data_struct data[2];
+ * };
*
* Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
*
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- * smp_wmb(); <- Ensure that the last data[1] update is visible
- * latch->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
+ * void latch_modify(struct latch_struct *latch, ...)
+ * {
+ * smp_wmb(); // Ensure that the last data[1] update is visible
+ * latch->seq++;
+ * smp_wmb(); // Ensure that the seqcount update is visible
*
- * modify(latch->data[0], ...);
+ * modify(latch->data[0], ...);
*
- * smp_wmb(); <- Ensure that the data[0] update is visible
- * latch->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
+ * smp_wmb(); // Ensure that the data[0] update is visible
+ * latch->seq++;
+ * smp_wmb(); // Ensure that the seqcount update is visible
*
- * modify(latch->data[1], ...);
- * }
+ * modify(latch->data[1], ...);
+ * }
*
- * The query will have a form like:
+ * The query will have a form like::
*
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- * struct entry *entry;
- * unsigned seq, idx;
+ * struct entry *latch_query(struct latch_struct *latch, ...)
+ * {
+ * struct entry *entry;
+ * unsigned seq, idx;
*
- * do {
- * seq = raw_read_seqcount_latch(&latch->seq);
+ * do {
+ * seq = raw_read_seqcount_latch(&latch->seq);
*
- * idx = seq & 0x01;
- * entry = data_query(latch->data[idx], ...);
+ * idx = seq & 0x01;
+ * entry = data_query(latch->data[idx], ...);
*
- * smp_rmb();
- * } while (seq != latch->seq);
+ * // read_seqcount_retry() includes needed smp_rmb()
+ * } while (read_seqcount_retry(&latch->seq, seq));
*
- * return entry;
- * }
+ * return entry;
+ * }
*
* So during the modification, queries are first redirected to data[1]. Then we
* modify data[0]. When that is complete, we redirect queries back to data[0]
* and we can modify data[1].
*
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- * the publishing of new entries in the case where data is a dynamic
- * data structure.
+ * NOTE:
*
- * An iteration might start in data[0] and get suspended long enough
- * to miss an entire modification sequence, once it resumes it might
- * observe the new entry.
+ * The non-requirement for atomic modifications does _NOT_ include
+ * the publishing of new entries in the case where data is a dynamic
+ * data structure.
*
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- * patterns to manage the lifetimes of the objects within.
+ * An iteration might start in data[0] and get suspended long enough
+ * to miss an entire modification sequence, once it resumes it might
+ * observe the new entry.
+ *
+ * NOTE:
+ *
+ * When data is a dynamic data structure; one should use regular RCU
+ * patterns to manage the lifetimes of the objects within.
*/
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
@@ -399,67 +502,48 @@ static inline void raw_write_seqcount_latch(seqcount_t *s)
}
/*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
- */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
-{
- raw_write_seqcount_begin(s);
- seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
-}
-
-static inline void write_seqcount_begin(seqcount_t *s)
-{
- write_seqcount_begin_nested(s, 0);
-}
-
-static inline void write_seqcount_end(seqcount_t *s)
-{
- seqcount_release(&s->dep_map, _RET_IP_);
- raw_write_seqcount_end(s);
-}
-
-/**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
+ * Sequential locks (seqlock_t)
*
- * After write_seqcount_invalidate, no read-side seq operations will complete
- * successfully and see data older than this.
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ * - Comments on top of seqcount_t
+ * - Documentation/locking/seqlock.rst
*/
-static inline void write_seqcount_invalidate(seqcount_t *s)
-{
- smp_wmb();
- kcsan_nestable_atomic_begin();
- s->sequence+=2;
- kcsan_nestable_atomic_end();
-}
-
typedef struct {
struct seqcount seqcount;
spinlock_t lock;
} seqlock_t;
-/*
- * These macros triggered gcc-3.x compile-time problems. We think these are
- * OK now. Be cautious.
- */
#define __SEQLOCK_UNLOCKED(lockname) \
{ \
.seqcount = SEQCNT_ZERO(lockname), \
.lock = __SPIN_LOCK_UNLOCKED(lockname) \
}
-#define seqlock_init(x) \
+/**
+ * seqlock_init() - dynamic initializer for seqlock_t
+ * @sl: Pointer to the seqlock_t instance
+ */
+#define seqlock_init(sl) \
do { \
- seqcount_init(&(x)->seqcount); \
- spin_lock_init(&(x)->lock); \
+ seqcount_init(&(sl)->seqcount); \
+ spin_lock_init(&(sl)->lock); \
} while (0)
-#define DEFINE_SEQLOCK(x) \
- seqlock_t x = __SEQLOCK_UNLOCKED(x)
+/**
+ * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
+ * @sl: Name of the seqlock_t instance
+ */
+#define DEFINE_SEQLOCK(sl) \
+ seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
-/*
- * Read side functions for starting and finalizing a read side section.
+/**
+ * read_seqbegin() - start a seqlock_t read side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * Return: count, to be passed to read_seqretry()
*/
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
@@ -470,6 +554,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
return ret;
}
+/**
+ * read_seqretry() - end a seqlock_t read side section
+ * @sl: Pointer to seqlock_t
+ * @start: count, from read_seqbegin()
+ *
+ * read_seqretry closes the read side critical section of given seqlock_t.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
+ *
+ * Return: true if a read section retry is required, else false
+ */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
/*
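For seqlock_t itself, the classic pattern looks roughly like the following hedged sketch (names are hypothetical; write_seqlock()/write_sequnlock(), documented further below, provide writer serialization via the embedded spinlock):

static DEFINE_SEQLOCK(bar_seqlock);
static u64 bar_x, bar_y;

static void bar_write(u64 x, u64 y)
{
        write_seqlock(&bar_seqlock);    /* acquires the embedded spinlock */
        bar_x = x;
        bar_y = y;
        write_sequnlock(&bar_seqlock);
}

static u64 bar_read(void)
{
        unsigned int seq;
        u64 sum;

        do {
                seq = read_seqbegin(&bar_seqlock);
                sum = bar_x + bar_y;
        } while (read_seqretry(&bar_seqlock, seq));

        return sum;
}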
@@ -481,41 +576,85 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
return read_seqcount_retry(&sl->seqcount, start);
}
-/*
- * Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * write_seqlock() - start a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_seqlock opens a write side critical section for the given
+ * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
+ * that sequential lock. All seqlock_t write side sections are thus
+ * automatically serialized and non-preemptible.
+ *
+ * Context: if the seqlock_t read section, or other write side critical
+ * sections, can be invoked from hardirq or softirq contexts, use the
+ * _irqsave or _bh variants of this function instead.
*/
static inline void write_seqlock(seqlock_t *sl)
{
spin_lock(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock() - end a seqlock_t write side critical section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock closes the (serialized and non-preemptible) write side
+ * critical section of given seqlock_t.
+ */
static inline void write_sequnlock(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
spin_unlock(&sl->lock);
}
+/**
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _bh variant of write_seqlock(). Use only if the read side section, or
+ * other write side sections, can be invoked from softirq contexts.
+ */
static inline void write_seqlock_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_bh closes the serialized, non-preemptible, and
+ * softirqs-disabled, seqlock_t write side critical section opened with
+ * write_seqlock_bh().
+ */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
spin_unlock_bh(&sl->lock);
}
+/**
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of write_seqlock(). Use only if the read side section, or
+ * other write sections, can be invoked from hardirq contexts.
+ */
static inline void write_seqlock_irq(seqlock_t *sl)
{
spin_lock_irq(&sl->lock);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
}
+/**
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
+ * @sl: Pointer to seqlock_t
+ *
+ * write_sequnlock_irq closes the serialized and non-interruptible
+ * seqlock_t write side section opened with write_seqlock_irq().
+ */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
write_seqcount_end(&sl->seqcount);
@@ -527,13 +666,32 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
unsigned long flags;
spin_lock_irqsave(&sl->lock, flags);
- write_seqcount_begin(&sl->seqcount);
+ __write_seqcount_begin(&sl->seqcount);
return flags;
}
+/**
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
+ * section
+ * @lock: Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ * state, to be passed to write_sequnlock_irqrestore().
+ *
+ * _irqsave variant of write_seqlock(). Use it only if the read side
+ * section, or other write sections, can be invoked from hardirq context.
+ */
#define write_seqlock_irqsave(lock, flags) \
do { flags = __write_seqlock_irqsave(lock); } while (0)
+/**
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
+ * section
+ * @sl: Pointer to seqlock_t
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
+ *
+ * write_sequnlock_irqrestore closes the serialized and non-interruptible
+ * seqlock_t write section previously opened with write_seqlock_irqsave().
+ */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
@@ -541,65 +699,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
spin_unlock_irqrestore(&sl->lock, flags);
}
-/*
- * A locking reader exclusively locks out other writers and locking readers,
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * read_seqlock_excl() - begin a seqlock_t locking reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * read_seqlock_excl opens a seqlock_t locking reader critical section. A
+ * locking reader exclusively locks out *both* other writers *and* other
+ * locking readers, but it does not update the embedded sequence number.
+ *
+ * Locking readers act like a normal spin_lock()/spin_unlock().
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * The opened read section must be closed with read_sequnlock_excl().
*/
static inline void read_seqlock_excl(seqlock_t *sl)
{
spin_lock(&sl->lock);
}
+/**
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
spin_unlock(&sl->lock);
}
/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
+ * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
+ * softirqs disabled
+ * @sl: Pointer to seqlock_t
*
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
+ * _bh variant of read_seqlock_excl(). Use this variant only if the
+ * seqlock_t write side section, *or other read sections*, can be invoked
+ * from softirq contexts.
*/
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
- if (!(*seq & 1)) /* Even */
- *seq = read_seqbegin(lock);
- else /* Odd */
- read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
- return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
- if (seq & 1)
- read_sequnlock_excl(lock);
-}
-
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
}
+/**
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
+ * reader section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
spin_unlock_bh(&sl->lock);
}
+/**
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
+ * reader section
+ * @sl: Pointer to seqlock_t
+ *
+ * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
spin_lock_irq(&sl->lock);
}
+/**
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
+ * locking reader section
+ * @sl: Pointer to seqlock_t
+ */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
spin_unlock_irq(&sl->lock);
@@ -613,15 +785,117 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
return flags;
}
+/**
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
+ * locking reader section
+ * @lock: Pointer to seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ * state, to be passed to read_sequnlock_excl_irqrestore().
+ *
+ * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
+ * write side section, *or other read sections*, can be invoked from a
+ * hardirq context.
+ */
#define read_seqlock_excl_irqsave(lock, flags) \
do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
+/**
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
+ * locking reader section
+ * @sl: Pointer to seqlock_t
+ * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
+ */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
spin_unlock_irqrestore(&sl->lock, flags);
}
+/**
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq : Marker and return parameter. If the passed value is even, the
+ * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
+ * If the passed value is odd, the reader will become a *locking* reader
+ * as in read_seqlock_excl(). In the first call to this function, the
+ * caller *must* initialize and pass an even value to @seq; this way, a
+ * lockless read can be optimistically tried first.
+ *
+ * read_seqbegin_or_lock is an API designed to optimistically try a normal
+ * lockless seqlock_t read section first. If an odd counter is found, the
+ * lockless read trial has failed, and the next read iteration transforms
+ * itself into a full seqlock_t locking reader.
+ *
+ * This is typically used to avoid starvation of lockless seqlock_t
+ * readers (too many retry loops) in the case of a sharp spike in write
+ * side activity.
+ *
+ * Context: if the seqlock_t write section, *or other read sections*, can
+ * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
+ * variant of this function instead.
+ *
+ * Check Documentation/locking/seqlock.rst for template example code.
+ *
+ * Return: the encountered sequence counter value, through the @seq
+ * parameter, which is overloaded as a return parameter. This returned
+ * value must be checked with need_seqretry(). If the read section needs to
+ * be retried, this returned value must also be passed as the @seq
+ * parameter of the next read_seqbegin_or_lock() iteration.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+ if (!(*seq & 1)) /* Even */
+ *seq = read_seqbegin(lock);
+ else /* Odd */
+ read_seqlock_excl(lock);
+}
+
+/**
+ * need_seqretry() - validate seqlock_t "locking or lockless" read section
+ * @lock: Pointer to seqlock_t
+ * @seq: sequence count, from read_seqbegin_or_lock()
+ *
+ * Return: true if a read section retry is required, false otherwise
+ */
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+ return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+/**
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: count, from read_seqbegin_or_lock()
+ *
+ * done_seqretry finishes the seqlock_t read side critical section started
+ * with read_seqbegin_or_lock() and validated by need_seqretry().
+ */
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+ if (seq & 1)
+ read_sequnlock_excl(lock);
+}
+
+/**
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
+ * a non-interruptible locking reader
+ * @lock: Pointer to seqlock_t
+ * @seq: Marker and return parameter. Check read_seqbegin_or_lock().
+ *
+ * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
+ * the seqlock_t write section, *or other read sections*, can be invoked
+ * from hardirq context.
+ *
+ * Note: Interrupts will be disabled only for "locking reader" mode.
+ *
+ * Return:
+ *
+ * 1. The saved local interrupts state in case of a locking reader, to
+ * be passed to done_seqretry_irqrestore().
+ *
+ * 2. The encountered sequence counter value, returned through @seq
+ * overloaded as a return parameter. Check read_seqbegin_or_lock().
+ */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
@@ -635,6 +909,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
return flags;
}
+/**
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
+ * non-interruptible locking reader section
+ * @lock: Pointer to seqlock_t
+ * @seq: Count, from read_seqbegin_or_lock_irqsave()
+ * @flags: Caller's saved local interrupt state in case of a locking
+ * reader, also from read_seqbegin_or_lock_irqsave()
+ *
+ * This is the _irqrestore variant of done_seqretry(). The read section
+ * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
+ * by need_seqretry().
+ */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
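Finally, the "lockless or locking" reader API documented above reduces to the template referenced from Documentation/locking/seqlock.rst; a hedged sketch with hypothetical names:

static void foo_walk(seqlock_t *lock)
{
        int seq = 0;    /* must start even: try the lockless pass first */

        do {
                read_seqbegin_or_lock(lock, &seq);

                /* ... read-side critical section ... */

        } while (need_seqretry(lock, seq));
        done_seqretry(lock, seq);
}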
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d3770b3f9d9a..f2f12d746dbd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -56,6 +56,7 @@
#include <linux/kernel.h>
#include <linux/stringify.h>
#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
#include <asm/barrier.h>
#include <asm/mmiowb.h>
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 6102e6bff3ae..b981caafe8bf 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -15,7 +15,7 @@
# include <linux/spinlock_types_up.h>
#endif
-#include <linux/lockdep.h>
+#include <linux/lockdep_types.h>
typedef struct raw_spinlock {
arch_spinlock_t raw_lock;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9aac824c523c..a1bbaa1c1a3a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -220,7 +220,9 @@ struct tcp_sock {
} rack;
u16 advmss; /* Advertised MSS */
u8 compressed_ack;
- u8 dup_ack_counter;
+ u8 dup_ack_counter:2,
+ tlp_retrans:1, /* TLP is a retransmission */
+ unused:5;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
@@ -243,7 +245,7 @@ struct tcp_sock {
save_syn:1, /* Save headers of SYN packet */
is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
syn_smc:1; /* SYN includes SMC */
- u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
+ u32 tlp_high_seq; /* snd_nxt at the time of TLP */
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
u64 tcp_wstamp_ns; /* departure time for next sent data packet */
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 629b66e6c161..7f65bd1dd307 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -55,6 +55,11 @@ struct torture_random_state {
#define DEFINE_TORTURE_RANDOM_PERCPU(name) \
DEFINE_PER_CPU(struct torture_random_state, name)
unsigned long torture_random(struct torture_random_state *trsp);
+static inline void torture_random_init(struct torture_random_state *trsp)
+{
+ trsp->trs_state = 0;
+ trsp->trs_count = 0;
+}
/* Task shuffler, which causes CPUs to occasionally go idle. */
void torture_shuffle_task_register(struct task_struct *tp);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 03e9b184411b..8f4ff39f51e7 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -96,6 +96,7 @@ struct tpm_space {
u8 *context_buf;
u32 session_tbl[3];
u8 *session_buf;
+ u32 buf_size;
};
struct tpm_bios_log {
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 64356b199e94..739ba9a03ec1 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -211,9 +211,16 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
- /* Check if event is malformed. */
+ /*
+ * Perform validation of the event in order to identify malformed
+ * events. This function may be asked to parse arbitrary byte sequences
+ * immediately following a valid event log. The caller expects this
+ * function to recognize that the byte sequence is not a valid event
+ * and to return an event size of 0.
+ */
if (memcmp(efispecid->signature, TCG_SPECID_SIG,
- sizeof(TCG_SPECID_SIG)) || count > efispecid->num_algs) {
+ sizeof(TCG_SPECID_SIG)) ||
+ !efispecid->num_algs || count != efispecid->num_algs) {
size = 0;
goto out;
}
diff --git a/include/linux/types.h b/include/linux/types.h
index d3021c879179..a147977602b5 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -167,6 +167,8 @@ typedef struct {
int counter;
} atomic_t;
+#define ATOMIC_INIT(i) { (i) }
+
#ifdef CONFIG_64BIT
typedef struct {
s64 counter;
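ATOMIC_INIT() is the static-initializer counterpart of atomic_set(); a trivial hedged sketch (the variable and helper names are hypothetical):

static atomic_t example_counter = ATOMIC_INIT(0);

static void example_bump(void)
{
        atomic_inc(&example_counter);
}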
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 47eaa34f8761..c5afaf8ca7a2 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <linux/mm.h>
#include <uapi/linux/xattr.h>
struct inode;
@@ -94,7 +95,7 @@ static inline void simple_xattrs_free(struct simple_xattrs *xattrs)
list_for_each_entry_safe(xattr, node, &xattrs->head, list) {
kfree(xattr->name);
- kfree(xattr);
+ kvfree(xattr);
}
}