summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 22:47:20 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 22:47:20 +0300
commita0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch)
tree9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /include
parent0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff)
parentfcaa174a9c995cf0af3967e55644a1543ea07e36 (diff)
downloadlinux-a0433f8cae3ac51f59b4b1863032822aaa2d8164.tar.xz
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull request via Keith: - Various cleanups all around (Irvin, Chaitanya, Christophe) - Better struct packing (Christophe JAILLET) - Reduce controller error logs for optional commands (Keith) - Support for >=64KiB block sizes (Daniel Gomez) - Fabrics fixes and code organization (Max, Chaitanya, Daniel Wagner) - bcache updates via Coly: - Fix a race at init time (Mingzhe Zou) - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye) - use page pinning in the block layer for dio (David) - convert old block dio code to page pinning (David, Christoph) - cleanups for pktcdvd (Andy) - cleanups for rnbd (Guoqing) - use the unchecked __bio_add_page() for the initial single page additions (Johannes) - fix overflows in the Amiga partition handling code (Michael) - improve mq-deadline zoned device support (Bart) - keep passthrough requests out of the IO schedulers (Christoph, Ming) - improve support for flush requests, making them less special to deal with (Christoph) - add bdev holder ops and shutdown methods (Christoph) - fix the name_to_dev_t() situation and use cases (Christoph) - decouple the block open flags from fmode_t (Christoph) - ublk updates and cleanups, including adding user copy support (Ming) - BFQ sanity checking (Bart) - convert brd from radix to xarray (Pankaj) - constify various structures (Thomas, Ivan) - more fine grained persistent reservation ioctl capability checks (Jingbo) - misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan, Jordy, Li, Min, Yu, Zhong, Waiman) * tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits) scsi/sg: don't grab scsi host module reference ext4: Fix warning in blkdev_put() block: don't return -EINVAL for not found names in devt_from_devname cdrom: Fix spectre-v1 gadget block: Improve kernel-doc headers blk-mq: don't insert passthrough request into sw queue bsg: make bsg_class a static const structure ublk: make ublk_chr_class a static const structure aoe: make aoe_class a static const structure block/rnbd: make all 'class' structures const block: fix the exclusive open mask in disk_scan_partitions block: add overflow checks for Amiga partition support block: change all __u32 annotations to __be32 in affs_hardblocks.h block: fix signed int overflow in Amiga partition support block: add capacity validation in bdev_add_partition() block: fine-granular CAP_SYS_ADMIN for Persistent Reservation block: disallow Persistent Reservation on partitions reiserfs: fix blkdev_put() warning from release_journal_dev() block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions() block: document the holder argument to blkdev_get_by_path ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h12
-rw-r--r--include/linux/blk-mq.h67
-rw-r--r--include/linux/blk_types.h4
-rw-r--r--include/linux/blkdev.h101
-rw-r--r--include/linux/blktrace_api.h6
-rw-r--r--include/linux/bsg.h2
-rw-r--r--include/linux/cdrom.h12
-rw-r--r--include/linux/device-mapper.h10
-rw-r--r--include/linux/device/driver.h2
-rw-r--r--include/linux/fs.h9
-rw-r--r--include/linux/mm.h27
-rw-r--r--include/linux/mount.h1
-rw-r--r--include/linux/mtd/blktrans.h2
-rw-r--r--include/linux/nvme-fc-driver.h10
-rw-r--r--include/linux/pktcdvd.h1
-rw-r--r--include/linux/root_dev.h9
-rw-r--r--include/linux/uio.h6
-rw-r--r--include/scsi/scsi_ioctl.h4
-rw-r--r--include/trace/events/block.h26
-rw-r--r--include/uapi/linux/affs_hardblocks.h68
-rw-r--r--include/uapi/linux/pktcdvd.h1
-rw-r--r--include/uapi/linux/ublk_cmd.h33
22 files changed, 250 insertions, 163 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b3e7529ff55e..c4f5b5228105 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -229,7 +229,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
static inline bool bio_flagged(struct bio *bio, unsigned int bit)
{
- return (bio->bi_flags & (1U << bit)) != 0;
+ return bio->bi_flags & (1U << bit);
}
static inline void bio_set_flag(struct bio *bio, unsigned int bit)
@@ -465,14 +465,18 @@ extern void bio_uninit(struct bio *);
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
void bio_chain(struct bio *, struct bio *);
-int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off);
-bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off);
+int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
+ unsigned off);
+bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
+ size_t len, size_t off);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
int bio_add_zone_append_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
+void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
+ size_t off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
@@ -488,7 +492,7 @@ void zero_fill_bio(struct bio *bio);
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
{
- if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ if (bio_flagged(bio, BIO_PAGE_PINNED))
__bio_release_pages(bio, mark_dirty);
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 06caacd77ed6..f401067ac03a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -28,8 +28,6 @@ typedef __u32 __bitwise req_flags_t;
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
-/* may not be passed by ioscheduler */
-#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
/* request for flush sequence */
#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
/* merge of different types, fail separately */
@@ -38,12 +36,14 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
+/* use hctx->sched_tags */
+#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8))
+/* use an I/O scheduler for this request */
+#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9))
/* vaguely specified driver internal error. Ignored by the block layer */
#define RQF_FAILED ((__force req_flags_t)(1 << 10))
/* don't warn about errors */
#define RQF_QUIET ((__force req_flags_t)(1 << 11))
-/* elevator private data attached */
-#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12))
/* account into disk and partition IO statistics */
#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
/* runtime pm request */
@@ -59,13 +59,11 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
-/* queue has elevator attached */
-#define RQF_ELV ((__force req_flags_t)(1 << 22))
-#define RQF_RESV ((__force req_flags_t)(1 << 23))
+#define RQF_RESV ((__force req_flags_t)(1 << 23))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
- (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+ (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
enum mq_rq_state {
MQ_RQ_IDLE = 0,
@@ -169,25 +167,20 @@ struct request {
void *completion_data;
};
-
/*
* Three pointers are available for the IO schedulers, if they need
- * more they have to dynamically allocate it. Flush requests are
- * never put on the IO scheduler. So let the flush fields share
- * space with the elevator data.
+ * more they have to dynamically allocate it.
*/
- union {
- struct {
- struct io_cq *icq;
- void *priv[2];
- } elv;
-
- struct {
- unsigned int seq;
- struct list_head list;
- rq_end_io_fn *saved_end_io;
- } flush;
- };
+ struct {
+ struct io_cq *icq;
+ void *priv[2];
+ } elv;
+
+ struct {
+ unsigned int seq;
+ struct list_head list;
+ rq_end_io_fn *saved_end_io;
+ } flush;
union {
struct __call_single_data csd;
@@ -208,7 +201,7 @@ static inline enum req_op req_op(const struct request *req)
static inline bool blk_rq_is_passthrough(struct request *rq)
{
- return blk_op_is_passthrough(req_op(rq));
+ return blk_op_is_passthrough(rq->cmd_flags);
}
static inline unsigned short req_get_ioprio(struct request *req)
@@ -746,8 +739,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
struct blk_mq_tags {
unsigned int nr_tags;
unsigned int nr_reserved_tags;
-
- atomic_t active_queues;
+ unsigned int active_queues;
struct sbitmap_queue bitmap_tags;
struct sbitmap_queue breserved_tags;
@@ -844,7 +836,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
- return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+ return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}
static inline bool blk_mq_is_reserved_rq(struct request *rq)
@@ -860,7 +852,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *))
{
- if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+ if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
(req->end_io && !blk_rq_is_passthrough(req)))
return false;
@@ -1164,6 +1156,18 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
}
+/**
+ * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization.
+ * @rq: Request to examine.
+ *
+ * Note: REQ_OP_ZONE_APPEND requests do not require serialization.
+ */
+static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
+{
+ return op_needs_zoned_write_locking(req_op(rq)) &&
+ blk_rq_zone_is_seq(rq);
+}
+
bool blk_req_needs_zone_write_lock(struct request *rq);
bool blk_req_zone_write_trylock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
@@ -1194,6 +1198,11 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
return !blk_req_zone_is_write_locked(rq);
}
#else /* CONFIG_BLK_DEV_ZONED */
+static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
+{
+ return false;
+}
+
static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
return false;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 740afe80f297..752a54e3284b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -55,6 +55,8 @@ struct block_device {
struct super_block * bd_super;
void * bd_claiming;
void * bd_holder;
+ const struct blk_holder_ops *bd_holder_ops;
+ struct mutex bd_holder_lock;
/* The counter of freeze processes */
int bd_fsfreeze_count;
int bd_holders;
@@ -323,7 +325,7 @@ struct bio {
* bio flags
*/
enum {
- BIO_NO_PAGE_REF, /* don't put release vec pages */
+ BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */
BIO_CLONED, /* doesn't own data */
BIO_BOUNCED, /* bio is a bounce bio */
BIO_QUIET, /* Make BIO Quiet */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c0ffe203a602..ed44a997f629 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -41,7 +41,7 @@ struct blk_stat_callback;
struct blk_crypto_profile;
extern const struct device_type disk_type;
-extern struct device_type part_type;
+extern const struct device_type part_type;
extern struct class block_class;
/*
@@ -112,6 +112,19 @@ struct blk_integrity {
unsigned char tag_size;
};
+typedef unsigned int __bitwise blk_mode_t;
+
+/* open for reading */
+#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
+/* open for writing */
+#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
+/* open exclusively (vs other exclusive openers */
+#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
+/* opened with O_NDELAY */
+#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
+/* open for "writes" only for ioctls (specialy hack for floppy.c) */
+#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
+
struct gendisk {
/*
* major/first_minor/minors should not be set by any new driver, the
@@ -187,6 +200,7 @@ struct gendisk {
struct badblocks *bb;
struct lockdep_map lockdep_map;
u64 diskseq;
+ blk_mode_t open_mode;
/*
* Independent sector access ranges. This is always NULL for
@@ -318,7 +332,6 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
#ifdef CONFIG_BLK_DEV_ZONED
-
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
@@ -328,33 +341,11 @@ extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
gfp_t gfp_mask);
int blk_revalidate_disk_zones(struct gendisk *disk,
void (*update_driver_data)(struct gendisk *disk));
-
-extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
-extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
-
#else /* CONFIG_BLK_DEV_ZONED */
-
static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
return 0;
}
-
-static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
- fmode_t mode, unsigned int cmd,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
- fmode_t mode, unsigned int cmd,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
#endif /* CONFIG_BLK_DEV_ZONED */
/*
@@ -392,6 +383,7 @@ struct request_queue {
struct blk_queue_stats *stats;
struct rq_qos *rq_qos;
+ struct mutex rq_qos_mutex;
const struct blk_mq_ops *mq_ops;
@@ -487,6 +479,7 @@ struct request_queue {
* for flush operations
*/
struct blk_flush_queue *fq;
+ struct list_head flush_list;
struct list_head requeue_list;
spinlock_t requeue_lock;
@@ -815,7 +808,7 @@ int __register_blkdev(unsigned int major, const char *name,
__register_blkdev(major, name, NULL)
void unregister_blkdev(unsigned int major, const char *name);
-bool bdev_check_media_change(struct block_device *bdev);
+bool disk_check_media_change(struct gendisk *disk);
int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
@@ -836,7 +829,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
dev_t part_devt(struct gendisk *disk, u8 partno);
void inc_diskseq(struct gendisk *disk);
-dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
extern int blk_register_queue(struct gendisk *disk);
@@ -1281,15 +1273,18 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec)
return disk_zone_no(bdev->bd_disk, sec);
}
-static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
- blk_opf_t op)
+/* Whether write serialization is required for @op on zoned devices. */
+static inline bool op_needs_zoned_write_locking(enum req_op op)
{
- if (!bdev_is_zoned(bdev))
- return false;
-
return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES;
}
+static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
+ enum req_op op)
+{
+ return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op);
+}
+
static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1380,10 +1375,12 @@ struct block_device_operations {
void (*submit_bio)(struct bio *bio);
int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob,
unsigned int flags);
- int (*open) (struct block_device *, fmode_t);
- void (*release) (struct gendisk *, fmode_t);
- int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
- int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+ int (*open)(struct gendisk *disk, blk_mode_t mode);
+ void (*release)(struct gendisk *disk);
+ int (*ioctl)(struct block_device *bdev, blk_mode_t mode,
+ unsigned cmd, unsigned long arg);
+ int (*compat_ioctl)(struct block_device *bdev, blk_mode_t mode,
+ unsigned cmd, unsigned long arg);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
void (*unlock_native_capacity) (struct gendisk *);
@@ -1410,7 +1407,7 @@ struct block_device_operations {
};
#ifdef CONFIG_COMPAT
-extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t,
+extern int blkdev_compat_ptr_ioctl(struct block_device *, blk_mode_t,
unsigned int, unsigned long);
#else
#define blkdev_compat_ptr_ioctl NULL
@@ -1463,22 +1460,31 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
#define BLKDEV_MAJOR_MAX 0
#endif
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder);
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
-int bd_prepare_to_claim(struct block_device *bdev, void *holder);
+struct blk_holder_ops {
+ void (*mark_dead)(struct block_device *bdev);
+};
+
+/*
+ * Return the correct open flags for blkdev_get_by_* for super block flags
+ * as stored in sb->s_flags.
+ */
+#define sb_open_mode(flags) \
+ (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
+
+struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops);
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hops);
+int bd_prepare_to_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder);
-void blkdev_put(struct block_device *bdev, fmode_t mode);
+void blkdev_put(struct block_device *bdev, void *holder);
/* just for blk-cgroup, don't use elsewhere */
struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);
-struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
-void bdev_add(struct block_device *bdev, dev_t dev);
struct block_device *I_BDEV(struct inode *inode);
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
- loff_t lend);
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
@@ -1488,6 +1494,7 @@ int sync_blockdev_nowait(struct block_device *bdev);
void sync_bdevs(bool wait);
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
void printk_all_partitions(void);
+int __init early_lookup_bdev(const char *pathname, dev_t *dev);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
@@ -1509,6 +1516,10 @@ static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
static inline void printk_all_partitions(void)
{
}
+static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_BLOCK */
int fsync_bdev(struct block_device *bdev);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cfbda114348c..122c62e561fc 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -85,10 +85,14 @@ extern int blk_trace_remove(struct request_queue *q);
# define blk_add_driver_data(rq, data, len) do {} while (0)
# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY)
# define blk_trace_startstop(q, start) (-ENOTTY)
-# define blk_trace_remove(q) (-ENOTTY)
# define blk_add_trace_msg(q, fmt, ...) do { } while (0)
# define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0)
# define blk_trace_note_message_enabled(q) (false)
+
+static inline int blk_trace_remove(struct request_queue *q)
+{
+ return -ENOTTY;
+}
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#ifdef CONFIG_COMPAT
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 1ac81c809da9..ee2df73edf83 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -9,7 +9,7 @@ struct device;
struct request_queue;
typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr,
- fmode_t mode, unsigned int timeout);
+ bool open_for_write, unsigned int timeout);
struct bsg_device *bsg_register_queue(struct request_queue *q,
struct device *parent, const char *name,
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 67caa909e3e6..98c6fd0b39b6 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -13,6 +13,7 @@
#include <linux/fs.h> /* not really needed, later.. */
#include <linux/list.h>
+#include <linux/blkdev.h>
#include <scsi/scsi_common.h>
#include <uapi/linux/cdrom.h>
@@ -61,9 +62,9 @@ struct cdrom_device_info {
__u8 last_sense;
__u8 media_written; /* dirty flag, DVD+RW bookkeeping */
unsigned short mmc3_profile; /* current MMC3 profile */
- int for_data;
int (*exit)(struct cdrom_device_info *);
int mrw_mode_page;
+ bool opened_for_data;
__s64 last_media_change_ms;
};
@@ -101,11 +102,10 @@ int cdrom_read_tocentry(struct cdrom_device_info *cdi,
struct cdrom_tocentry *entry);
/* the general block_device operations structure: */
-extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
- fmode_t mode);
-extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode);
-extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
- fmode_t mode, unsigned int cmd, unsigned long arg);
+int cdrom_open(struct cdrom_device_info *cdi, blk_mode_t mode);
+void cdrom_release(struct cdrom_device_info *cdi);
+int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
+ unsigned int cmd, unsigned long arg);
extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
unsigned int clearing);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a52d2b9a6846..69d0435c7ebb 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -166,17 +166,15 @@ void dm_error(const char *message);
struct dm_dev {
struct block_device *bdev;
struct dax_device *dax_dev;
- fmode_t mode;
+ blk_mode_t mode;
char name[16];
};
-dev_t dm_get_dev_t(const char *path);
-
/*
* Constructors should call these functions to ensure destination devices
* are opened/closed correctly.
*/
-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
struct dm_dev **result);
void dm_put_device(struct dm_target *ti, struct dm_dev *d);
@@ -545,7 +543,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
/*
* First create an empty table.
*/
-int dm_table_create(struct dm_table **result, fmode_t mode,
+int dm_table_create(struct dm_table **result, blk_mode_t mode,
unsigned int num_targets, struct mapped_device *md);
/*
@@ -588,7 +586,7 @@ void dm_sync_table(struct mapped_device *md);
* Queries
*/
sector_t dm_table_get_size(struct dm_table *t);
-fmode_t dm_table_get_mode(struct dm_table *t);
+blk_mode_t dm_table_get_mode(struct dm_table *t);
struct mapped_device *dm_table_get_md(struct dm_table *t);
const char *dm_table_device_name(struct dm_table *t);
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index c244267a6744..7738f458995f 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -126,7 +126,7 @@ int __must_check driver_register(struct device_driver *drv);
void driver_unregister(struct device_driver *drv);
struct device_driver *driver_find(const char *name, const struct bus_type *bus);
-int driver_probe_done(void);
+bool __init driver_probe_done(void);
void wait_for_device_probe(void);
void __init wait_for_init_devices_probe(void);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42129a67d98d..ed5b32dc2625 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -119,13 +119,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
-/* File is opened with O_NDELAY (only set for block devices) */
-#define FMODE_NDELAY ((__force fmode_t)0x40)
-/* File is opened with O_EXCL (only set for block devices) */
-#define FMODE_EXCL ((__force fmode_t)0x80)
-/* File is opened using open(.., 3, ..) and is writeable only for ioctls
- (specialy hack for floppy.c) */
-#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
@@ -1224,7 +1217,6 @@ struct super_block {
uuid_t s_uuid; /* UUID */
unsigned int s_max_links;
- fmode_t s_mode;
/*
* The next field is for VFS *only*. No filesystems have any business
@@ -1944,6 +1936,7 @@ struct super_operations {
struct shrink_control *);
long (*free_cached_objects)(struct super_block *,
struct shrink_control *);
+ void (*shutdown)(struct super_block *sb);
};
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27ce77080c79..200068d98686 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1910,6 +1910,28 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
return page_maybe_dma_pinned(page);
}
+/**
+ * is_zero_page - Query if a page is a zero page
+ * @page: The page to query
+ *
+ * This returns true if @page is one of the permanent zero pages.
+ */
+static inline bool is_zero_page(const struct page *page)
+{
+ return is_zero_pfn(page_to_pfn(page));
+}
+
+/**
+ * is_zero_folio - Query if a folio is a zero page
+ * @folio: The folio to query
+ *
+ * This returns true if @folio is one of the permanent zero pages.
+ */
+static inline bool is_zero_folio(const struct folio *folio)
+{
+ return is_zero_page(&folio->page);
+}
+
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
static inline bool is_longterm_pinnable_page(struct page *page)
@@ -1920,8 +1942,8 @@ static inline bool is_longterm_pinnable_page(struct page *page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- /* The zero page may always be pinned */
- if (is_zero_pfn(page_to_pfn(page)))
+ /* The zero page can be "pinned" but gets special handling. */
+ if (is_zero_page(page))
return true;
/* Coherent device memory must always allow eviction. */
@@ -2383,6 +2405,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
+void folio_add_pin(struct folio *folio);
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1ea326c368f7..4b81ea90440e 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -107,7 +107,6 @@ extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
-extern dev_t name_to_dev_t(const char *name);
extern bool path_is_mountpoint(const struct path *path);
extern bool our_mnt(struct vfsmount *mnt);
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 15cc9b95e32b..6e471436bba5 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -34,7 +34,7 @@ struct mtd_blktrans_dev {
struct blk_mq_tag_set *tag_set;
spinlock_t queue_lock;
void *priv;
- fmode_t file_mode;
+ bool writable;
};
struct mtd_blktrans_ops {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index fa092b9be2fd..4109f1bd6128 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -185,7 +185,6 @@ enum nvmefc_fcp_datadir {
* @first_sgl: memory for 1st scatter/gather list segment for payload data
* @sg_cnt: number of elements in the scatter/gather list
* @io_dir: direction of the FCP request (see NVMEFC_FCP_xxx)
- * @sqid: The nvme SQID the command is being issued on
* @done: The callback routine the LLDD is to invoke upon completion of
* the FCP operation. req argument is the pointer to the original
* FCP IO operation.
@@ -194,12 +193,13 @@ enum nvmefc_fcp_datadir {
* while processing the operation. The length of the buffer
* corresponds to the fcprqst_priv_sz value specified in the
* nvme_fc_port_template supplied by the LLDD.
+ * @sqid: The nvme SQID the command is being issued on
*
* Values set by the LLDD indicating completion status of the FCP operation.
* Must be set prior to calling the done() callback.
+ * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
* @transferred_length: amount of payload data, in bytes, that were
* transferred. Should equal payload_length on success.
- * @rcv_rsplen: length, in bytes, of the FCP RSP IU received.
* @status: Completion status of the FCP operation. must be 0 upon success,
* negative errno value upon failure (ex: -EIO). Note: this is
* NOT a reflection of the NVME CQE completion status. Only the
@@ -219,14 +219,14 @@ struct nvmefc_fcp_req {
int sg_cnt;
enum nvmefc_fcp_datadir io_dir;
- __le16 sqid;
-
void (*done)(struct nvmefc_fcp_req *req);
void *private;
- u32 transferred_length;
+ __le16 sqid;
+
u16 rcv_rsplen;
+ u32 transferred_length;
u32 status;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index f9c5ac80d59b..80cb00db42a4 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -156,7 +156,6 @@ struct pktcdvd_device
{
struct block_device *bdev; /* dev attached */
dev_t pkt_dev; /* our dev */
- char name[20];
struct packet_settings settings;
struct packet_stats stats;
int refcnt; /* Open count */
diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
index 4e78651371ba..847c9a06101b 100644
--- a/include/linux/root_dev.h
+++ b/include/linux/root_dev.h
@@ -9,15 +9,8 @@
enum {
Root_NFS = MKDEV(UNNAMED_MAJOR, 255),
Root_CIFS = MKDEV(UNNAMED_MAJOR, 254),
+ Root_Generic = MKDEV(UNNAMED_MAJOR, 253),
Root_RAM0 = MKDEV(RAMDISK_MAJOR, 0),
- Root_RAM1 = MKDEV(RAMDISK_MAJOR, 1),
- Root_FD0 = MKDEV(FLOPPY_MAJOR, 0),
- Root_HDA1 = MKDEV(IDE0_MAJOR, 1),
- Root_HDA2 = MKDEV(IDE0_MAJOR, 2),
- Root_SDA1 = MKDEV(SCSI_DISK0_MAJOR, 1),
- Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2),
- Root_HDC1 = MKDEV(IDE1_MAJOR, 1),
- Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0),
};
extern dev_t ROOT_DEV;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 60c342bb7ab8..8e7d2c425340 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -277,14 +277,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
loff_t start, size_t count);
-ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
- size_t maxsize, unsigned maxpages, size_t *start,
- iov_iter_extraction_t extraction_flags);
ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
- struct page ***pages, size_t maxsize, size_t *start,
- iov_iter_extraction_t extraction_flags);
ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
index beac64e38b87..a207c07da9d2 100644
--- a/include/scsi/scsi_ioctl.h
+++ b/include/scsi/scsi_ioctl.h
@@ -45,11 +45,11 @@ typedef struct scsi_fctargaddress {
int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev,
int cmd, bool ndelay);
-int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd,
+int scsi_ioctl(struct scsi_device *sdev, bool open_for_write, int cmd,
void __user *arg);
int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
-bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode);
+bool scsi_cmd_allowed(unsigned char *cmd, bool open_for_write);
#endif /* __KERNEL__ */
#endif /* _SCSI_IOCTL_H */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 7f4dfbdf12a6..40e60c33cc6f 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -246,6 +246,32 @@ DEFINE_EVENT(block_rq, block_rq_merge,
);
/**
+ * block_io_start - insert a request for execution
+ * @rq: block IO operation request
+ *
+ * Called when block operation request @rq is queued for execution
+ */
+DEFINE_EVENT(block_rq, block_io_start,
+
+ TP_PROTO(struct request *rq),
+
+ TP_ARGS(rq)
+);
+
+/**
+ * block_io_done - block IO operation request completed
+ * @rq: block IO operation request
+ *
+ * Called when block operation request @rq is completed
+ */
+DEFINE_EVENT(block_rq, block_io_done,
+
+ TP_PROTO(struct request *rq),
+
+ TP_ARGS(rq)
+);
+
+/**
* block_bio_complete - completed all work on the block operation
* @q: queue holding the block operation
* @bio: block operation completed
diff --git a/include/uapi/linux/affs_hardblocks.h b/include/uapi/linux/affs_hardblocks.h
index 5e2fb8481252..a5aff2eb5f70 100644
--- a/include/uapi/linux/affs_hardblocks.h
+++ b/include/uapi/linux/affs_hardblocks.h
@@ -7,42 +7,42 @@
/* Just the needed definitions for the RDB of an Amiga HD. */
struct RigidDiskBlock {
- __u32 rdb_ID;
+ __be32 rdb_ID;
__be32 rdb_SummedLongs;
- __s32 rdb_ChkSum;
- __u32 rdb_HostID;
+ __be32 rdb_ChkSum;
+ __be32 rdb_HostID;
__be32 rdb_BlockBytes;
- __u32 rdb_Flags;
- __u32 rdb_BadBlockList;
+ __be32 rdb_Flags;
+ __be32 rdb_BadBlockList;
__be32 rdb_PartitionList;
- __u32 rdb_FileSysHeaderList;
- __u32 rdb_DriveInit;
- __u32 rdb_Reserved1[6];
- __u32 rdb_Cylinders;
- __u32 rdb_Sectors;
- __u32 rdb_Heads;
- __u32 rdb_Interleave;
- __u32 rdb_Park;
- __u32 rdb_Reserved2[3];
- __u32 rdb_WritePreComp;
- __u32 rdb_ReducedWrite;
- __u32 rdb_StepRate;
- __u32 rdb_Reserved3[5];
- __u32 rdb_RDBBlocksLo;
- __u32 rdb_RDBBlocksHi;
- __u32 rdb_LoCylinder;
- __u32 rdb_HiCylinder;
- __u32 rdb_CylBlocks;
- __u32 rdb_AutoParkSeconds;
- __u32 rdb_HighRDSKBlock;
- __u32 rdb_Reserved4;
+ __be32 rdb_FileSysHeaderList;
+ __be32 rdb_DriveInit;
+ __be32 rdb_Reserved1[6];
+ __be32 rdb_Cylinders;
+ __be32 rdb_Sectors;
+ __be32 rdb_Heads;
+ __be32 rdb_Interleave;
+ __be32 rdb_Park;
+ __be32 rdb_Reserved2[3];
+ __be32 rdb_WritePreComp;
+ __be32 rdb_ReducedWrite;
+ __be32 rdb_StepRate;
+ __be32 rdb_Reserved3[5];
+ __be32 rdb_RDBBlocksLo;
+ __be32 rdb_RDBBlocksHi;
+ __be32 rdb_LoCylinder;
+ __be32 rdb_HiCylinder;
+ __be32 rdb_CylBlocks;
+ __be32 rdb_AutoParkSeconds;
+ __be32 rdb_HighRDSKBlock;
+ __be32 rdb_Reserved4;
char rdb_DiskVendor[8];
char rdb_DiskProduct[16];
char rdb_DiskRevision[4];
char rdb_ControllerVendor[8];
char rdb_ControllerProduct[16];
char rdb_ControllerRevision[4];
- __u32 rdb_Reserved5[10];
+ __be32 rdb_Reserved5[10];
};
#define IDNAME_RIGIDDISK 0x5244534B /* "RDSK" */
@@ -50,16 +50,16 @@ struct RigidDiskBlock {
struct PartitionBlock {
__be32 pb_ID;
__be32 pb_SummedLongs;
- __s32 pb_ChkSum;
- __u32 pb_HostID;
+ __be32 pb_ChkSum;
+ __be32 pb_HostID;
__be32 pb_Next;
- __u32 pb_Flags;
- __u32 pb_Reserved1[2];
- __u32 pb_DevFlags;
+ __be32 pb_Flags;
+ __be32 pb_Reserved1[2];
+ __be32 pb_DevFlags;
__u8 pb_DriveName[32];
- __u32 pb_Reserved2[15];
+ __be32 pb_Reserved2[15];
__be32 pb_Environment[17];
- __u32 pb_EReserved[15];
+ __be32 pb_EReserved[15];
};
#define IDNAME_PARTITION 0x50415254 /* "PART" */
diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h
index 6a5552dfd6af..987a3022dc5f 100644
--- a/include/uapi/linux/pktcdvd.h
+++ b/include/uapi/linux/pktcdvd.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
/*
+ * UNUSED:
* 1 for normal debug messages, 2 is very verbose. 0 to turn it off.
*/
#define PACKET_DEBUG 1
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 640bf687b94a..4b8558db90e1 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -47,6 +47,14 @@
_IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO2 \
_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_GET_FEATURES \
+ _IOR('u', 0x13, struct ublksrv_ctrl_cmd)
+
+/*
+ * 64bits are enough now, and it should be easy to extend in case of
+ * running out of feature flags
+ */
+#define UBLK_FEATURES_LEN 8
/*
* IO commands, issued by ublk server, and handled by ublk driver.
@@ -93,9 +101,29 @@
#define UBLKSRV_CMD_BUF_OFFSET 0
#define UBLKSRV_IO_BUF_OFFSET 0x80000000
-/* tag bit is 12bit, so at most 4096 IOs for each queue */
+/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */
#define UBLK_MAX_QUEUE_DEPTH 4096
+/* single IO buffer max size is 32MB */
+#define UBLK_IO_BUF_OFF 0
+#define UBLK_IO_BUF_BITS 25
+#define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1)
+
+/* so at most 64K IOs for each queue */
+#define UBLK_TAG_OFF UBLK_IO_BUF_BITS
+#define UBLK_TAG_BITS 16
+#define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1)
+
+/* max 4096 queues */
+#define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS)
+#define UBLK_QID_BITS 12
+#define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1)
+
+#define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS)
+
+#define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS)
+#define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)
+
/*
* zero copy requires 4k block size, and can remap ublk driver's io
* request into ublksrv's vm space
@@ -145,6 +173,9 @@
/* use ioctl encoding for uring command */
#define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6)
+/* Copy between request and user buffer by pread()/pwrite() */
+#define UBLK_F_USER_COPY (1UL << 7)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1