diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 22:47:20 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 22:47:20 +0300 |
commit | a0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch) | |
tree | 9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /include | |
parent | 0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff) | |
parent | fcaa174a9c995cf0af3967e55644a1543ea07e36 (diff) | |
download | linux-a0433f8cae3ac51f59b4b1863032822aaa2d8164.tar.xz |
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull request via Keith:
- Various cleanups all around (Irvin, Chaitanya, Christophe)
- Better struct packing (Christophe JAILLET)
- Reduce controller error logs for optional commands (Keith)
- Support for >=64KiB block sizes (Daniel Gomez)
- Fabrics fixes and code organization (Max, Chaitanya, Daniel
Wagner)
- bcache updates via Coly:
- Fix a race at init time (Mingzhe Zou)
- Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye)
- use page pinning in the block layer for dio (David)
- convert old block dio code to page pinning (David, Christoph)
- cleanups for pktcdvd (Andy)
- cleanups for rnbd (Guoqing)
- use the unchecked __bio_add_page() for the initial single page
additions (Johannes)
- fix overflows in the Amiga partition handling code (Michael)
- improve mq-deadline zoned device support (Bart)
- keep passthrough requests out of the IO schedulers (Christoph, Ming)
- improve support for flush requests, making them less special to deal
with (Christoph)
- add bdev holder ops and shutdown methods (Christoph)
- fix the name_to_dev_t() situation and use cases (Christoph)
- decouple the block open flags from fmode_t (Christoph)
- ublk updates and cleanups, including adding user copy support (Ming)
- BFQ sanity checking (Bart)
- convert brd from radix to xarray (Pankaj)
- constify various structures (Thomas, Ivan)
- more fine grained persistent reservation ioctl capability checks
(Jingbo)
- misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan,
Jordy, Li, Min, Yu, Zhong, Waiman)
* tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits)
scsi/sg: don't grab scsi host module reference
ext4: Fix warning in blkdev_put()
block: don't return -EINVAL for not found names in devt_from_devname
cdrom: Fix spectre-v1 gadget
block: Improve kernel-doc headers
blk-mq: don't insert passthrough request into sw queue
bsg: make bsg_class a static const structure
ublk: make ublk_chr_class a static const structure
aoe: make aoe_class a static const structure
block/rnbd: make all 'class' structures const
block: fix the exclusive open mask in disk_scan_partitions
block: add overflow checks for Amiga partition support
block: change all __u32 annotations to __be32 in affs_hardblocks.h
block: fix signed int overflow in Amiga partition support
block: add capacity validation in bdev_add_partition()
block: fine-granular CAP_SYS_ADMIN for Persistent Reservation
block: disallow Persistent Reservation on partitions
reiserfs: fix blkdev_put() warning from release_journal_dev()
block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions()
block: document the holder argument to blkdev_get_by_path
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bio.h | 12 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 67 | ||||
-rw-r--r-- | include/linux/blk_types.h | 4 | ||||
-rw-r--r-- | include/linux/blkdev.h | 101 | ||||
-rw-r--r-- | include/linux/blktrace_api.h | 6 | ||||
-rw-r--r-- | include/linux/bsg.h | 2 | ||||
-rw-r--r-- | include/linux/cdrom.h | 12 | ||||
-rw-r--r-- | include/linux/device-mapper.h | 10 | ||||
-rw-r--r-- | include/linux/device/driver.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 9 | ||||
-rw-r--r-- | include/linux/mm.h | 27 | ||||
-rw-r--r-- | include/linux/mount.h | 1 | ||||
-rw-r--r-- | include/linux/mtd/blktrans.h | 2 | ||||
-rw-r--r-- | include/linux/nvme-fc-driver.h | 10 | ||||
-rw-r--r-- | include/linux/pktcdvd.h | 1 | ||||
-rw-r--r-- | include/linux/root_dev.h | 9 | ||||
-rw-r--r-- | include/linux/uio.h | 6 | ||||
-rw-r--r-- | include/scsi/scsi_ioctl.h | 4 | ||||
-rw-r--r-- | include/trace/events/block.h | 26 | ||||
-rw-r--r-- | include/uapi/linux/affs_hardblocks.h | 68 | ||||
-rw-r--r-- | include/uapi/linux/pktcdvd.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/ublk_cmd.h | 33 |
22 files changed, 250 insertions, 163 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h index b3e7529ff55e..c4f5b5228105 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -229,7 +229,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) static inline bool bio_flagged(struct bio *bio, unsigned int bit) { - return (bio->bi_flags & (1U << bit)) != 0; + return bio->bi_flags & (1U << bit); } static inline void bio_set_flag(struct bio *bio, unsigned int bit) @@ -465,14 +465,18 @@ extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); -int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); -bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off); +int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, + unsigned off); +bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, + size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); int bio_add_zone_append_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); +void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, + size_t off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); @@ -488,7 +492,7 @@ void zero_fill_bio(struct bio *bio); static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + if (bio_flagged(bio, BIO_PAGE_PINNED)) __bio_release_pages(bio, mark_dirty); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 06caacd77ed6..f401067ac03a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,8 +28,6 @@ typedef __u32 __bitwise req_flags_t; /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* may not be passed by ioscheduler */ -#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ @@ -38,12 +36,14 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) +/* use hctx->sched_tags */ +#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8)) +/* use an I/O scheduler for this request */ +#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ #define RQF_QUIET ((__force req_flags_t)(1 << 11)) -/* elevator private data attached */ -#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) /* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* runtime pm request */ @@ -59,13 +59,11 @@ typedef __u32 __bitwise req_flags_t; #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) -/* queue has elevator attached */ -#define RQF_ELV ((__force req_flags_t)(1 << 22)) -#define RQF_RESV ((__force req_flags_t)(1 << 23)) +#define RQF_RESV ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ - (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) + (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) enum mq_rq_state { MQ_RQ_IDLE = 0, @@ -169,25 +167,20 @@ struct request { void *completion_data; }; - /* * Three pointers are available for the IO schedulers, if they need - * more they have to dynamically allocate it. Flush requests are - * never put on the IO scheduler. So let the flush fields share - * space with the elevator data. + * more they have to dynamically allocate it. */ - union { - struct { - struct io_cq *icq; - void *priv[2]; - } elv; - - struct { - unsigned int seq; - struct list_head list; - rq_end_io_fn *saved_end_io; - } flush; - }; + struct { + struct io_cq *icq; + void *priv[2]; + } elv; + + struct { + unsigned int seq; + struct list_head list; + rq_end_io_fn *saved_end_io; + } flush; union { struct __call_single_data csd; @@ -208,7 +201,7 @@ static inline enum req_op req_op(const struct request *req) static inline bool blk_rq_is_passthrough(struct request *rq) { - return blk_op_is_passthrough(req_op(rq)); + return blk_op_is_passthrough(rq->cmd_flags); } static inline unsigned short req_get_ioprio(struct request *req) @@ -746,8 +739,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, struct blk_mq_tags { unsigned int nr_tags; unsigned int nr_reserved_tags; - - atomic_t active_queues; + unsigned int active_queues; struct sbitmap_queue bitmap_tags; struct sbitmap_queue breserved_tags; @@ -844,7 +836,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } static inline bool blk_mq_is_reserved_rq(struct request *rq) @@ -860,7 +852,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || (req->rq_flags & RQF_ELV) || ioerror || + if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror || (req->end_io && !blk_rq_is_passthrough(req))) return false; @@ -1164,6 +1156,18 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq) return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); } +/** + * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization. + * @rq: Request to examine. + * + * Note: REQ_OP_ZONE_APPEND requests do not require serialization. + */ +static inline bool blk_rq_is_seq_zoned_write(struct request *rq) +{ + return op_needs_zoned_write_locking(req_op(rq)) && + blk_rq_zone_is_seq(rq); +} + bool blk_req_needs_zone_write_lock(struct request *rq); bool blk_req_zone_write_trylock(struct request *rq); void __blk_req_zone_write_lock(struct request *rq); @@ -1194,6 +1198,11 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq) return !blk_req_zone_is_write_locked(rq); } #else /* CONFIG_BLK_DEV_ZONED */ +static inline bool blk_rq_is_seq_zoned_write(struct request *rq) +{ + return false; +} + static inline bool blk_req_needs_zone_write_lock(struct request *rq) { return false; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 740afe80f297..752a54e3284b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -55,6 +55,8 @@ struct block_device { struct super_block * bd_super; void * bd_claiming; void * bd_holder; + const struct blk_holder_ops *bd_holder_ops; + struct mutex bd_holder_lock; /* The counter of freeze processes */ int bd_fsfreeze_count; int bd_holders; @@ -323,7 +325,7 @@ struct bio { * bio flags */ enum { - BIO_NO_PAGE_REF, /* don't put release vec pages */ + BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0ffe203a602..ed44a997f629 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -41,7 +41,7 @@ struct blk_stat_callback; struct blk_crypto_profile; extern const struct device_type disk_type; -extern struct device_type part_type; +extern const struct device_type part_type; extern struct class block_class; /* @@ -112,6 +112,19 @@ struct blk_integrity { unsigned char tag_size; }; +typedef unsigned int __bitwise blk_mode_t; + +/* open for reading */ +#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0)) +/* open for writing */ +#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1)) +/* open exclusively (vs other exclusive openers */ +#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2)) +/* opened with O_NDELAY */ +#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) +/* open for "writes" only for ioctls (specialy hack for floppy.c) */ +#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) + struct gendisk { /* * major/first_minor/minors should not be set by any new driver, the @@ -187,6 +200,7 @@ struct gendisk { struct badblocks *bb; struct lockdep_map lockdep_map; u64 diskseq; + blk_mode_t open_mode; /* * Independent sector access ranges. This is always NULL for @@ -318,7 +332,6 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #ifdef CONFIG_BLK_DEV_ZONED - #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); @@ -328,33 +341,11 @@ extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, void (*update_driver_data)(struct gendisk *disk)); - -extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); -extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); - #else /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int bdev_nr_zones(struct block_device *bdev) { return 0; } - -static inline int blkdev_report_zones_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) -{ - return -ENOTTY; -} - -static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) -{ - return -ENOTTY; -} - #endif /* CONFIG_BLK_DEV_ZONED */ /* @@ -392,6 +383,7 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; + struct mutex rq_qos_mutex; const struct blk_mq_ops *mq_ops; @@ -487,6 +479,7 @@ struct request_queue { * for flush operations */ struct blk_flush_queue *fq; + struct list_head flush_list; struct list_head requeue_list; spinlock_t requeue_lock; @@ -815,7 +808,7 @@ int __register_blkdev(unsigned int major, const char *name, __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -bool bdev_check_media_change(struct block_device *bdev); +bool disk_check_media_change(struct gendisk *disk); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void set_capacity(struct gendisk *disk, sector_t size); @@ -836,7 +829,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, dev_t part_devt(struct gendisk *disk, u8 partno); void inc_diskseq(struct gendisk *disk); -dev_t blk_lookup_devt(const char *name, int partno); void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); @@ -1281,15 +1273,18 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec) return disk_zone_no(bdev->bd_disk, sec); } -static inline bool bdev_op_is_zoned_write(struct block_device *bdev, - blk_opf_t op) +/* Whether write serialization is required for @op on zoned devices. */ +static inline bool op_needs_zoned_write_locking(enum req_op op) { - if (!bdev_is_zoned(bdev)) - return false; - return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES; } +static inline bool bdev_op_is_zoned_write(struct block_device *bdev, + enum req_op op) +{ + return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op); +} + static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1380,10 +1375,12 @@ struct block_device_operations { void (*submit_bio)(struct bio *bio); int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); - int (*open) (struct block_device *, fmode_t); - void (*release) (struct gendisk *, fmode_t); - int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); + int (*open)(struct gendisk *disk, blk_mode_t mode); + void (*release)(struct gendisk *disk); + int (*ioctl)(struct block_device *bdev, blk_mode_t mode, + unsigned cmd, unsigned long arg); + int (*compat_ioctl)(struct block_device *bdev, blk_mode_t mode, + unsigned cmd, unsigned long arg); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); @@ -1410,7 +1407,7 @@ struct block_device_operations { }; #ifdef CONFIG_COMPAT -extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, +extern int blkdev_compat_ptr_ioctl(struct block_device *, blk_mode_t, unsigned int, unsigned long); #else #define blkdev_compat_ptr_ioctl NULL @@ -1463,22 +1460,31 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #define BLKDEV_MAJOR_MAX 0 #endif -struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, void *holder); +struct blk_holder_ops { + void (*mark_dead)(struct block_device *bdev); +}; + +/* + * Return the correct open flags for blkdev_get_by_* for super block flags + * as stored in sb->s_flags. + */ +#define sb_open_mode(flags) \ + (BLK_OPEN_READ | (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) + +struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops); +struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode, + void *holder, const struct blk_holder_ops *hops); +int bd_prepare_to_claim(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void blkdev_put(struct block_device *bdev, fmode_t mode); +void blkdev_put(struct block_device *bdev, void *holder); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); -struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); -void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, - loff_t lend); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); @@ -1488,6 +1494,7 @@ int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); void printk_all_partitions(void); +int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1509,6 +1516,10 @@ static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) static inline void printk_all_partitions(void) { } +static inline int early_lookup_bdev(const char *pathname, dev_t *dev) +{ + return -EINVAL; +} #endif /* CONFIG_BLOCK */ int fsync_bdev(struct block_device *bdev); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index cfbda114348c..122c62e561fc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -85,10 +85,14 @@ extern int blk_trace_remove(struct request_queue *q); # define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) -# define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) # define blk_trace_note_message_enabled(q) (false) + +static inline int blk_trace_remove(struct request_queue *q) +{ + return -ENOTTY; +} #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_COMPAT diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 1ac81c809da9..ee2df73edf83 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -9,7 +9,7 @@ struct device; struct request_queue; typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr, - fmode_t mode, unsigned int timeout); + bool open_for_write, unsigned int timeout); struct bsg_device *bsg_register_queue(struct request_queue *q, struct device *parent, const char *name, diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 67caa909e3e6..98c6fd0b39b6 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -13,6 +13,7 @@ #include <linux/fs.h> /* not really needed, later.. */ #include <linux/list.h> +#include <linux/blkdev.h> #include <scsi/scsi_common.h> #include <uapi/linux/cdrom.h> @@ -61,9 +62,9 @@ struct cdrom_device_info { __u8 last_sense; __u8 media_written; /* dirty flag, DVD+RW bookkeeping */ unsigned short mmc3_profile; /* current MMC3 profile */ - int for_data; int (*exit)(struct cdrom_device_info *); int mrw_mode_page; + bool opened_for_data; __s64 last_media_change_ms; }; @@ -101,11 +102,10 @@ int cdrom_read_tocentry(struct cdrom_device_info *cdi, struct cdrom_tocentry *entry); /* the general block_device operations structure: */ -extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, - fmode_t mode); -extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode); -extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, - fmode_t mode, unsigned int cmd, unsigned long arg); +int cdrom_open(struct cdrom_device_info *cdi, blk_mode_t mode); +void cdrom_release(struct cdrom_device_info *cdi); +int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, + unsigned int cmd, unsigned long arg); extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a52d2b9a6846..69d0435c7ebb 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -166,17 +166,15 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; struct dax_device *dax_dev; - fmode_t mode; + blk_mode_t mode; char name[16]; }; -dev_t dm_get_dev_t(const char *path); - /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. */ -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, +int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); @@ -545,7 +543,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); /* * First create an empty table. */ -int dm_table_create(struct dm_table **result, fmode_t mode, +int dm_table_create(struct dm_table **result, blk_mode_t mode, unsigned int num_targets, struct mapped_device *md); /* @@ -588,7 +586,7 @@ void dm_sync_table(struct mapped_device *md); * Queries */ sector_t dm_table_get_size(struct dm_table *t); -fmode_t dm_table_get_mode(struct dm_table *t); +blk_mode_t dm_table_get_mode(struct dm_table *t); struct mapped_device *dm_table_get_md(struct dm_table *t); const char *dm_table_device_name(struct dm_table *t); diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index c244267a6744..7738f458995f 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -126,7 +126,7 @@ int __must_check driver_register(struct device_driver *drv); void driver_unregister(struct device_driver *drv); struct device_driver *driver_find(const char *name, const struct bus_type *bus); -int driver_probe_done(void); +bool __init driver_probe_done(void); void wait_for_device_probe(void); void __init wait_for_init_devices_probe(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 42129a67d98d..ed5b32dc2625 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -119,13 +119,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) -/* File is opened with O_NDELAY (only set for block devices) */ -#define FMODE_NDELAY ((__force fmode_t)0x40) -/* File is opened with O_EXCL (only set for block devices) */ -#define FMODE_EXCL ((__force fmode_t)0x80) -/* File is opened using open(.., 3, ..) and is writeable only for ioctls - (specialy hack for floppy.c) */ -#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ @@ -1224,7 +1217,6 @@ struct super_block { uuid_t s_uuid; /* UUID */ unsigned int s_max_links; - fmode_t s_mode; /* * The next field is for VFS *only*. No filesystems have any business @@ -1944,6 +1936,7 @@ struct super_operations { struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); + void (*shutdown)(struct super_block *sb); }; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 27ce77080c79..200068d98686 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1910,6 +1910,28 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, return page_maybe_dma_pinned(page); } +/** + * is_zero_page - Query if a page is a zero page + * @page: The page to query + * + * This returns true if @page is one of the permanent zero pages. + */ +static inline bool is_zero_page(const struct page *page) +{ + return is_zero_pfn(page_to_pfn(page)); +} + +/** + * is_zero_folio - Query if a folio is a zero page + * @folio: The folio to query + * + * This returns true if @folio is one of the permanent zero pages. + */ +static inline bool is_zero_folio(const struct folio *folio) +{ + return is_zero_page(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_longterm_pinnable_page(struct page *page) @@ -1920,8 +1942,8 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - /* The zero page may always be pinned */ - if (is_zero_pfn(page_to_pfn(page))) + /* The zero page can be "pinned" but gets special handling. */ + if (is_zero_page(page)) return true; /* Coherent device memory must always allow eviction. */ @@ -2383,6 +2405,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, diff --git a/include/linux/mount.h b/include/linux/mount.h index 1ea326c368f7..4b81ea90440e 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -107,7 +107,6 @@ extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); -extern dev_t name_to_dev_t(const char *name); extern bool path_is_mountpoint(const struct path *path); extern bool our_mnt(struct vfsmount *mnt); diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 15cc9b95e32b..6e471436bba5 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -34,7 +34,7 @@ struct mtd_blktrans_dev { struct blk_mq_tag_set *tag_set; spinlock_t queue_lock; void *priv; - fmode_t file_mode; + bool writable; }; struct mtd_blktrans_ops { diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index fa092b9be2fd..4109f1bd6128 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -185,7 +185,6 @@ enum nvmefc_fcp_datadir { * @first_sgl: memory for 1st scatter/gather list segment for payload data * @sg_cnt: number of elements in the scatter/gather list * @io_dir: direction of the FCP request (see NVMEFC_FCP_xxx) - * @sqid: The nvme SQID the command is being issued on * @done: The callback routine the LLDD is to invoke upon completion of * the FCP operation. req argument is the pointer to the original * FCP IO operation. @@ -194,12 +193,13 @@ enum nvmefc_fcp_datadir { * while processing the operation. The length of the buffer * corresponds to the fcprqst_priv_sz value specified in the * nvme_fc_port_template supplied by the LLDD. + * @sqid: The nvme SQID the command is being issued on * * Values set by the LLDD indicating completion status of the FCP operation. * Must be set prior to calling the done() callback. + * @rcv_rsplen: length, in bytes, of the FCP RSP IU received. * @transferred_length: amount of payload data, in bytes, that were * transferred. Should equal payload_length on success. - * @rcv_rsplen: length, in bytes, of the FCP RSP IU received. * @status: Completion status of the FCP operation. must be 0 upon success, * negative errno value upon failure (ex: -EIO). Note: this is * NOT a reflection of the NVME CQE completion status. Only the @@ -219,14 +219,14 @@ struct nvmefc_fcp_req { int sg_cnt; enum nvmefc_fcp_datadir io_dir; - __le16 sqid; - void (*done)(struct nvmefc_fcp_req *req); void *private; - u32 transferred_length; + __le16 sqid; + u16 rcv_rsplen; + u32 transferred_length; u32 status; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index f9c5ac80d59b..80cb00db42a4 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -156,7 +156,6 @@ struct pktcdvd_device { struct block_device *bdev; /* dev attached */ dev_t pkt_dev; /* our dev */ - char name[20]; struct packet_settings settings; struct packet_stats stats; int refcnt; /* Open count */ diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h index 4e78651371ba..847c9a06101b 100644 --- a/include/linux/root_dev.h +++ b/include/linux/root_dev.h @@ -9,15 +9,8 @@ enum { Root_NFS = MKDEV(UNNAMED_MAJOR, 255), Root_CIFS = MKDEV(UNNAMED_MAJOR, 254), + Root_Generic = MKDEV(UNNAMED_MAJOR, 253), Root_RAM0 = MKDEV(RAMDISK_MAJOR, 0), - Root_RAM1 = MKDEV(RAMDISK_MAJOR, 1), - Root_FD0 = MKDEV(FLOPPY_MAJOR, 0), - Root_HDA1 = MKDEV(IDE0_MAJOR, 1), - Root_HDA2 = MKDEV(IDE0_MAJOR, 2), - Root_SDA1 = MKDEV(SCSI_DISK0_MAJOR, 1), - Root_SDA2 = MKDEV(SCSI_DISK0_MAJOR, 2), - Root_HDC1 = MKDEV(IDE1_MAJOR, 1), - Root_SR0 = MKDEV(SCSI_CDROM_MAJOR, 0), }; extern dev_t ROOT_DEV; diff --git a/include/linux/uio.h b/include/linux/uio.h index 60c342bb7ab8..8e7d2c425340 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -277,14 +277,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); -ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, - size_t maxsize, unsigned maxpages, size_t *start, - iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, - struct page ***pages, size_t maxsize, size_t *start, - iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h index beac64e38b87..a207c07da9d2 100644 --- a/include/scsi/scsi_ioctl.h +++ b/include/scsi/scsi_ioctl.h @@ -45,11 +45,11 @@ typedef struct scsi_fctargaddress { int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev, int cmd, bool ndelay); -int scsi_ioctl(struct scsi_device *sdev, fmode_t mode, int cmd, +int scsi_ioctl(struct scsi_device *sdev, bool open_for_write, int cmd, void __user *arg); int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); -bool scsi_cmd_allowed(unsigned char *cmd, fmode_t mode); +bool scsi_cmd_allowed(unsigned char *cmd, bool open_for_write); #endif /* __KERNEL__ */ #endif /* _SCSI_IOCTL_H */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 7f4dfbdf12a6..40e60c33cc6f 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -246,6 +246,32 @@ DEFINE_EVENT(block_rq, block_rq_merge, ); /** + * block_io_start - insert a request for execution + * @rq: block IO operation request + * + * Called when block operation request @rq is queued for execution + */ +DEFINE_EVENT(block_rq, block_io_start, + + TP_PROTO(struct request *rq), + + TP_ARGS(rq) +); + +/** + * block_io_done - block IO operation request completed + * @rq: block IO operation request + * + * Called when block operation request @rq is completed + */ +DEFINE_EVENT(block_rq, block_io_done, + + TP_PROTO(struct request *rq), + + TP_ARGS(rq) +); + +/** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed diff --git a/include/uapi/linux/affs_hardblocks.h b/include/uapi/linux/affs_hardblocks.h index 5e2fb8481252..a5aff2eb5f70 100644 --- a/include/uapi/linux/affs_hardblocks.h +++ b/include/uapi/linux/affs_hardblocks.h @@ -7,42 +7,42 @@ /* Just the needed definitions for the RDB of an Amiga HD. */ struct RigidDiskBlock { - __u32 rdb_ID; + __be32 rdb_ID; __be32 rdb_SummedLongs; - __s32 rdb_ChkSum; - __u32 rdb_HostID; + __be32 rdb_ChkSum; + __be32 rdb_HostID; __be32 rdb_BlockBytes; - __u32 rdb_Flags; - __u32 rdb_BadBlockList; + __be32 rdb_Flags; + __be32 rdb_BadBlockList; __be32 rdb_PartitionList; - __u32 rdb_FileSysHeaderList; - __u32 rdb_DriveInit; - __u32 rdb_Reserved1[6]; - __u32 rdb_Cylinders; - __u32 rdb_Sectors; - __u32 rdb_Heads; - __u32 rdb_Interleave; - __u32 rdb_Park; - __u32 rdb_Reserved2[3]; - __u32 rdb_WritePreComp; - __u32 rdb_ReducedWrite; - __u32 rdb_StepRate; - __u32 rdb_Reserved3[5]; - __u32 rdb_RDBBlocksLo; - __u32 rdb_RDBBlocksHi; - __u32 rdb_LoCylinder; - __u32 rdb_HiCylinder; - __u32 rdb_CylBlocks; - __u32 rdb_AutoParkSeconds; - __u32 rdb_HighRDSKBlock; - __u32 rdb_Reserved4; + __be32 rdb_FileSysHeaderList; + __be32 rdb_DriveInit; + __be32 rdb_Reserved1[6]; + __be32 rdb_Cylinders; + __be32 rdb_Sectors; + __be32 rdb_Heads; + __be32 rdb_Interleave; + __be32 rdb_Park; + __be32 rdb_Reserved2[3]; + __be32 rdb_WritePreComp; + __be32 rdb_ReducedWrite; + __be32 rdb_StepRate; + __be32 rdb_Reserved3[5]; + __be32 rdb_RDBBlocksLo; + __be32 rdb_RDBBlocksHi; + __be32 rdb_LoCylinder; + __be32 rdb_HiCylinder; + __be32 rdb_CylBlocks; + __be32 rdb_AutoParkSeconds; + __be32 rdb_HighRDSKBlock; + __be32 rdb_Reserved4; char rdb_DiskVendor[8]; char rdb_DiskProduct[16]; char rdb_DiskRevision[4]; char rdb_ControllerVendor[8]; char rdb_ControllerProduct[16]; char rdb_ControllerRevision[4]; - __u32 rdb_Reserved5[10]; + __be32 rdb_Reserved5[10]; }; #define IDNAME_RIGIDDISK 0x5244534B /* "RDSK" */ @@ -50,16 +50,16 @@ struct RigidDiskBlock { struct PartitionBlock { __be32 pb_ID; __be32 pb_SummedLongs; - __s32 pb_ChkSum; - __u32 pb_HostID; + __be32 pb_ChkSum; + __be32 pb_HostID; __be32 pb_Next; - __u32 pb_Flags; - __u32 pb_Reserved1[2]; - __u32 pb_DevFlags; + __be32 pb_Flags; + __be32 pb_Reserved1[2]; + __be32 pb_DevFlags; __u8 pb_DriveName[32]; - __u32 pb_Reserved2[15]; + __be32 pb_Reserved2[15]; __be32 pb_Environment[17]; - __u32 pb_EReserved[15]; + __be32 pb_EReserved[15]; }; #define IDNAME_PARTITION 0x50415254 /* "PART" */ diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h index 6a5552dfd6af..987a3022dc5f 100644 --- a/include/uapi/linux/pktcdvd.h +++ b/include/uapi/linux/pktcdvd.h @@ -16,6 +16,7 @@ #include <linux/types.h> /* + * UNUSED: * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. */ #define PACKET_DEBUG 1 diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 640bf687b94a..4b8558db90e1 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -47,6 +47,14 @@ _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_GET_DEV_INFO2 \ _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_FEATURES \ + _IOR('u', 0x13, struct ublksrv_ctrl_cmd) + +/* + * 64bits are enough now, and it should be easy to extend in case of + * running out of feature flags + */ +#define UBLK_FEATURES_LEN 8 /* * IO commands, issued by ublk server, and handled by ublk driver. @@ -93,9 +101,29 @@ #define UBLKSRV_CMD_BUF_OFFSET 0 #define UBLKSRV_IO_BUF_OFFSET 0x80000000 -/* tag bit is 12bit, so at most 4096 IOs for each queue */ +/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */ #define UBLK_MAX_QUEUE_DEPTH 4096 +/* single IO buffer max size is 32MB */ +#define UBLK_IO_BUF_OFF 0 +#define UBLK_IO_BUF_BITS 25 +#define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1) + +/* so at most 64K IOs for each queue */ +#define UBLK_TAG_OFF UBLK_IO_BUF_BITS +#define UBLK_TAG_BITS 16 +#define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1) + +/* max 4096 queues */ +#define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS) +#define UBLK_QID_BITS 12 +#define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1) + +#define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS) + +#define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) +#define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) + /* * zero copy requires 4k block size, and can remap ublk driver's io * request into ublksrv's vm space @@ -145,6 +173,9 @@ /* use ioctl encoding for uring command */ #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) +/* Copy between request and user buffer by pread()/pwrite() */ +#define UBLK_F_USER_COPY (1UL << 7) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 |