Diffstat (limited to 'include/linux/blkdev.h')
-rw-r--r--	include/linux/blkdev.h	235
1 file changed, 70 insertions(+), 165 deletions(-)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..4293dc1cd160 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -42,7 +40,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
-struct rq_wb;
+struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
 
@@ -56,7 +54,7 @@ struct blk_stat_callback;
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
  */
-#define BLKCG_MAX_POLS		3
+#define BLKCG_MAX_POLS		5
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
@@ -110,7 +108,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_QUIET		((__force req_flags_t)(1 << 11))
 /* elevator private data attached */
 #define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
-/* account I/O stat */
+/* account into disk and partition IO statistics */
 #define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
 /* request came from our alloc pool */
 #define RQF_ALLOCED		((__force req_flags_t)(1 << 14))
@@ -118,7 +116,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_PM			((__force req_flags_t)(1 << 15))
 /* on IO scheduler merge hash */
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
-/* IO stats tracking on */
+/* track IO completion time */
 #define RQF_STATS		((__force req_flags_t)(1 << 17))
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
@@ -398,16 +396,13 @@
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-struct blk_zone_report_hdr {
-	unsigned int	nr_zones;
-	u8		padding[60];
-};
-
+extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 extern int blkdev_report_zones(struct block_device *bdev,
 			       sector_t sector, struct blk_zone *zones,
 			       unsigned int *nr_zones, gfp_t gfp_mask);
 extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
 			      sector_t nr_sectors, gfp_t gfp_mask);
+extern int blk_revalidate_disk_zones(struct gendisk *disk);
 
 extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 				     unsigned int cmd, unsigned long arg);
@@ -416,6 +411,16 @@ extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
+static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+	return 0;
+}
+
+static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+	return 0;
+}
+
 static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
 					    fmode_t mode, unsigned int cmd,
 					    unsigned long arg)
@@ -442,10 +447,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
-	atomic_t		shared_hctx_restart;
-
 	struct blk_queue_stats	*stats;
-	struct rq_wb		*rq_wb;
+	struct rq_qos		*rq_qos;
 
 	/*
	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
@@ -508,6 +511,12 @@ struct request_queue {
 	 * various queue flags, see QUEUE_* below
 	 */
 	unsigned long		queue_flags;
+	/*
+	 * Number of contexts that have called blk_set_pm_only(). If this
+	 * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
+	 * processed.
+	 */
+	atomic_t		pm_only;
 
 	/*
 	 * ida allocated id for this queue.  Used to index queues from
@@ -592,6 +601,7 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+#ifdef CONFIG_BLK_DEV_ZONED
 	/*
 	 * Zoned block device information for request dispatch control.
 	 * nr_zones is the total number of zones of the device. This is always
@@ -612,6 +622,7 @@ struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*seq_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
 	 * sg stuff
@@ -681,7 +692,7 @@ struct request_queue {
 #define QUEUE_FLAG_FAIL_IO	7	/* fake timeout */
 #define QUEUE_FLAG_NONROT	9	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     10	/* do IO stats */
+#define QUEUE_FLAG_IO_STAT     10	/* do disk/partitions IO accounting */
 #define QUEUE_FLAG_DISCARD     11	/* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   12	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM  13	/* Contributes to random pool */
@@ -695,12 +706,12 @@ struct request_queue {
 #define QUEUE_FLAG_FUA	       21	/* device supports FUA writes */
 #define QUEUE_FLAG_FLUSH_NQ    22	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         23	/* device supports DAX */
-#define QUEUE_FLAG_STATS       24	/* track rq completion times */
+#define QUEUE_FLAG_STATS       24	/* track IO start and completion times */
 #define QUEUE_FLAG_POLL_STATS  25	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
-#define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
+#define QUEUE_FLAG_PCI_P2PDMA  29	/* device supports PCI p2p requests */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
@@ -733,17 +744,18 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
 #define blk_queue_scsi_passthrough(q)	\
 	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
+#define blk_queue_pci_p2pdma(q)	\
+	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
-#define blk_queue_preempt_only(q)				\
-	test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+#define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 
-extern int blk_set_preempt_only(struct request_queue *q);
-extern void blk_clear_preempt_only(struct request_queue *q);
+extern void blk_set_pm_only(struct request_queue *q);
+extern void blk_clear_pm_only(struct request_queue *q);
 
 static inline int queue_in_flight(struct request_queue *q)
 {
@@ -800,9 +812,10 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
-	return q->nr_zones;
+	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
 }
 
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
@@ -820,6 +833,12 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return false;
 	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 }
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return 0;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
 {
@@ -1070,6 +1089,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
 	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1079,6 +1099,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
 	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
@@ -1284,29 +1305,6 @@ extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
 
 /*
- * block layer runtime pm functions
- */
-#ifdef CONFIG_PM
-extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
-extern int blk_pre_runtime_suspend(struct request_queue *q);
-extern void blk_post_runtime_suspend(struct request_queue *q, int err);
-extern void blk_pre_runtime_resume(struct request_queue *q);
-extern void blk_post_runtime_resume(struct request_queue *q, int err);
-extern void blk_set_runtime_active(struct request_queue *q);
-#else
-static inline void blk_pm_runtime_init(struct request_queue *q,
-	struct device *dev) {}
-static inline int blk_pre_runtime_suspend(struct request_queue *q)
-{
-	return -ENOSYS;
-}
-static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
-static inline void blk_pre_runtime_resume(struct request_queue *q) {}
-static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-static inline void blk_set_runtime_active(struct request_queue *q) {}
-#endif
-
-/*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
  * into single larger request. As the requests are moved from a per-task list to
@@ -1437,8 +1435,6 @@ enum blk_default_limits {
 	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
 };
 
-#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
@@ -1639,15 +1635,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return blk_queue_nr_zones(q);
-	return 0;
-}
-
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
@@ -1690,94 +1677,6 @@ static inline void put_dev_sector(Sector p)
 	put_page(p.v);
 }
 
-static inline bool __bvec_gap_to_prev(struct request_queue *q,
-				struct bio_vec *bprv, unsigned int offset)
-{
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
-}
-
-/*
- * Check if adding a bio_vec after bprv with offset would create a gap in
- * the SG list. Most drivers don't care about this, but some do.
- */
-static inline bool bvec_gap_to_prev(struct request_queue *q,
-				struct bio_vec *bprv, unsigned int offset)
-{
-	if (!queue_virt_boundary(q))
-		return false;
-	return __bvec_gap_to_prev(q, bprv, offset);
-}
-
-/*
- * Check if the two bvecs from two bios can be merged to one segment.
- * If yes, no need to check gap between the two bios since the 1st bio
- * and the 1st bvec in the 2nd bio can be handled in one segment.
- */
-static inline bool bios_segs_mergeable(struct request_queue *q,
-		struct bio *prev, struct bio_vec *prev_last_bv,
-		struct bio_vec *next_first_bv)
-{
-	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
-		return false;
-	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
-		return false;
-	if (prev->bi_seg_back_size + next_first_bv->bv_len >
-			queue_max_segment_size(q))
-		return false;
-	return true;
-}
-
-static inline bool bio_will_gap(struct request_queue *q,
-				struct request *prev_rq,
-				struct bio *prev,
-				struct bio *next)
-{
-	if (bio_has_data(prev) && queue_virt_boundary(q)) {
-		struct bio_vec pb, nb;
-
-		/*
-		 * don't merge if the 1st bio starts with non-zero
-		 * offset, otherwise it is quite difficult to respect
-		 * sg gap limit. We work hard to merge a huge number of small
-		 * single bios in case of mkfs.
-		 */
-		if (prev_rq)
-			bio_get_first_bvec(prev_rq->bio, &pb);
-		else
-			bio_get_first_bvec(prev, &pb);
-		if (pb.bv_offset)
-			return true;
-
-		/*
-		 * We don't need to worry about the situation that the
-		 * merged segment ends in unaligned virt boundary:
-		 *
-		 * - if 'pb' ends aligned, the merged segment ends aligned
-		 * - if 'pb' ends unaligned, the next bio must include
-		 *   one single bvec of 'nb', otherwise the 'nb' can't
-		 *   merge with 'pb'
-		 */
-		bio_get_last_bvec(prev, &pb);
-		bio_get_first_bvec(next, &nb);
-
-		if (!bios_segs_mergeable(q, prev, &pb, &nb))
-			return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
-	}
-
-	return false;
-}
-
-static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
-{
-	return bio_will_gap(req->q, req, req->biotail, bio);
-}
-
-static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
-{
-	return bio_will_gap(req->q, NULL, bio, req->bio);
-}
-
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
@@ -1857,24 +1756,26 @@ queue_max_integrity_segments(struct request_queue *q)
 	return q->limits.max_integrity_segments;
 }
 
-static inline bool integrity_req_gap_back_merge(struct request *req,
-						struct bio *next)
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi:		blk_integrity profile for device
+ * @sectors:	Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device.  Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
 {
-	struct bio_integrity_payload *bip = bio_integrity(req->bio);
-	struct bio_integrity_payload *bip_next = bio_integrity(next);
-
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
-				bip_next->bip_vec[0].bv_offset);
+	return sectors >> (bi->interval_exp - 9);
 }
 
-static inline bool integrity_req_gap_front_merge(struct request *req,
-						 struct bio *bio)
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
 {
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
-
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
-				bip_next->bip_vec[0].bv_offset);
+	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
 }
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1939,15 +1840,16 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq,
 	return true;
 }
 
-static inline bool integrity_req_gap_back_merge(struct request *req,
-						struct bio *next)
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
 {
-	return false;
+	return 0;
 }
-static inline bool integrity_req_gap_front_merge(struct request *req,
-						 struct bio *bio)
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
 {
-	return false;
+	return 0;
 }
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1955,7 +1857,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
-	int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	unsigned int (*check_events) (struct gendisk *disk,
@@ -1967,6 +1869,9 @@ struct block_device_operations {
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	/* this callback is with swap_lock and sometimes page table lock held */
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+	int (*report_zones)(struct gendisk *, sector_t sector,
+			    struct blk_zone *zones, unsigned int *nr_zones,
+			    gfp_t gfp_mask);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };
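
A note on the new pm_only counter introduced above: it replaces the old QUEUE_FLAG_PREEMPT_ONLY bit with a nesting count, so several contexts can request the "PM requests only" state independently. The sketch below only illustrates how a driver's runtime-PM path might pair blk_set_pm_only() with blk_clear_pm_only(); the my_* helpers are placeholders and nothing here is taken from the patch itself.

#include <linux/blkdev.h>

/* Placeholder hooks standing in for the device-specific commands that
 * would normally be issued as RQF_PM requests. */
static int my_dev_enter_low_power(struct request_queue *q) { return 0; }
static int my_dev_leave_low_power(struct request_queue *q) { return 0; }

static int my_runtime_suspend(struct request_queue *q)
{
	int ret;

	/*
	 * Bump q->pm_only: while it is non-zero only RQF_PM and
	 * RQF_PREEMPT requests are processed, so normal I/O is held
	 * off while the device changes power state.
	 */
	blk_set_pm_only(q);

	ret = my_dev_enter_low_power(q);
	if (ret)
		blk_clear_pm_only(q);	/* undo on failure */
	return ret;
}

static int my_runtime_resume(struct request_queue *q)
{
	int ret = my_dev_leave_low_power(q);

	/* Drop our reference; normal I/O resumes once the count is zero. */
	blk_clear_pm_only(q);
	return ret;
}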
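
The kernel-doc added for bio_integrity_intervals() describes a pure unit conversion: 512-byte block layer sectors shifted down to device integrity intervals, then multiplied by the per-interval tuple size for bio_integrity_bytes(). The standalone program below merely replays that arithmetic with assumed example numbers (a 4096-byte interval, i.e. interval_exp = 12, and an 8-byte tuple); the struct is a stand-in, not the kernel's struct blk_integrity.

#include <stdio.h>

/* Stand-in carrying only the two fields the conversion needs. */
struct example_integrity {
	unsigned char interval_exp;	/* log2(integrity interval size) */
	unsigned char tuple_size;	/* metadata bytes per interval */
};

/* Same expression as the new bio_integrity_intervals(): 2^9 = 512 */
static unsigned int intervals(const struct example_integrity *bi,
			      unsigned int sectors)
{
	return sectors >> (bi->interval_exp - 9);
}

int main(void)
{
	struct example_integrity bi = { .interval_exp = 12, .tuple_size = 8 };
	unsigned int sectors = 32;	/* a 16 KiB bio */

	/* 32 >> (12 - 9) = 4 intervals, 4 * 8 = 32 metadata bytes */
	printf("%u intervals, %u bytes of integrity metadata\n",
	       intervals(&bi, sectors),
	       intervals(&bi, sectors) * bi.tuple_size);
	return 0;
}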
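
The last hunk adds a ->report_zones() method to block_device_operations, and the earlier hunks export blk_revalidate_disk_zones() for rebuilding q->nr_zones, seq_zones_bitmap and seq_zones_wlock from it. A hedged skeleton of how a zoned driver might wire this up is shown below; everything prefixed my_ (including the cached zone array and the power-of-two zone-size assumption) is hypothetical and not part of the patch.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/blkdev.h>

struct my_zoned_dev {
	struct gendisk *disk;
	struct blk_zone *zone_info;	/* cached descriptor per zone */
	unsigned int nr_zones;
	unsigned int zone_shift;	/* log2(zone size in sectors) */
};

static int my_report_zones(struct gendisk *disk, sector_t sector,
			   struct blk_zone *zones, unsigned int *nr_zones,
			   gfp_t gfp_mask)
{
	struct my_zoned_dev *dev = disk->private_data;
	unsigned int zno = sector >> dev->zone_shift;
	unsigned int n;

	if (zno >= dev->nr_zones) {
		*nr_zones = 0;
		return 0;
	}

	/* Report at most as many descriptors as the caller asked for. */
	n = min(*nr_zones, dev->nr_zones - zno);
	memcpy(zones, &dev->zone_info[zno], n * sizeof(*zones));
	*nr_zones = n;
	return 0;
}

static const struct block_device_operations my_fops = {
	.owner		= THIS_MODULE,
	.report_zones	= my_report_zones,
	/* .open, .release, .getgeo, ... */
};

static int my_setup_zones(struct my_zoned_dev *dev)
{
	/*
	 * Once the gendisk and queue limits are in place, let the block
	 * layer walk ->report_zones() and populate the zone bookkeeping
	 * declared in the diff above.
	 */
	return blk_revalidate_disk_zones(dev->disk);
}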
