Diffstat (limited to 'drivers/block')
42 files changed, 1060 insertions, 344 deletions
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 8eea2529da20..7a368c90467d 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -273,7 +273,7 @@ static const struct file_operations aoe_fops = { .llseek = noop_llseek, }; -static char *aoe_devnode(struct device *dev, umode_t *mode) +static char *aoe_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev)); } diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index cbacddc55a1d..6fb4e38fca88 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-only # # DRBD device driver configuration # diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 8bd534697d1b..c93e462130ff 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-only drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e27478ae579c..429255876800 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_actlog.c @@ -868,9 +868,9 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, nr_sectors = get_capacity(device->vdisk); esector = sector + (size >> 9) - 1; - if (!expect(sector < nr_sectors)) + if (!expect(device, sector < nr_sectors)) goto out; - if (!expect(esector < nr_sectors)) + if (!expect(device, esector < nr_sectors)) esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -1143,7 +1143,7 @@ void drbd_rs_complete_io(struct drbd_device *device, sector_t sector) bm_ext = e ? 
lc_entry(e, struct bm_extent, lce) : NULL; if (!bm_ext) { spin_unlock_irqrestore(&device->al_lock, flags); - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n"); return; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 7d9db33363de..289876ffbc31 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_bitmap.c @@ -113,7 +113,7 @@ struct drbd_bitmap { static void __bm_print_lock_info(struct drbd_device *device, const char *func) { struct drbd_bitmap *b = device->bitmap; - if (!__ratelimit(&drbd_ratelimit_state)) + if (!drbd_ratelimit()) return; drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n", current->comm, task_pid_nr(current), @@ -448,7 +448,7 @@ int drbd_bm_init(struct drbd_device *device) sector_t drbd_bm_capacity(struct drbd_device *device) { - if (!expect(device->bitmap)) + if (!expect(device, device->bitmap)) return 0; return device->bitmap->bm_dev_capacity; } @@ -457,7 +457,7 @@ sector_t drbd_bm_capacity(struct drbd_device *device) */ void drbd_bm_cleanup(struct drbd_device *device) { - if (!expect(device->bitmap)) + if (!expect(device, device->bitmap)) return; bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages); bm_vk_free(device->bitmap->bm_pages); @@ -636,7 +636,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi int err = 0; bool growing; - if (!expect(b)) + if (!expect(device, b)) return -ENOMEM; drbd_bm_lock(device, "resize", BM_LOCKED_MASK); @@ -757,9 +757,9 @@ unsigned long _drbd_bm_total_weight(struct drbd_device *device) unsigned long s; unsigned long flags; - if (!expect(b)) + if (!expect(device, b)) return 0; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 0; spin_lock_irqsave(&b->bm_lock, flags); @@ -783,9 +783,9 @@ unsigned long drbd_bm_total_weight(struct drbd_device *device) size_t drbd_bm_words(struct drbd_device *device) { struct drbd_bitmap *b = device->bitmap; - if (!expect(b)) + if (!expect(device, b)) return 0; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 0; return b->bm_words; @@ -794,7 +794,7 @@ size_t drbd_bm_words(struct drbd_device *device) unsigned long drbd_bm_bits(struct drbd_device *device) { struct drbd_bitmap *b = device->bitmap; - if (!expect(b)) + if (!expect(device, b)) return 0; return b->bm_bits; @@ -816,9 +816,9 @@ void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number, end = offset + number; - if (!expect(b)) + if (!expect(device, b)) return; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return; if (number == 0) return; @@ -863,9 +863,9 @@ void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number, end = offset + number; - if (!expect(b)) + if (!expect(device, b)) return; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return; spin_lock_irq(&b->bm_lock); @@ -894,9 +894,9 @@ void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number, void drbd_bm_set_all(struct drbd_device *device) { struct drbd_bitmap *b = device->bitmap; - if (!expect(b)) + if (!expect(device, b)) return; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return; spin_lock_irq(&b->bm_lock); @@ -910,9 +910,9 @@ void drbd_bm_set_all(struct drbd_device *device) void 
drbd_bm_clear_all(struct drbd_device *device) { struct drbd_bitmap *b = device->bitmap; - if (!expect(b)) + if (!expect(device, b)) return; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return; spin_lock_irq(&b->bm_lock); @@ -952,7 +952,7 @@ static void drbd_bm_endio(struct bio *bio) bm_set_page_io_err(b->bm_pages[idx]); /* Not identical to on disk version of it. * Is BM_PAGE_IO_ERROR enough? */ - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", bio->bi_status, idx); } else { @@ -1013,7 +1013,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho else len = PAGE_SIZE; } else { - if (__ratelimit(&drbd_ratelimit_state)) { + if (drbd_ratelimit()) { drbd_err(device, "Invalid offset during on-disk bitmap access: " "page idx %u, sector %llu\n", page_nr, on_disk_sector); } @@ -1332,9 +1332,9 @@ static unsigned long bm_find_next(struct drbd_device *device, struct drbd_bitmap *b = device->bitmap; unsigned long i = DRBD_END_OF_BITMAP; - if (!expect(b)) + if (!expect(device, b)) return i; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return i; spin_lock_irq(&b->bm_lock); @@ -1436,9 +1436,9 @@ static int bm_change_bits_to(struct drbd_device *device, const unsigned long s, struct drbd_bitmap *b = device->bitmap; int c = 0; - if (!expect(b)) + if (!expect(device, b)) return 1; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 0; spin_lock_irqsave(&b->bm_lock, flags); @@ -1582,9 +1582,9 @@ int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr) unsigned long *p_addr; int i; - if (!expect(b)) + if (!expect(device, b)) return 0; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 0; spin_lock_irqsave(&b->bm_lock, flags); @@ -1619,9 +1619,9 @@ int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const * robust in case we screwed up elsewhere, in that case pretend there * was one dirty bit in the requested area, so we won't try to do a * local read there (no bitmap probably implies no disk) */ - if (!expect(b)) + if (!expect(device, b)) return 1; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 1; spin_lock_irqsave(&b->bm_lock, flags); @@ -1635,7 +1635,7 @@ int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const bm_unmap(p_addr); p_addr = bm_map_pidx(b, idx); } - if (expect(bitnr < b->bm_bits)) + if (expect(device, bitnr < b->bm_bits)) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); else drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); @@ -1668,9 +1668,9 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr) unsigned long flags; unsigned long *p_addr, *bm; - if (!expect(b)) + if (!expect(device, b)) return 0; - if (!expect(b->bm_pages)) + if (!expect(device, b->bm_pages)) return 0; spin_lock_irqsave(&b->bm_lock, flags); diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index b3b9cd5628fd..a72c096aa5b1 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) "drbd debugfs: " fmt #include <linux/kernel.h> #include <linux/module.h> diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h index 58e31cef0844..ee3d66eb40c6 100644 --- a/drivers/block/drbd/drbd_debugfs.h +++ 
b/drivers/block/drbd/drbd_debugfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/debugfs.h> diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4d661282ff41..ae713338aa46 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* drbd_int.h @@ -37,6 +37,7 @@ #include "drbd_strings.h" #include "drbd_state.h" #include "drbd_protocol.h" +#include "drbd_polymorph_printk.h" #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -75,71 +76,6 @@ extern int drbd_proc_details; struct drbd_device; struct drbd_connection; -#define __drbd_printk_device(level, device, fmt, args...) \ - dev_printk(level, disk_to_dev((device)->vdisk), fmt, ## args) -#define __drbd_printk_peer_device(level, peer_device, fmt, args...) \ - dev_printk(level, disk_to_dev((peer_device)->device->vdisk), fmt, ## args) -#define __drbd_printk_resource(level, resource, fmt, args...) \ - printk(level "drbd %s: " fmt, (resource)->name, ## args) -#define __drbd_printk_connection(level, connection, fmt, args...) \ - printk(level "drbd %s: " fmt, (connection)->resource->name, ## args) - -void drbd_printk_with_wrong_object_type(void); - -#define __drbd_printk_if_same_type(obj, type, func, level, fmt, args...) \ - (__builtin_types_compatible_p(typeof(obj), type) || \ - __builtin_types_compatible_p(typeof(obj), const type)), \ - func(level, (const type)(obj), fmt, ## args) - -#define drbd_printk(level, obj, fmt, args...) \ - __builtin_choose_expr( \ - __drbd_printk_if_same_type(obj, struct drbd_device *, \ - __drbd_printk_device, level, fmt, ## args), \ - __builtin_choose_expr( \ - __drbd_printk_if_same_type(obj, struct drbd_resource *, \ - __drbd_printk_resource, level, fmt, ## args), \ - __builtin_choose_expr( \ - __drbd_printk_if_same_type(obj, struct drbd_connection *, \ - __drbd_printk_connection, level, fmt, ## args), \ - __builtin_choose_expr( \ - __drbd_printk_if_same_type(obj, struct drbd_peer_device *, \ - __drbd_printk_peer_device, level, fmt, ## args), \ - drbd_printk_with_wrong_object_type())))) - -#define drbd_dbg(obj, fmt, args...) \ - drbd_printk(KERN_DEBUG, obj, fmt, ## args) -#define drbd_alert(obj, fmt, args...) \ - drbd_printk(KERN_ALERT, obj, fmt, ## args) -#define drbd_err(obj, fmt, args...) \ - drbd_printk(KERN_ERR, obj, fmt, ## args) -#define drbd_warn(obj, fmt, args...) \ - drbd_printk(KERN_WARNING, obj, fmt, ## args) -#define drbd_info(obj, fmt, args...) \ - drbd_printk(KERN_INFO, obj, fmt, ## args) -#define drbd_emerg(obj, fmt, args...) \ - drbd_printk(KERN_EMERG, obj, fmt, ## args) - -#define dynamic_drbd_dbg(device, fmt, args...) \ - dynamic_dev_dbg(disk_to_dev(device->vdisk), fmt, ## args) - -#define D_ASSERT(device, exp) do { \ - if (!(exp)) \ - drbd_err(device, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__); \ - } while (0) - -/** - * expect - Make an assertion - * - * Unlike the assert macro, this macro returns a boolean result. 
- */ -#define expect(exp) ({ \ - bool _bool = (exp); \ - if (!_bool) \ - drbd_err(device, "ASSERTION %s FAILED in %s\n", \ - #exp, __func__); \ - _bool; \ - }) - /* Defines to control fault insertion */ enum { DRBD_FAULT_MD_WR = 0, /* meta data write */ @@ -395,6 +331,7 @@ struct drbd_peer_request { struct drbd_peer_device *peer_device; struct drbd_epoch *epoch; /* for writes */ struct page *pages; + blk_opf_t opf; atomic_t pending_bios; struct drbd_interval i; /* see comments on ee flag bits below */ @@ -406,6 +343,10 @@ struct drbd_peer_request { }; }; +/* Equivalent to bio_op and req_op. */ +#define peer_req_op(peer_req) \ + ((peer_req)->opf & REQ_OP_MASK) + /* ee flag bits. * While corresponding bios are in flight, the only modification will be * set_bit WAS_ERROR, which has to be atomic. @@ -1545,8 +1486,7 @@ extern void drbd_send_acks_wf(struct work_struct *ws); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, bool throttle_if_app_is_waiting); -extern int drbd_submit_peer_request(struct drbd_device *, - struct drbd_peer_request *, blk_opf_t, int); +extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int, @@ -1718,7 +1658,7 @@ static inline void __drbd_chk_io_error_(struct drbd_device *device, switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Local IO failed in %s.\n", where); if (device->state.disk > D_INCONSISTENT) _drbd_set_state(_NS(device, disk, D_INCONSISTENT), CS_HARD, NULL); diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index f07b4378388b..5024ffd6143d 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #include <asm/bug.h> #include <linux/rbtree_augmented.h> #include "drbd_interval.h" diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index b8c2dee5edc8..366489b72fe9 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_INTERVAL_H #define __DRBD_INTERVAL_H diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8532b839a343..e43dfb9eb6ad 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd.c @@ -1259,7 +1259,7 @@ static int _drbd_send_bitmap(struct drbd_device *device) struct bm_xfer_ctx c; int err; - if (!expect(device->bitmap)) + if (!expect(device, device->bitmap)) return false; if (get_ldev(device)) { @@ -1816,7 +1816,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ? 
*/ - iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, size); if (sock == connection->data.socket) { rcu_read_lock(); @@ -2184,7 +2184,7 @@ void drbd_destroy_device(struct kref *kref) struct drbd_resource *resource = device->resource; struct drbd_peer_device *peer_device, *tmp_peer_device; - del_timer_sync(&device->request_timer); + timer_shutdown_sync(&device->request_timer); /* paranoia asserts */ D_ASSERT(device, device->open_cnt == 0); @@ -2217,7 +2217,8 @@ void drbd_destroy_device(struct kref *kref) kref_put(&peer_device->connection->kref, drbd_destroy_connection); kfree(peer_device); } - memset(device, 0xfd, sizeof(*device)); + if (device->submit.wq) + destroy_workqueue(device->submit.wq); kfree(device); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2249,9 +2250,9 @@ static void do_retry(struct work_struct *ws) bool expected; expected = - expect(atomic_read(&req->completion_ref) == 0) && - expect(req->rq_state & RQ_POSTPONED) && - expect((req->rq_state & RQ_LOCAL_PENDING) == 0 || + expect(device, atomic_read(&req->completion_ref) == 0) && + expect(device, req->rq_state & RQ_POSTPONED) && + expect(device, (req->rq_state & RQ_LOCAL_PENDING) == 0 || (req->rq_state & RQ_LOCAL_ABORTED) != 0); if (!expected) @@ -2309,7 +2310,6 @@ void drbd_destroy_resource(struct kref *kref) idr_destroy(&resource->devices); free_cpumask_var(resource->cpu_mask); kfree(resource->name); - memset(resource, 0xf2, sizeof(*resource)); kfree(resource); } @@ -2650,7 +2650,6 @@ void drbd_destroy_connection(struct kref *kref) drbd_free_socket(&connection->data); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); - memset(connection, 0xfc, sizeof(*connection)); kfree(connection); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2774,7 +2773,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig err = add_disk(disk); if (err) - goto out_idr_remove_from_resource; + goto out_destroy_workqueue; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2788,6 +2787,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; +out_destroy_workqueue: + destroy_workqueue(device->submit.wq); out_idr_remove_from_resource: for_each_connection_safe(connection, n, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); @@ -3766,7 +3767,7 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type) if (ret) { drbd_fault_count++; - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_warn(device, "***Simulating %s failure\n", _drbd_fault_str(type)); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 864c98e74875..60757ac31701 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_nl.c @@ -1210,6 +1210,7 @@ static void decide_on_discard_support(struct drbd_device *device, struct drbd_connection *connection = first_peer_device(device)->connection; struct request_queue *q = device->rq_queue; + unsigned int max_discard_sectors; if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) goto not_supported; @@ -1230,15 +1231,14 @@ static void decide_on_discard_support(struct drbd_device *device, * topology on all peers. 
*/ blk_queue_discard_granularity(q, 512); - q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - q->limits.max_write_zeroes_sectors = - drbd_max_discard_sectors(connection); + max_discard_sectors = drbd_max_discard_sectors(connection); + blk_queue_max_discard_sectors(q, max_discard_sectors); + blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); return; not_supported: blk_queue_discard_granularity(q, 0); - q->limits.max_discard_sectors = 0; - q->limits.max_write_zeroes_sectors = 0; + blk_queue_max_discard_sectors(q, 0); } static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) @@ -1256,6 +1256,18 @@ static void fixup_write_zeroes(struct drbd_device *device, struct request_queue q->limits.max_write_zeroes_sectors = 0; } +static void fixup_discard_support(struct drbd_device *device, struct request_queue *q) +{ + unsigned int max_discard = device->rq_queue->limits.max_discard_sectors; + unsigned int discard_granularity = + device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT; + + if (discard_granularity > max_discard) { + blk_queue_discard_granularity(q, 0); + blk_queue_max_discard_sectors(q, 0); + } +} + static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int max_bio_size, struct o_qlim *o) { @@ -1288,6 +1300,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi disk_update_readahead(device->vdisk); } fixup_write_zeroes(device, q); + fixup_discard_support(device, q); } void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) @@ -1530,7 +1543,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) goto fail_unlock; } - if (!expect(new_disk_conf->resync_rate >= 1)) + if (!expect(device, new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; sanitize_disk_conf(device, new_disk_conf, device->ldev); diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c index 6a09b0b98018..df0d241d3f6a 100644 --- a/drivers/block/drbd/drbd_nla.c +++ b/drivers/block/drbd/drbd_nla.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <net/netlink.h> #include <linux/drbd_genl_api.h> diff --git a/drivers/block/drbd/drbd_nla.h b/drivers/block/drbd/drbd_nla.h index f5eaffb6474e..d3555df0d353 100644 --- a/drivers/block/drbd/drbd_nla.h +++ b/drivers/block/drbd/drbd_nla.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_NLA_H #define __DRBD_NLA_H diff --git a/drivers/block/drbd/drbd_polymorph_printk.h b/drivers/block/drbd/drbd_polymorph_printk.h new file mode 100644 index 000000000000..8e0082d139ba --- /dev/null +++ b/drivers/block/drbd/drbd_polymorph_printk.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef DRBD_POLYMORPH_PRINTK_H +#define DRBD_POLYMORPH_PRINTK_H + +#if !defined(CONFIG_DYNAMIC_DEBUG) +#undef DEFINE_DYNAMIC_DEBUG_METADATA +#undef __dynamic_pr_debug +#undef DYNAMIC_DEBUG_BRANCH +#define DEFINE_DYNAMIC_DEBUG_METADATA(D, F) const char *D = F; ((void)D) +#define __dynamic_pr_debug(D, F, args...) 
do { (void)(D); if (0) printk(F, ## args); } while (0) +#define DYNAMIC_DEBUG_BRANCH(D) false +#endif + + +#define __drbd_printk_drbd_device_prep(device) \ + const struct drbd_device *__d = (device); \ + const struct drbd_resource *__r = __d->resource +#define __drbd_printk_drbd_device_fmt(fmt) "drbd %s/%u drbd%u: " fmt +#define __drbd_printk_drbd_device_args() __r->name, __d->vnr, __d->minor +#define __drbd_printk_drbd_device_unprep() + +#define __drbd_printk_drbd_peer_device_prep(peer_device) \ + const struct drbd_device *__d; \ + const struct drbd_resource *__r; \ + __d = (peer_device)->device; \ + __r = __d->resource +#define __drbd_printk_drbd_peer_device_fmt(fmt) \ + "drbd %s/%u drbd%u: " fmt +#define __drbd_printk_drbd_peer_device_args() \ + __r->name, __d->vnr, __d->minor +#define __drbd_printk_drbd_peer_device_unprep() + +#define __drbd_printk_drbd_resource_prep(resource) \ + const struct drbd_resource *__r = resource +#define __drbd_printk_drbd_resource_fmt(fmt) "drbd %s: " fmt +#define __drbd_printk_drbd_resource_args() __r->name +#define __drbd_printk_drbd_resource_unprep(resource) + +#define __drbd_printk_drbd_connection_prep(connection) \ + const struct drbd_connection *__c = (connection); \ + const struct drbd_resource *__r = __c->resource +#define __drbd_printk_drbd_connection_fmt(fmt) \ + "drbd %s: " fmt +#define __drbd_printk_drbd_connection_args() \ + __r->name +#define __drbd_printk_drbd_connection_unprep() + +void drbd_printk_with_wrong_object_type(void); +void drbd_dyn_dbg_with_wrong_object_type(void); + +#define __drbd_printk_choose_cond(obj, struct_name) \ + (__builtin_types_compatible_p(typeof(obj), struct struct_name *) || \ + __builtin_types_compatible_p(typeof(obj), const struct struct_name *)) +#define __drbd_printk_if_same_type(obj, struct_name, level, fmt, args...) \ + __drbd_printk_choose_cond(obj, struct_name), \ +({ \ + __drbd_printk_ ## struct_name ## _prep((const struct struct_name *)(obj)); \ + printk(level __drbd_printk_ ## struct_name ## _fmt(fmt), \ + __drbd_printk_ ## struct_name ## _args(), ## args); \ + __drbd_printk_ ## struct_name ## _unprep(); \ +}) + +#define drbd_printk(level, obj, fmt, args...) \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, drbd_device, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, drbd_resource, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, drbd_connection, level, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_printk_if_same_type(obj, drbd_peer_device, level, fmt, ## args), \ + drbd_printk_with_wrong_object_type())))) + +#define __drbd_dyn_dbg_if_same_type(obj, struct_name, fmt, args...) \ + __drbd_printk_choose_cond(obj, struct_name), \ +({ \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor)) { \ + __drbd_printk_ ## struct_name ## _prep((const struct struct_name *)(obj)); \ + __dynamic_pr_debug(&descriptor, __drbd_printk_ ## struct_name ## _fmt(fmt), \ + __drbd_printk_ ## struct_name ## _args(), ## args); \ + __drbd_printk_ ## struct_name ## _unprep(); \ + } \ +}) + +#define dynamic_drbd_dbg(obj, fmt, args...) 
\ + __builtin_choose_expr( \ + __drbd_dyn_dbg_if_same_type(obj, drbd_device, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_dyn_dbg_if_same_type(obj, drbd_resource, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_dyn_dbg_if_same_type(obj, drbd_connection, fmt, ## args), \ + __builtin_choose_expr( \ + __drbd_dyn_dbg_if_same_type(obj, drbd_peer_device, fmt, ## args), \ + drbd_dyn_dbg_with_wrong_object_type())))) + +#define drbd_emerg(device, fmt, args...) \ + drbd_printk(KERN_EMERG, device, fmt, ## args) +#define drbd_alert(device, fmt, args...) \ + drbd_printk(KERN_ALERT, device, fmt, ## args) +#define drbd_crit(device, fmt, args...) \ + drbd_printk(KERN_CRIT, device, fmt, ## args) +#define drbd_err(device, fmt, args...) \ + drbd_printk(KERN_ERR, device, fmt, ## args) +#define drbd_warn(device, fmt, args...) \ + drbd_printk(KERN_WARNING, device, fmt, ## args) +#define drbd_notice(device, fmt, args...) \ + drbd_printk(KERN_NOTICE, device, fmt, ## args) +#define drbd_info(device, fmt, args...) \ + drbd_printk(KERN_INFO, device, fmt, ## args) + + +#define drbd_ratelimit() \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + __ratelimit(&_rs); \ +}) + +#define D_ASSERT(x, exp) \ + do { \ + if (!(exp)) \ + drbd_err(x, "ASSERTION %s FAILED in %s\n", \ + #exp, __func__); \ + } while (0) + +/** + * expect - Make an assertion + * + * Unlike the assert macro, this macro returns a boolean result. + */ +#define expect(x, exp) ({ \ + bool _bool = (exp); \ + if (!_bool && drbd_ratelimit()) \ + drbd_err(x, "ASSERTION %s FAILED in %s\n", \ + #exp, __func__); \ + _bool; \ + }) + +#endif diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 3c0193de2498..2227fb0db1ce 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_proc.c diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index a882b65ab5d2..56bbca9d7700 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_PROTOCOL_H #define __DRBD_PROTOCOL_H diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ee69d50ba4fd..757f4692b5bd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_receiver.c @@ -413,7 +413,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request * drbd_free_pages(device, peer_req->pages, is_net); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); - if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { + if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; drbd_al_complete_io(device, &peer_req->i); } @@ -507,7 +507,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag struct msghdr msg = { .msg_flags = (flags ? 
flags : MSG_WAITALL | MSG_NOSIGNAL) }; - iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); return sock_recvmsg(sock, &msg, msg.msg_flags); } @@ -781,7 +781,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, timeo = connect_int * HZ; /* 28.5% random jitter */ - timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7; + timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7; err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); if (err <= 0) @@ -1004,7 +1004,7 @@ retry: drbd_warn(connection, "Error receiving initial packet\n"); sock_release(s); randomize: - if (prandom_u32_max(2)) + if (get_random_u32_below(2)) goto retry; } } @@ -1030,6 +1030,9 @@ randomize: sock.socket->sk->sk_allocation = GFP_NOIO; msock.socket->sk->sk_allocation = GFP_NOIO; + sock.socket->sk->sk_use_task_frag = false; + msock.socket->sk->sk_use_task_frag = false; + sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; @@ -1603,9 +1606,19 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru drbd_endio_write_sec_final(peer_req); } +static int peer_request_fault_type(struct drbd_peer_request *peer_req) +{ + if (peer_req_op(peer_req) == REQ_OP_READ) { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD; + } else { + return peer_req->flags & EE_APPLICATION ? + DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR; + } +} + /** * drbd_submit_peer_request() - * @device: DRBD device. * @peer_req: peer request * * May spread the pages to multiple bios, @@ -1619,10 +1632,9 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru * on certain Xen deployments. */ /* TODO allocate from our own bio_set. */ -int drbd_submit_peer_request(struct drbd_device *device, - struct drbd_peer_request *peer_req, - const blk_opf_t opf, const int fault_type) +int drbd_submit_peer_request(struct drbd_peer_request *peer_req) { + struct drbd_device *device = peer_req->peer_device->device; struct bio *bios = NULL; struct bio *bio; struct page *page = peer_req->pages; @@ -1667,7 +1679,18 @@ int drbd_submit_peer_request(struct drbd_device *device, * generated bio, but a bio allocated on behalf of the peer. */ next_bio: - bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO); + /* _DISCARD, _WRITE_ZEROES handled above. + * REQ_OP_FLUSH (empty flush) not expected, + * should have been mapped to a "drbd protocol barrier". + * REQ_OP_SECURE_ERASE: I don't see how we could ever support that. + */ + if (!(peer_req_op(peer_req) == REQ_OP_WRITE || + peer_req_op(peer_req) == REQ_OP_READ)) { + drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf); + return -EINVAL; + } + + bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO); /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio->bi_private = peer_req; @@ -1697,7 +1720,7 @@ next_bio: bios = bios->bi_next; bio->bi_next = NULL; - drbd_submit_bio_noacct(device, fault_type, bio); + drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio); } while (bios); return 0; } @@ -1853,21 +1876,21 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, /* assume request_size == data_size, but special case trim. 
*/ ds = data_size; if (trim) { - if (!expect(data_size == 0)) + if (!expect(peer_device, data_size == 0)) return NULL; ds = be32_to_cpu(trim->size); } else if (zeroes) { - if (!expect(data_size == 0)) + if (!expect(peer_device, data_size == 0)) return NULL; ds = be32_to_cpu(zeroes->size); } - if (!expect(IS_ALIGNED(ds, 512))) + if (!expect(peer_device, IS_ALIGNED(ds, 512))) return NULL; if (trim || zeroes) { - if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) + if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9))) return NULL; - } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) + } else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE)) return NULL; /* even though we trust out peer, @@ -2051,6 +2074,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto * respective _drbd_clear_done_ee */ peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_WRITE; peer_req->submit_jif = jiffies; spin_lock_irq(&device->resource->req_lock); @@ -2058,8 +2082,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, - DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0; /* don't care for the reason here */ @@ -2145,7 +2168,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet * or in drbd_peer_request_endio. */ err = recv_resync_read(peer_device, sector, pi); } else { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Can not write resync data to local disk.\n"); err = drbd_drain_block(peer_device, pi->size); @@ -2375,16 +2398,6 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co return ret; } -/* see also bio_flags_to_wire() - * DRBD_REQ_*, because we need to semantically map the flags to data packet - * flags and back. We may replicate to other kernel versions. */ -static blk_opf_t wire_flags_to_bio_flags(u32 dpf) -{ - return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | - (dpf & DP_FUA ? REQ_FUA : 0) | - (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); -} - static enum req_op wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_ZEROES) @@ -2395,6 +2408,15 @@ static enum req_op wire_flags_to_bio_op(u32 dpf) return REQ_OP_WRITE; } +/* see also bio_flags_to_wire() */ +static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) +{ + return wire_flags_to_bio_op(dpf) | + (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | + (dpf & DP_FUA ? REQ_FUA : 0) | + (dpf & DP_FLUSH ? 
REQ_PREFLUSH : 0); +} + static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { @@ -2538,8 +2560,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * struct drbd_peer_request *peer_req; struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); - enum req_op op; - blk_opf_t op_flags; u32 dp_flags; int err, tp; @@ -2578,11 +2598,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_APPLICATION; dp_flags = be32_to_cpu(p->dp_flags); - op = wire_flags_to_bio_op(dp_flags); - op_flags = wire_flags_to_bio_flags(dp_flags); + peer_req->opf = wire_flags_to_bio(connection, dp_flags); if (pi->cmd == P_TRIM) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_DISCARD); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); /* need to play safe: an older DRBD sender * may mean zero-out while sending P_TRIM. */ @@ -2590,7 +2609,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_ZEROOUT; } else if (pi->cmd == P_ZEROES) { D_ASSERT(peer_device, peer_req->i.size > 0); - D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); + D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES); D_ASSERT(peer_device, peer_req->pages == NULL); /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */ if (dp_flags & DP_DISCARD) @@ -2677,8 +2696,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_CALL_AL_COMPLETE_IO; } - err = drbd_submit_peer_request(device, peer_req, op | op_flags, - DRBD_FAULT_DT_WR); + err = drbd_submit_peer_request(peer_req); if (!err) return 0; @@ -2789,7 +2807,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; - unsigned int fault_type; struct p_block_req *p = pi->data; peer_device = conn_peer_device(connection, pi->vnr); @@ -2832,7 +2849,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet default: BUG(); } - if (verb && __ratelimit(&drbd_ratelimit_state)) + if (verb && drbd_ratelimit()) drbd_err(device, "Can not satisfy peer's read request, " "no local data.\n"); @@ -2849,11 +2866,11 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet put_ldev(device); return -ENOMEM; } + peer_req->opf = REQ_OP_READ; switch (pi->cmd) { case P_DATA_REQUEST: peer_req->w.cb = w_e_end_data_req; - fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ peer_req->flags |= EE_APPLICATION; goto submit; @@ -2867,14 +2884,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet fallthrough; case P_RS_DATA_REQUEST: peer_req->w.cb = w_e_end_rsdata_req; - fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); break; case P_OV_REPLY: case P_CSUM_RS_REQUEST: - fault_type = DRBD_FAULT_RS_RD; di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); if (!di) goto out_free_e; @@ -2923,7 +2938,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet (unsigned long long)sector); } peer_req->w.cb = w_e_end_ov_req; - fault_type = DRBD_FAULT_RS_RD; break; default: @@ -2975,8 +2989,7 @@ submit_for_resync: submit: update_receiver_timing_details(connection, drbd_submit_peer_request); 
inc_unacked(device); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, - fault_type) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0; /* don't care for the reason here */ @@ -4947,7 +4960,6 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const enum req_op op = REQ_OP_WRITE_ZEROES; peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); @@ -4957,6 +4969,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac } peer_req->w.cb = e_end_resync_block; + peer_req->opf = REQ_OP_DISCARD; peer_req->submit_jif = jiffies; peer_req->flags |= EE_TRIM; @@ -4965,8 +4978,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); - err = drbd_submit_peer_request(device, peer_req, op, - DRBD_FAULT_RS_WR); + err = drbd_submit_peer_request(peer_req); if (err) { spin_lock_irq(&device->resource->req_lock); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7f9bcc82fc9c..e36216d50753 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_req.c @@ -144,7 +144,7 @@ void drbd_req_destroy(struct kref *kref) if (get_ldev_if_state(device, D_FAILED)) { drbd_al_complete_io(device, &req->i); put_ldev(device); - } else if (__ratelimit(&drbd_ratelimit_state)) { + } else if (drbd_ratelimit()) { drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), " "but my Disk seems to have failed :(\n", (unsigned long long) req->i.sector, req->i.size); @@ -518,7 +518,7 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) { - if (!__ratelimit(&drbd_ratelimit_state)) + if (!drbd_ratelimit()) return; drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n", @@ -1402,7 +1402,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request submit_private_bio = true; } else if (no_remote) { nodata: - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n", (unsigned long long)req->i.sector, req->i.size >> 9); /* A write may have been queued for send_oos, however. 
@@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio) struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; bio = bio_split_to_limits(bio); + if (!bio) + return; /* * what we "blindly" assume: diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6237fa1dcb0e..b4017b5c3fbc 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* drbd_req.h diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 3f7bf9f2d874..75d13ea0024f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_state.c diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index f87371e55e68..cbaeb8018dbf 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef DRBD_STATE_H #define DRBD_STATE_H diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index d5b0479bc9a6..9d78d8e3912e 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef DRBD_STATE_CHANGE_H #define DRBD_STATE_CHANGE_H diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index fc01307607ea..0a06f744b096 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd.h diff --git a/drivers/block/drbd/drbd_strings.h b/drivers/block/drbd/drbd_strings.h index 87b94a27358a..0201f6590f6a 100644 --- a/drivers/block/drbd/drbd_strings.h +++ b/drivers/block/drbd/drbd_strings.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __DRBD_STRINGS_H #define __DRBD_STRINGS_H diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h index 01e3babc5277..1ee81e3c2152 100644 --- a/drivers/block/drbd/drbd_vli.h +++ b/drivers/block/drbd/drbd_vli.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- linux-c -*- drbd_receiver.c diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0bb1a900c2d5..f46738040d6b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* drbd_worker.c @@ -176,7 +176,7 @@ void drbd_peer_request_endio(struct bio *bio) bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || bio_op(bio) == REQ_OP_DISCARD; - if (bio->bi_status && __ratelimit(&drbd_ratelimit_state)) + if (bio->bi_status && drbd_ratelimit()) drbd_warn(device, "%s: error=%d s=%llus\n", is_write ? (is_discard ? "discard" : "write") : "read", bio->bi_status, @@ -240,7 +240,7 @@ void drbd_request_endio(struct bio *bio) * though we still will complain noisily about it. 
*/ if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); if (!bio->bi_status) @@ -400,13 +400,13 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, goto defer; peer_req->w.cb = w_e_send_csum; + peer_req->opf = REQ_OP_READ; spin_lock_irq(&device->resource->req_lock); list_add_tail(&peer_req->w.list, &device->read_ee); spin_unlock_irq(&device->resource->req_lock); atomic_add(size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, - DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_peer_request(peer_req) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed @@ -1062,7 +1062,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req); } else { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. sector=%llus.\n", (unsigned long long)peer_req->i.sector); @@ -1135,13 +1135,13 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) else err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); } else { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Not sending RSDataReply, " "partner DISKLESS!\n"); err = 0; } } else { - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Sending NegRSDReply. sector %llus.\n", (unsigned long long)peer_req->i.sector); @@ -1212,7 +1212,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) } } else { err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); - if (__ratelimit(&drbd_ratelimit_state)) + if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. 
I guess it gets messy.\n"); } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ccad3d7b3ddd..487840e3564d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4593,8 +4593,10 @@ static int __init do_floppy_init(void) goto out_put_disk; err = floppy_alloc_disk(drive, 0); - if (err) + if (err) { + blk_mq_free_tag_set(&tag_sets[drive]); goto out_put_disk; + } timer_setup(&motor_off_timer[drive], motor_off_callback, 0); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ad92192c7d61..1518a6423279 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -243,7 +243,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) struct iov_iter i; ssize_t bw; - iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); + iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); file_start_write(file); bw = vfs_iter_write(file, &i, ppos, 0); @@ -286,7 +286,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, ssize_t len; rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) return len; @@ -392,7 +392,7 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret) } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, - loff_t pos, bool rw) + loff_t pos, int rw) { struct iov_iter iter; struct req_iterator rq_iter; @@ -448,7 +448,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); - if (rw == WRITE) + if (rw == ITER_SOURCE) ret = call_write_iter(file, &cmd->iocb, &iter); else ret = call_read_iter(file, &cmd->iocb, &iter); @@ -490,12 +490,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, WRITE); + return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); else return lo_write_simple(lo, rq, pos); case REQ_OP_READ: if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, READ); + return lo_rw_aio(lo, cmd, pos, ITER_DEST); else return lo_read_simple(lo, rq, pos); default: @@ -1755,7 +1755,7 @@ static void lo_free_disk(struct gendisk *disk) if (lo->workqueue) destroy_workqueue(lo->workqueue); loop_free_idle_workers(lo, true); - del_timer_sync(&lo->timer); + timer_shutdown_sync(&lo->timer); mutex_destroy(&lo->lo_mutex); kfree(lo); } @@ -1773,7 +1773,16 @@ static const struct block_device_operations lo_fops = { /* * And now the modules code and kernel interface. */ -static int max_loop; + +/* + * If max_loop is specified, create that many devices upfront. + * This also becomes a hard limit. If max_loop is not specified, + * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module + * init time. Loop devices can be requested on-demand with the + * /dev/loop-control interface, or be instantiated by accessing + * a 'dead' device node. + */ +static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT; module_param(max_loop, int, 0444); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); module_param(max_part, int, 0444); @@ -2181,7 +2190,7 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { - int i, nr; + int i; int err; part_shift = 0; @@ -2209,19 +2218,6 @@ static int __init loop_init(void) goto err_out; } - /* - * If max_loop is specified, create that many devices upfront. 
- * This also becomes a hard limit. If max_loop is not specified, - * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module - * init time. Loop devices can be requested on-demand with the - * /dev/loop-control interface, or be instantiated by accessing - * a 'dead' device node. - */ - if (max_loop) - nr = max_loop; - else - nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; - err = misc_register(&loop_misc); if (err < 0) goto err_out; @@ -2233,7 +2229,7 @@ static int __init loop_init(void) } /* pre-create number of devices given by config or max_loop */ - for (i = 0; i < nr; i++) + for (i = 0; i < max_loop; i++) loop_add(i); printk(KERN_INFO "loop: module loaded\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5cffd96ef2d7..592cfa8b765a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -512,6 +512,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, noreclaim_flag = memalloc_noreclaim_save(); do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; + sock->sk->sk_use_task_frag = false; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; @@ -563,7 +564,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) u32 nbd_cmd_flags = 0; int sent = nsock->sent, skip = 0; - iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request)); type = req_to_nbd_cmd_type(req); if (type == U32_MAX) @@ -649,7 +650,7 @@ send_pages: dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", req, bvec.bv_len); - iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&from, ITER_SOURCE, &bvec, 1, bvec.bv_len); if (skip) { if (skip >= iov_iter_count(&from)) { skip -= iov_iter_count(&from); @@ -701,7 +702,7 @@ static int nbd_read_reply(struct nbd_device *nbd, int index, int result; reply->magic = 0; - iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply)); + iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result < 0) { if (!nbd_disconnected(nbd->config)) @@ -790,7 +791,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, struct iov_iter to; rq_for_each_segment(bvec, req, iter) { - iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&to, ITER_DEST, &bvec, 1, bvec.bv_len); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result < 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", @@ -1267,7 +1268,7 @@ static void send_disconnects(struct nbd_device *nbd) for (i = 0; i < config->num_connections; i++) { struct nbd_sock *nsock = config->socks[i]; - iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, ITER_SOURCE, &iov, 1, sizeof(request)); mutex_lock(&nsock->tx_lock); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret < 0) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 1f154f92f4c2..7d28e3aa406c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -523,6 +523,24 @@ out: } CONFIGFS_ATTR(nullb_device_, badblocks); +static ssize_t nullb_device_zone_readonly_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *dev = to_nullb_device(item); + + return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY); +} +CONFIGFS_ATTR_WO(nullb_device_, zone_readonly); + +static ssize_t nullb_device_zone_offline_store(struct config_item *item, + const char *page, size_t count) +{ + struct nullb_device *dev = 
to_nullb_device(item); + + return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE); +} +CONFIGFS_ATTR_WO(nullb_device_, zone_offline); + static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_size, &nullb_device_attr_completion_nsec, @@ -549,6 +567,8 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_nr_conv, &nullb_device_attr_zone_max_open, &nullb_device_attr_zone_max_active, + &nullb_device_attr_zone_readonly, + &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, &nullb_device_attr_no_sched, &nullb_device_attr_shared_tag_bitmap, @@ -614,7 +634,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page) "poll_queues,power,queue_mode,shared_tag_bitmap,size," "submit_queues,use_per_node_hctx,virt_boundary,zoned," "zone_capacity,zone_max_active,zone_max_open," - "zone_nr_conv,zone_size\n"); + "zone_nr_conv,zone_offline,zone_readonly,zone_size\n"); } CONFIGFS_ATTR_RO(memb_group_, features); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 94ff68052b1e..eb5972c50be8 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -151,6 +151,8 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, sector_t nr_sectors); size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len); +ssize_t zone_cond_store(struct nullb_device *dev, const char *page, + size_t count, enum blk_zone_cond cond); #else static inline int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) @@ -174,6 +176,12 @@ static inline size_t null_zone_valid_read_len(struct nullb *nullb, { return len; } +static inline ssize_t zone_cond_store(struct nullb_device *dev, + const char *page, size_t count, + enum blk_zone_cond cond) +{ + return -EOPNOTSUPP; +} #define null_report_zones NULL #endif /* CONFIG_BLK_DEV_ZONED */ #endif /* __NULL_BLK_H */ diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 55a69e48ef8b..635ce0648133 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -384,8 +384,10 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, null_lock_zone(dev, zone); - if (zone->cond == BLK_ZONE_COND_FULL) { - /* Cannot write to a full zone */ + if (zone->cond == BLK_ZONE_COND_FULL || + zone->cond == BLK_ZONE_COND_READONLY || + zone->cond == BLK_ZONE_COND_OFFLINE) { + /* Cannot write to the zone */ ret = BLK_STS_IOERR; goto unlock; } @@ -613,7 +615,9 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op, for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { zone = &dev->zones[i]; null_lock_zone(dev, zone); - if (zone->cond != BLK_ZONE_COND_EMPTY) { + if (zone->cond != BLK_ZONE_COND_EMPTY && + zone->cond != BLK_ZONE_COND_READONLY && + zone->cond != BLK_ZONE_COND_OFFLINE) { null_reset_zone(dev, zone); trace_nullb_zone_op(cmd, i, zone->cond); } @@ -627,6 +631,12 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op, null_lock_zone(dev, zone); + if (zone->cond == BLK_ZONE_COND_READONLY || + zone->cond == BLK_ZONE_COND_OFFLINE) { + ret = BLK_STS_IOERR; + goto unlock; + } + switch (op) { case REQ_OP_ZONE_RESET: ret = null_reset_zone(dev, zone); @@ -648,6 +658,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op, if (ret == BLK_STS_OK) trace_nullb_zone_op(cmd, zone_no, zone->cond); +unlock: null_unlock_zone(dev, zone); return ret; @@ 
-674,6 +685,8 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, default: dev = cmd->nq->dev; zone = &dev->zones[null_zone_no(dev, sector)]; + if (zone->cond == BLK_ZONE_COND_OFFLINE) + return BLK_STS_IOERR; null_lock_zone(dev, zone); sts = null_process_cmd(cmd, op, sector, nr_sectors); @@ -681,3 +694,79 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, return sts; } } + +/* + * Set a zone in the read-only or offline condition. + */ +static void null_set_zone_cond(struct nullb_device *dev, + struct nullb_zone *zone, enum blk_zone_cond cond) +{ + if (WARN_ON_ONCE(cond != BLK_ZONE_COND_READONLY && + cond != BLK_ZONE_COND_OFFLINE)) + return; + + null_lock_zone(dev, zone); + + /* + * If the read-only condition is requested again to zones already in + * read-only condition, restore back normal empty condition. Do the same + * if the offline condition is requested for offline zones. Otherwise, + * set the specified zone condition to the zones. Finish the zones + * beforehand to free up zone resources. + */ + if (zone->cond == cond) { + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; + if (dev->memory_backed) + null_handle_discard(dev, zone->start, zone->len); + } else { + if (zone->cond != BLK_ZONE_COND_READONLY && + zone->cond != BLK_ZONE_COND_OFFLINE) + null_finish_zone(dev, zone); + zone->cond = cond; + zone->wp = (sector_t)-1; + } + + null_unlock_zone(dev, zone); +} + +/* + * Identify a zone from the sector written to configfs file. Then set zone + * condition to the zone. + */ +ssize_t zone_cond_store(struct nullb_device *dev, const char *page, + size_t count, enum blk_zone_cond cond) +{ + unsigned long long sector; + unsigned int zone_no; + int ret; + + if (!dev->zoned) { + pr_err("null_blk device is not zoned\n"); + return -EINVAL; + } + + if (!dev->zones) { + pr_err("null_blk device is not yet powered\n"); + return -EINVAL; + } + + ret = kstrtoull(page, 0, &sector); + if (ret < 0) + return ret; + + zone_no = null_zone_no(dev, sector); + if (zone_no >= dev->nr_zones) { + pr_err("Sector out of range\n"); + return -EINVAL; + } + + if (dev->zones[zone_no].type == BLK_ZONE_TYPE_CONVENTIONAL) { + pr_err("Can not change condition of conventional zones\n"); + return -EINVAL; + } + + null_set_zone_cond(dev, &dev->zones[zone_no], cond); + + return count; +} diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c76e0148eada..574e470b220b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); bio = bio_split_to_limits(bio); + if (!bio) + return; spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e9de9d846b73..17b677b5d3b2 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ret = -EINVAL; + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + ublk_ctrl_cmd_dump(cmd); if (!(issue_flags & IO_URING_F_SQE128)) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 19da5defd734..6a77fa917428 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -315,22 +315,35 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) virtqueue_notify(vq->vq); } +static blk_status_t virtblk_fail_to_queue(struct request *req, int 
rc) +{ + virtblk_cleanup_cmd(req); + switch (rc) { + case -ENOSPC: + return BLK_STS_DEV_RESOURCE; + case -ENOMEM: + return BLK_STS_RESOURCE; + default: + return BLK_STS_IOERR; + } +} + static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, struct virtio_blk *vblk, struct request *req, struct virtblk_req *vbr) { blk_status_t status; + int num; status = virtblk_setup_cmd(vblk->vdev, req, vbr); if (unlikely(status)) return status; - vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); - if (unlikely(vbr->sg_table.nents < 0)) { - virtblk_cleanup_cmd(req); - return BLK_STS_RESOURCE; - } + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) + return virtblk_fail_to_queue(req, -ENOMEM); + vbr->sg_table.nents = num; blk_mq_start_request(req); @@ -364,15 +377,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); virtblk_unmap_data(req, vbr); - virtblk_cleanup_cmd(req); - switch (err) { - case -ENOSPC: - return BLK_STS_DEV_RESOURCE; - case -ENOMEM: - return BLK_STS_RESOURCE; - default: - return BLK_STS_IOERR; - } + return virtblk_fail_to_queue(req, err); } if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) @@ -512,7 +517,7 @@ static void virtblk_free_disk(struct gendisk *disk) { struct virtio_blk *vblk = disk->private_data; - ida_simple_remove(&vd_index_ida, vblk->index); + ida_free(&vd_index_ida, vblk->index); mutex_destroy(&vblk->vdev_mutex); kfree(vblk); } @@ -902,8 +907,8 @@ static int virtblk_probe(struct virtio_device *vdev) return -EINVAL; } - err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), - GFP_KERNEL); + err = ida_alloc_range(&vd_index_ida, 0, + minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL); if (err < 0) goto out; index = err; @@ -991,7 +996,7 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_max_segments(q, sg_elems); /* No real sector limit. */ - blk_queue_max_hw_sectors(q, -1U); + blk_queue_max_hw_sectors(q, UINT_MAX); max_size = virtio_max_dma_size(vdev); @@ -1163,7 +1168,7 @@ out_free_vq: out_free_vblk: kfree(vblk); out_free_index: - ida_simple_remove(&vd_index_ida, index); + ida_free(&vd_index_ida, index); out: return err; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e68576ded7cb..23ed258b57f0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2129,7 +2129,6 @@ static void blkfront_closing(struct blkfront_info *info) if (info->rq && info->gd) { blk_mq_stop_hw_queues(info->rq); blk_mark_disk_dead(info->gd); - set_capacity(info->gd, 0); } for_each_rinfo(info, rinfo, i) { diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index d4100b0c083e..0386b7da02aa 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING /sys/kernel/debug/zram/zramX/block_state. See Documentation/admin-guide/blockdev/zram.rst for more information. + +config ZRAM_MULTI_COMP + bool "Enable multiple compression streams" + depends on ZRAM + help + This will enable multi-compression streams, so that ZRAM can + re-compress pages using a potentially slower but more effective + compression algorithm. Note, that IDLE page recompression + requires ZRAM_MEMORY_TRACKING. 
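A note on the null_blk zone_readonly/zone_offline attributes added above: zone_cond_store() parses a sector number with kstrtoull(), maps it to a zone, and null_set_zone_cond() either applies the requested condition or, if the zone is already in that condition, returns it to empty. A minimal user-space sketch of driving the interface follows; the configfs path /sys/kernel/config/nullb/nullb0 and the sector value are assumptions for illustration, not part of the patch.

/*
 * toggle_zone_cond.c - mark the null_blk zone containing a sector
 * read-only (or offline) via the new configfs attributes.
 *
 * Build: cc -o toggle_zone_cond toggle_zone_cond.c
 * Usage: ./toggle_zone_cond /sys/kernel/config/nullb/nullb0/zone_readonly 524288
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <configfs attribute> <sector>\n", argv[0]);
        return 1;
    }

    fd = open(argv[1], O_WRONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* zone_cond_store() parses the sector with kstrtoull(), maps it to a
     * zone, and rejects conventional zones and out-of-range sectors. */
    if (write(fd, argv[2], strlen(argv[2])) < 0) {
        perror("write");
        close(fd);
        return 1;
    }

    close(fd);
    return 0;
}

The same program covers zone_offline, and writing the same sector a second time restores the empty condition, as described in the comment in null_set_zone_cond().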
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 0916de952e09..55af4efd7983 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp) * case of allocation error, or any other error potentially * returned by zcomp_init(). */ -struct zcomp *zcomp_create(const char *compress) +struct zcomp *zcomp_create(const char *alg) { struct zcomp *comp; int error; @@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress) * is not loaded yet. We must do it here, otherwise we are about to * call /sbin/modprobe under CPU hot-plug lock. */ - if (!zcomp_available_algorithm(compress)) + if (!zcomp_available_algorithm(alg)) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) return ERR_PTR(-ENOMEM); - comp->name = compress; + comp->name = alg; error = zcomp_init(comp); if (error) { kfree(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 40f6420f4b2e..cdefdef93da8 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node); ssize_t zcomp_available_show(const char *comp, char *buf); bool zcomp_available_algorithm(const char *comp); -struct zcomp *zcomp_create(const char *comp); +struct zcomp *zcomp_create(const char *alg); void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 966aab902d19..e290d6d97047 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif +static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio) +{ + prio &= ZRAM_COMP_PRIORITY_MASK; + /* + * Clear previous priority value first, in case if we recompress + * further an already recompressed page + */ + zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK << + ZRAM_COMP_PRIORITY_BIT1); + zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1); +} + +static inline u32 zram_get_priority(struct zram *zram, u32 index) +{ + u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1; + + return prio & ZRAM_COMP_PRIORITY_MASK; +} + /* * Check if request is within bounds and aligned on zram logical blocks. 
*/ @@ -188,16 +207,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec) static inline void update_used_max(struct zram *zram, const unsigned long pages) { - unsigned long old_max, cur_max; - - old_max = atomic_long_read(&zram->stats.max_used_pages); + unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); do { - cur_max = old_max; - if (pages > cur_max) - old_max = atomic_long_cmpxchg( - &zram->stats.max_used_pages, cur_max, pages); - } while (old_max != cur_max); + if (cur_max >= pages) + return; + } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, + &cur_max, pages)); } static inline void zram_fill_page(void *ptr, unsigned long len, @@ -629,10 +645,10 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, #define PAGE_WB_SIG "page_index=" -#define PAGE_WRITEBACK 0 -#define HUGE_WRITEBACK (1<<0) -#define IDLE_WRITEBACK (1<<1) - +#define PAGE_WRITEBACK 0 +#define HUGE_WRITEBACK (1<<0) +#define IDLE_WRITEBACK (1<<1) +#define INCOMPRESSIBLE_WRITEBACK (1<<2) static ssize_t writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -653,6 +669,8 @@ static ssize_t writeback_store(struct device *dev, mode = HUGE_WRITEBACK; else if (sysfs_streq(buf, "huge_idle")) mode = IDLE_WRITEBACK | HUGE_WRITEBACK; + else if (sysfs_streq(buf, "incompressible")) + mode = INCOMPRESSIBLE_WRITEBACK; else { if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) return -EINVAL; @@ -715,11 +733,15 @@ static ssize_t writeback_store(struct device *dev, goto next; if (mode & IDLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_IDLE)) + !zram_test_flag(zram, index, ZRAM_IDLE)) goto next; if (mode & HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE)) + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + if (mode & INCOMPRESSIBLE_WRITEBACK && + !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; + /* * Clearing ZRAM_UNDER_WB is duty of caller. * IOW, zram_free_page never clear it. @@ -753,8 +775,12 @@ static ssize_t writeback_store(struct device *dev, zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); /* - * Return last IO error unless every IO were - * not suceeded. + * BIO errors are not fatal, we continue and simply + * attempt to writeback the remaining objects (pages). + * At the same time we need to signal user-space that + * some writes (at least one, but also could be all of + * them) were not successful and we do so by returning + * the most recent BIO error. */ ret = err; continue; @@ -920,13 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf, ts = ktime_to_timespec64(zram->table[index].ac_time); copied = snprintf(kbuf + written, count, - "%12zd %12lld.%06lu %c%c%c%c\n", + "%12zd %12lld.%06lu %c%c%c%c%c%c\n", index, (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC, zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', - zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); + zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.', + zram_get_priority(zram, index) ? 'r' : '.', + zram_test_flag(zram, index, + ZRAM_INCOMPRESSIBLE) ? 
'n' : '.'); if (count <= copied) { zram_slot_unlock(zram, index); @@ -1000,46 +1029,143 @@ static ssize_t max_comp_streams_store(struct device *dev, return len; } -static ssize_t comp_algorithm_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) { - size_t sz; - struct zram *zram = dev_to_zram(dev); + /* Do not free statically defined compression algorithms */ + if (zram->comp_algs[prio] != default_compressor) + kfree(zram->comp_algs[prio]); + + zram->comp_algs[prio] = alg; +} + +static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf) +{ + ssize_t sz; down_read(&zram->init_lock); - sz = zcomp_available_show(zram->compressor, buf); + sz = zcomp_available_show(zram->comp_algs[prio], buf); up_read(&zram->init_lock); return sz; } -static ssize_t comp_algorithm_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) { - struct zram *zram = dev_to_zram(dev); - char compressor[ARRAY_SIZE(zram->compressor)]; + char *compressor; size_t sz; - strscpy(compressor, buf, sizeof(compressor)); + sz = strlen(buf); + if (sz >= CRYPTO_MAX_ALG_NAME) + return -E2BIG; + + compressor = kstrdup(buf, GFP_KERNEL); + if (!compressor) + return -ENOMEM; + /* ignore trailing newline */ - sz = strlen(compressor); if (sz > 0 && compressor[sz - 1] == '\n') compressor[sz - 1] = 0x00; - if (!zcomp_available_algorithm(compressor)) + if (!zcomp_available_algorithm(compressor)) { + kfree(compressor); return -EINVAL; + } down_write(&zram->init_lock); if (init_done(zram)) { up_write(&zram->init_lock); + kfree(compressor); pr_info("Can't change algorithm for initialized device\n"); return -EBUSY; } - strcpy(zram->compressor, compressor); + comp_algorithm_set(zram, prio, compressor); up_write(&zram->init_lock); - return len; + return 0; +} + +static ssize_t comp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf); +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; + + ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf); + return ret ? 
ret : len; +} + +#ifdef CONFIG_ZRAM_MULTI_COMP +static ssize_t recomp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + ssize_t sz = 0; + u32 prio; + + for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; + + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio); + sz += __comp_algorithm_show(zram, prio, buf + sz); + } + + return sz; +} + +static ssize_t recomp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int prio = ZRAM_SECONDARY_COMP; + char *args, *param, *val; + char *alg = NULL; + int ret; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, &param, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "algo")) { + alg = val; + continue; + } + + if (!strcmp(param, "priority")) { + ret = kstrtoint(val, 10, &prio); + if (ret) + return ret; + continue; + } + } + + if (!alg) + return -EINVAL; + + if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) + return -EINVAL; + + ret = __comp_algorithm_store(zram, prio, alg); + return ret ? ret : len; } +#endif static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -1210,6 +1336,11 @@ static void zram_free_page(struct zram *zram, size_t index) atomic64_dec(&zram->stats.huge_pages); } + if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); + + zram_set_priority(zram, index, 0); + if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); free_block_bdev(zram, zram_get_element(zram, index)); @@ -1242,32 +1373,37 @@ out: ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); } -static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - struct bio *bio, bool partial_io) +/* + * Reads a page from the writeback devices. Corresponding ZRAM slot + * should be unlocked. + */ +static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, + u32 index, struct bio *bio, bool partial_io) +{ + struct bio_vec bvec = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0, + }; + + return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, + partial_io); +} + +/* + * Reads (decompresses if needed) a page from zspool (zsmalloc). + * Corresponding ZRAM slot should be locked. 
+ */ +static int zram_read_from_zspool(struct zram *zram, struct page *page, + u32 index) { struct zcomp_strm *zstrm; unsigned long handle; unsigned int size; void *src, *dst; + u32 prio; int ret; - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { - struct bio_vec bvec; - - zram_slot_unlock(zram, index); - /* A null bio means rw_page was used, we must fallback to bio */ - if (!bio) - return -EOPNOTSUPP; - - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; - return read_from_bdev(zram, &bvec, - zram_get_element(zram, index), - bio, partial_io); - } - handle = zram_get_handle(zram, index); if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { unsigned long value; @@ -1277,14 +1413,15 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, mem = kmap_atomic(page); zram_fill_page(mem, PAGE_SIZE, value); kunmap_atomic(mem); - zram_slot_unlock(zram, index); return 0; } size = zram_get_obj_size(zram, index); - if (size != PAGE_SIZE) - zstrm = zcomp_stream_get(zram->comp); + if (size != PAGE_SIZE) { + prio = zram_get_priority(zram, index); + zstrm = zcomp_stream_get(zram->comps[prio]); + } src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { @@ -1296,20 +1433,43 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, dst = kmap_atomic(page); ret = zcomp_decompress(zstrm, src, size, dst); kunmap_atomic(dst); - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[prio]); } zs_unmap_object(zram->mem_pool, handle); - zram_slot_unlock(zram, index); + return ret; +} + +static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io) +{ + int ret; + + zram_slot_lock(zram, index); + if (!zram_test_flag(zram, index, ZRAM_WB)) { + /* Slot should be locked through out the function call */ + ret = zram_read_from_zspool(zram, page, index); + zram_slot_unlock(zram, index); + } else { + /* Slot should be unlocked before the function call */ + zram_slot_unlock(zram, index); + + /* A null bio means rw_page was used, we must fallback to bio */ + if (!bio) + return -EOPNOTSUPP; + + ret = zram_bvec_read_from_bdev(zram, page, index, bio, + partial_io); + } /* Should NEVER happen. Return bio error if it does. */ - if (WARN_ON(ret)) + if (WARN_ON(ret < 0)) pr_err("Decompression failed! err=%d, page=%u\n", ret, index); return ret; } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) + u32 index, int offset, struct bio *bio) { int ret; struct page *page; @@ -1363,13 +1523,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(mem); compress_again: - zstrm = zcomp_stream_get(zram->comp); + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); src = kmap_atomic(page); ret = zcomp_compress(zstrm, src, &comp_len); kunmap_atomic(src); if (unlikely(ret)) { - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); pr_err("Compression failed! err=%d\n", ret); zs_free(zram->mem_pool, handle); return ret; @@ -1390,19 +1550,19 @@ compress_again: * if we have a 'non-null' handle here then we are coming * from the slow path and handle has already been allocated. 
*/ - if (IS_ERR((void *)handle)) + if (IS_ERR_VALUE(handle)) handle = zs_malloc(zram->mem_pool, comp_len, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | __GFP_HIGHMEM | __GFP_MOVABLE); - if (IS_ERR((void *)handle)) { - zcomp_stream_put(zram->comp); + if (IS_ERR_VALUE(handle)) { + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); atomic64_inc(&zram->stats.writestall); handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); - if (IS_ERR((void *)handle)) + if (IS_ERR_VALUE(handle)) return PTR_ERR((void *)handle); if (comp_len != PAGE_SIZE) @@ -1414,14 +1574,14 @@ compress_again: * zstrm buffer back. It is necessary that the dereferencing * of the zstrm variable below occurs correctly. */ - zstrm = zcomp_stream_get(zram->comp); + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); } alloced_pages = zs_get_total_pages(zram->mem_pool); update_used_max(zram, alloced_pages); if (zram->limit_pages && alloced_pages > zram->limit_pages) { - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); zs_free(zram->mem_pool, handle); return -ENOMEM; } @@ -1435,7 +1595,7 @@ compress_again: if (comp_len == PAGE_SIZE) kunmap_atomic(src); - zcomp_stream_put(zram->comp); + zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); zs_unmap_object(zram->mem_pool, handle); atomic64_add(comp_len, &zram->stats.compr_data_size); out: @@ -1504,6 +1664,274 @@ out: return ret; } +#ifdef CONFIG_ZRAM_MULTI_COMP +/* + * This function will decompress (unless it's ZRAM_HUGE) the page and then + * attempt to compress it using provided compression algorithm priority + * (which is potentially more effective). + * + * Corresponding ZRAM slot should be locked. + */ +static int zram_recompress(struct zram *zram, u32 index, struct page *page, + u32 threshold, u32 prio, u32 prio_max) +{ + struct zcomp_strm *zstrm = NULL; + unsigned long handle_old; + unsigned long handle_new; + unsigned int comp_len_old; + unsigned int comp_len_new; + unsigned int class_index_old; + unsigned int class_index_new; + u32 num_recomps = 0; + void *src, *dst; + int ret; + + handle_old = zram_get_handle(zram, index); + if (!handle_old) + return -EINVAL; + + comp_len_old = zram_get_obj_size(zram, index); + /* + * Do not recompress objects that are already "small enough". + */ + if (comp_len_old < threshold) + return 0; + + ret = zram_read_from_zspool(zram, page, index); + if (ret) + return ret; + + class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); + /* + * Iterate the secondary comp algorithms list (in order of priority) + * and try to recompress the page. + */ + for (; prio < prio_max; prio++) { + if (!zram->comps[prio]) + continue; + + /* + * Skip if the object is already re-compressed with a higher + * priority algorithm (or same algorithm). + */ + if (prio <= zram_get_priority(zram, index)) + continue; + + num_recomps++; + zstrm = zcomp_stream_get(zram->comps[prio]); + src = kmap_atomic(page); + ret = zcomp_compress(zstrm, src, &comp_len_new); + kunmap_atomic(src); + + if (ret) { + zcomp_stream_put(zram->comps[prio]); + return ret; + } + + class_index_new = zs_lookup_class_index(zram->mem_pool, + comp_len_new); + + /* Continue until we make progress */ + if (class_index_new >= class_index_old || + (threshold && comp_len_new >= threshold)) { + zcomp_stream_put(zram->comps[prio]); + continue; + } + + /* Recompression was successful so break out */ + break; + } + + /* + * We did not try to recompress, e.g. 
when we have only one + * secondary algorithm and the page is already recompressed + * using that algorithm + */ + if (!zstrm) + return 0; + + if (class_index_new >= class_index_old) { + /* + * Secondary algorithms failed to re-compress the page + * in a way that would save memory, mark the object as + * incompressible so that we will not try to compress + * it again. + * + * We need to make sure that all secondary algorithms have + * failed, so we test if the number of recompressions matches + * the number of active secondary algorithms. + */ + if (num_recomps == zram->num_active_comps - 1) + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + return 0; + } + + /* Successful recompression but above threshold */ + if (threshold && comp_len_new >= threshold) + return 0; + + /* + * No direct reclaim (slow path) for handle allocation and no + * re-compression attempt (unlike in __zram_bvec_write()) since + * we already have stored that object in zsmalloc. If we cannot + * alloc memory for recompressed object then we bail out and + * simply keep the old (existing) object in zsmalloc. + */ + handle_new = zs_malloc(zram->mem_pool, comp_len_new, + __GFP_KSWAPD_RECLAIM | + __GFP_NOWARN | + __GFP_HIGHMEM | + __GFP_MOVABLE); + if (IS_ERR_VALUE(handle_new)) { + zcomp_stream_put(zram->comps[prio]); + return PTR_ERR((void *)handle_new); + } + + dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); + memcpy(dst, zstrm->buffer, comp_len_new); + zcomp_stream_put(zram->comps[prio]); + + zs_unmap_object(zram->mem_pool, handle_new); + + zram_free_page(zram, index); + zram_set_handle(zram, index, handle_new); + zram_set_obj_size(zram, index, comp_len_new); + zram_set_priority(zram, index, prio); + + atomic64_add(comp_len_new, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); + + return 0; +} + +#define RECOMPRESS_IDLE (1 << 0) +#define RECOMPRESS_HUGE (1 << 1) + +static ssize_t recompress_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; + struct zram *zram = dev_to_zram(dev); + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + char *args, *param, *val, *algo = NULL; + u32 mode = 0, threshold = 0; + unsigned long index; + struct page *page; + ssize_t ret; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, &param, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "type")) { + if (!strcmp(val, "idle")) + mode = RECOMPRESS_IDLE; + if (!strcmp(val, "huge")) + mode = RECOMPRESS_HUGE; + if (!strcmp(val, "huge_idle")) + mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; + continue; + } + + if (!strcmp(param, "threshold")) { + /* + * We will re-compress only idle objects equal or + * greater in size than watermark. 
+ */ + ret = kstrtouint(val, 10, &threshold); + if (ret) + return ret; + continue; + } + + if (!strcmp(param, "algo")) { + algo = val; + continue; + } + } + + if (threshold >= PAGE_SIZE) + return -EINVAL; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + ret = -EINVAL; + goto release_init_lock; + } + + if (algo) { + bool found = false; + + for (; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; + + if (!strcmp(zram->comp_algs[prio], algo)) { + prio_max = min(prio + 1, ZRAM_MAX_COMPS); + found = true; + break; + } + } + + if (!found) { + ret = -EINVAL; + goto release_init_lock; + } + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto release_init_lock; + } + + ret = len; + for (index = 0; index < nr_pages; index++) { + int err = 0; + + zram_slot_lock(zram, index); + + if (!zram_allocated(zram, index)) + goto next; + + if (mode & RECOMPRESS_IDLE && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + + if (mode & RECOMPRESS_HUGE && + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + + if (zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_UNDER_WB) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + goto next; + + err = zram_recompress(zram, index, page, threshold, + prio, prio_max); +next: + zram_slot_unlock(zram, index); + if (err) { + ret = err; + break; + } + + cond_resched(); + } + + __free_page(page); + +release_init_lock: + up_read(&zram->init_lock); + return ret; +} +#endif + /* * zram_bio_discard - handler on discard request * @index: physical block index in PAGE_SIZE units @@ -1553,11 +1981,9 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int ret; if (!op_is_write(op)) { - atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); } else { - atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset, bio); } @@ -1710,6 +2136,21 @@ out: return ret; } +static void zram_destroy_comps(struct zram *zram) +{ + u32 prio; + + for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + struct zcomp *comp = zram->comps[prio]; + + zram->comps[prio] = NULL; + if (!comp) + continue; + zcomp_destroy(comp); + zram->num_active_comps--; + } +} + static void zram_reset_device(struct zram *zram) { down_write(&zram->init_lock); @@ -1727,11 +2168,11 @@ static void zram_reset_device(struct zram *zram) /* I/O operation under all of CPU are done so let's free */ zram_meta_free(zram, zram->disksize); zram->disksize = 0; + zram_destroy_comps(zram); memset(&zram->stats, 0, sizeof(zram->stats)); - zcomp_destroy(zram->comp); - zram->comp = NULL; reset_bdev(zram); + comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); up_write(&zram->init_lock); } @@ -1742,6 +2183,7 @@ static ssize_t disksize_store(struct device *dev, struct zcomp *comp; struct zram *zram = dev_to_zram(dev); int err; + u32 prio; disksize = memparse(buf, NULL); if (!disksize) @@ -1760,22 +2202,29 @@ static ssize_t disksize_store(struct device *dev, goto out_unlock; } - comp = zcomp_create(zram->compressor); - if (IS_ERR(comp)) { - pr_err("Cannot initialise %s compressing backend\n", - zram->compressor); - err = PTR_ERR(comp); - goto out_free_meta; - } + for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + if (!zram->comp_algs[prio]) + continue; + + comp = zcomp_create(zram->comp_algs[prio]); + if (IS_ERR(comp)) { + pr_err("Cannot initialise %s compressing backend\n", + 
zram->comp_algs[prio]); + err = PTR_ERR(comp); + goto out_free_comps; + } - zram->comp = comp; + zram->comps[prio] = comp; + zram->num_active_comps++; + } zram->disksize = disksize; set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; -out_free_meta: +out_free_comps: + zram_destroy_comps(zram); zram_meta_free(zram, disksize); out_unlock: up_write(&zram->init_lock); @@ -1860,6 +2309,10 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif +#ifdef CONFIG_ZRAM_MULTI_COMP +static DEVICE_ATTR_RW(recomp_algorithm); +static DEVICE_ATTR_WO(recompress); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1883,6 +2336,10 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_bd_stat.attr, #endif &dev_attr_debug_stat.attr, +#ifdef CONFIG_ZRAM_MULTI_COMP + &dev_attr_recomp_algorithm.attr, + &dev_attr_recompress.attr, +#endif NULL, }; @@ -1962,7 +2419,7 @@ static int zram_add(void) if (ret) goto out_cleanup_disk; - strscpy(zram->compressor, default_compressor, sizeof(zram->compressor)); + comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index a2bda53020fd..c5254626f051 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -40,6 +40,9 @@ */ #define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1) +/* Only 2 bits are allowed for comp priority index */ +#define ZRAM_COMP_PRIORITY_MASK 0x3 + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* zram slot is locked */ @@ -49,6 +52,10 @@ enum zram_pageflags { ZRAM_UNDER_WB, /* page is under writeback */ ZRAM_HUGE, /* Incompressible page */ ZRAM_IDLE, /* not accessed page since last idle marking */ + ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */ + + ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */ + ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */ __NR_ZRAM_PAGEFLAGS, }; @@ -69,8 +76,6 @@ struct zram_table_entry { struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ - atomic64_t num_reads; /* failed + successful */ - atomic64_t num_writes; /* --do-- */ atomic64_t failed_reads; /* can happen when memory is too low */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ @@ -89,10 +94,20 @@ struct zram_stats { #endif }; +#ifdef CONFIG_ZRAM_MULTI_COMP +#define ZRAM_PRIMARY_COMP 0U +#define ZRAM_SECONDARY_COMP 1U +#define ZRAM_MAX_COMPS 4U +#else +#define ZRAM_PRIMARY_COMP 0U +#define ZRAM_SECONDARY_COMP 0U +#define ZRAM_MAX_COMPS 1U +#endif + struct zram { struct zram_table_entry *table; struct zs_pool *mem_pool; - struct zcomp *comp; + struct zcomp *comps[ZRAM_MAX_COMPS]; struct gendisk *disk; /* Prevent concurrent execution of device init */ struct rw_semaphore init_lock; @@ -107,7 +122,8 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - char compressor[CRYPTO_MAX_ALG_NAME]; + const char *comp_algs[ZRAM_MAX_COMPS]; + s8 num_active_comps; /* * zram is claimed so open request will be failed */ |
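On the zram_drv.h hunk above: ZRAM_COMP_PRIORITY_BIT1/BIT2 reserve a two-bit field in zram_table_entry.flags, and the zram_set_priority()/zram_get_priority() helpers added in zram_drv.c mask and shift that field so a slot remembers which compression backend produced its data. A standalone sketch of the same packing on a plain unsigned long; the bit position used here is an assumption for the demo, in the kernel it comes from the zram_pageflags enum above ZRAM_FLAG_SHIFT.

/* Standalone illustration of the 2-bit compression-priority field that the
 * patch packs into zram_table_entry.flags. Bit position 28 is illustrative
 * only; the kernel derives it from enum zram_pageflags. */
#include <assert.h>
#include <stdio.h>

#define COMP_PRIORITY_MASK 0x3UL   /* two bits -> priorities 0..3 */
#define COMP_PRIORITY_BIT1 28      /* assumed position for this demo */

static void set_priority(unsigned long *flags, unsigned long prio)
{
    prio &= COMP_PRIORITY_MASK;
    /* clear the old value first, as zram_set_priority() does, so that
     * recompressing an already-recompressed page works */
    *flags &= ~(COMP_PRIORITY_MASK << COMP_PRIORITY_BIT1);
    *flags |= prio << COMP_PRIORITY_BIT1;
}

static unsigned long get_priority(unsigned long flags)
{
    return (flags >> COMP_PRIORITY_BIT1) & COMP_PRIORITY_MASK;
}

int main(void)
{
    unsigned long flags = 0;

    set_priority(&flags, 2);      /* secondary algorithm #2 */
    assert(get_priority(flags) == 2);
    set_priority(&flags, 1);      /* re-set: old bits must be cleared */
    assert(get_priority(flags) == 1);
    printf("flags=%#lx prio=%lu\n", flags, get_priority(flags));
    return 0;
}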
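The reworked update_used_max() earlier in the zram diff is also a compact example of the try_cmpxchg pattern: ratchet a shared maximum and give up as soon as the published value is already large enough. A user-space sketch with C11 atomics standing in for atomic_long_try_cmpxchg(), which likewise reloads the expected value on failure:

/* Lock-free "record the maximum" update, mirroring the reworked
 * update_used_max(): retry only while another thread published a smaller
 * value; bail out as soon as the stored maximum is already >= ours. */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long max_used_pages;

static void update_used_max(unsigned long pages)
{
    unsigned long cur_max = atomic_load(&max_used_pages);

    do {
        if (cur_max >= pages)
            return;
        /* on failure, cur_max is reloaded with the current value,
         * just as atomic_long_try_cmpxchg() updates its "old" argument */
    } while (!atomic_compare_exchange_weak(&max_used_pages, &cur_max, pages));
}

int main(void)
{
    update_used_max(100);
    update_used_max(50);    /* no effect, 100 is already recorded */
    update_used_max(200);
    printf("max_used_pages=%lu\n", atomic_load(&max_used_pages));
    return 0;
}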
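Finally, zram_recompress() only keeps a recompressed object when zs_lookup_class_index() reports a strictly smaller zsmalloc size class; a shorter byte count that stays in the same class frees no memory. The sketch below models that decision with an assumed uniform 64-byte class step, purely for illustration (zsmalloc's real class geometry differs); only the comparison logic mirrors the patch.

/* Illustrates the "did recompression actually make progress?" test from
 * zram_recompress(): compare size classes, not raw compressed lengths.
 * The 64-byte class step is an assumption for this demo only. */
#include <stdbool.h>
#include <stdio.h>

#define CLASS_STEP 64U

static unsigned int class_index(unsigned int len)
{
    return (len + CLASS_STEP - 1) / CLASS_STEP;
}

static bool recompression_helps(unsigned int old_len, unsigned int new_len,
                                unsigned int threshold)
{
    /* same rule as the loop in zram_recompress(): skip results that do
     * not move to a smaller class, or that are still above threshold */
    if (class_index(new_len) >= class_index(old_len))
        return false;
    if (threshold && new_len >= threshold)
        return false;
    return true;
}

int main(void)
{
    /* 1000 -> 970 bytes stays in the same (assumed) class: no gain */
    printf("%d\n", recompression_helps(1000, 970, 0));
    /* 1000 -> 640 bytes drops several classes: worth storing */
    printf("%d\n", recompression_helps(1000, 640, 0));
    return 0;
}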