Diffstat (limited to 'drivers/block/drbd/drbd_worker.c')
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 115
1 file changed, 94 insertions, 21 deletions
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 51fab978eb61..35dbb3dca47e 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -173,8 +173,8 @@ void drbd_peer_request_endio(struct bio *bio)
 {
 	struct drbd_peer_request *peer_req = bio->bi_private;
 	struct drbd_device *device = peer_req->peer_device->device;
-	int is_write = bio_data_dir(bio) == WRITE;
-	int is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
+	bool is_write = bio_data_dir(bio) == WRITE;
+	bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
 
 	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
@@ -248,18 +248,26 @@ void drbd_request_endio(struct bio *bio)
 
 	/* to avoid recursion in __req_mod */
 	if (unlikely(bio->bi_error)) {
-		if (bio_op(bio) == REQ_OP_DISCARD)
-			what = (bio->bi_error == -EOPNOTSUPP)
-				? DISCARD_COMPLETED_NOTSUPP
-				: DISCARD_COMPLETED_WITH_ERROR;
-		else
-			what = (bio_data_dir(bio) == WRITE)
-			? WRITE_COMPLETED_WITH_ERROR
-			: (bio_rw(bio) == READ)
-			  ? READ_COMPLETED_WITH_ERROR
-			  : READ_AHEAD_COMPLETED_WITH_ERROR;
-	} else
+		switch (bio_op(bio)) {
+		case REQ_OP_DISCARD:
+			if (bio->bi_error == -EOPNOTSUPP)
+				what = DISCARD_COMPLETED_NOTSUPP;
+			else
+				what = DISCARD_COMPLETED_WITH_ERROR;
+			break;
+		case REQ_OP_READ:
+			if (bio->bi_rw & REQ_RAHEAD)
+				what = READ_AHEAD_COMPLETED_WITH_ERROR;
+			else
+				what = READ_COMPLETED_WITH_ERROR;
+			break;
+		default:
+			what = WRITE_COMPLETED_WITH_ERROR;
+			break;
+		}
+	} else {
 		what = COMPLETED_OK;
+	}
 
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(bio->bi_error);
@@ -320,6 +328,10 @@ void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest)
 		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 		ahash_request_set_crypt(req, &sg, NULL, sg.length);
 		crypto_ahash_update(req);
+		/* REQ_OP_WRITE_SAME has only one segment,
+		 * checksum the payload only once. */
+		if (bio_op(bio) == REQ_OP_WRITE_SAME)
+			break;
 	}
 	ahash_request_set_crypt(req, NULL, digest, 0);
 	crypto_ahash_final(req);
@@ -387,7 +399,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
-				       size, true /* has real payload */, GFP_TRY);
+				       size, size, GFP_TRY);
 	if (!peer_req)
 		goto defer;
 
@@ -583,6 +595,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
 	int number, rollback_i, size;
 	int align, requeue = 0;
 	int i = 0;
+	int discard_granularity = 0;
 
 	if (unlikely(cancel))
 		return 0;
@@ -602,6 +615,12 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
 		return 0;
 	}
 
+	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
+		rcu_read_lock();
+		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
+		rcu_read_unlock();
+	}
+
 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
 	number = drbd_rs_number_requests(device);
 	if (number <= 0)
@@ -666,6 +685,9 @@ next_sector:
 			if (sector & ((1<<(align+3))-1))
 				break;
 
+			if (discard_granularity && size == discard_granularity)
+				break;
+
 			/* do not cross extent boundaries */
 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
 				break;
@@ -712,7 +734,8 @@ next_sector:
 		int err;
 
 		inc_rs_pending(device);
-		err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
+		err = drbd_send_drequest(peer_device,
+				 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
 				 sector, size, ID_SYNCER);
 		if (err) {
 			drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
@@ -829,6 +852,7 @@ static void ping_peer(struct drbd_device *device)
 
 int drbd_resync_finished(struct drbd_device *device)
 {
+	struct drbd_connection *connection = first_peer_device(device)->connection;
 	unsigned long db, dt, dbdt;
 	unsigned long n_oos;
 	union drbd_state os, ns;
@@ -850,8 +874,7 @@ int drbd_resync_finished(struct drbd_device *device)
 		if (dw) {
 			dw->w.cb = w_resync_finished;
 			dw->device = device;
-			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
-					&dw->w);
+			drbd_queue_work(&connection->sender_work, &dw->w);
 			return 1;
 		}
 		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
@@ -964,6 +987,30 @@ int drbd_resync_finished(struct drbd_device *device)
 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
 out_unlock:
 	spin_unlock_irq(&device->resource->req_lock);
+
+	/* If we have been sync source, and have an effective fencing-policy,
+	 * once *all* volumes are back in sync, call "unfence". */
+	if (os.conn == C_SYNC_SOURCE) {
+		enum drbd_disk_state disk_state = D_MASK;
+		enum drbd_disk_state pdsk_state = D_MASK;
+		enum drbd_fencing_p fp = FP_DONT_CARE;
+
+		rcu_read_lock();
+		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
+		if (fp != FP_DONT_CARE) {
+			struct drbd_peer_device *peer_device;
+			int vnr;
+			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+				struct drbd_device *device = peer_device->device;
+				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
+				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
+			}
+		}
+		rcu_read_unlock();
+		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
+			conn_khelper(connection, "unfence-peer");
+	}
+
 	put_ldev(device);
 out:
 	device->rs_total = 0;
@@ -1000,7 +1047,6 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
 
 /**
  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
- * @device:	DRBD device.
  * @w:		work object.
  * @cancel:	The connection will be closed anyways
  */
@@ -1036,6 +1082,30 @@ int w_e_end_data_req(struct drbd_work *w, int cancel)
 	return err;
 }
 
+static bool all_zero(struct drbd_peer_request *peer_req)
+{
+	struct page *page = peer_req->pages;
+	unsigned int len = peer_req->i.size;
+
+	page_chain_for_each(page) {
+		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
+		unsigned int i, words = l / sizeof(long);
+		unsigned long *d;
+
+		d = kmap_atomic(page);
+		for (i = 0; i < words; i++) {
+			if (d[i]) {
+				kunmap_atomic(d);
+				return false;
+			}
+		}
+		kunmap_atomic(d);
+		len -= l;
+	}
+
+	return true;
+}
+
 /**
  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
  * @w:		work object.
@@ -1064,7 +1134,10 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
 			inc_rs_pending(device);
-			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
+			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
+				err = drbd_send_rs_deallocated(peer_device, peer_req);
+			else
+				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
 		} else {
 			if (__ratelimit(&drbd_ratelimit_state))
 				drbd_err(device, "Not sending RSDataReply, "
@@ -1634,7 +1707,7 @@ static bool use_checksum_based_resync(struct drbd_connection *connection, struct
 	rcu_read_unlock();
 	return connection->agreed_pro_version >= 89 &&		/* supported? */
 		connection->csums_tfm &&			/* configured? */
-		(csums_after_crash_only == 0 /* use for each resync? */
+		(csums_after_crash_only == false /* use for each resync? */
 		|| test_bit(CRASHED_PRIMARY, &device->flags));  /* or only after Primary crash? */
 }
 
@@ -1769,7 +1842,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		device->bm_resync_fo = 0;
 		device->use_csums = use_checksum_based_resync(connection, device);
 	} else {
-		device->use_csums = 0;
+		device->use_csums = false;
 	}
 
 	/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
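An aside on the all_zero() helper added above: it scans the read-in resync block one machine word at a time, and w_e_end_rsdata_req() uses it to answer a P_RS_THIN_REQ with a "deallocated" reply (drbd_send_rs_deallocated()) instead of shipping zeroes over the wire. A minimal userspace sketch of the same scan, assuming a flat buffer in place of the kernel's page chain and kmap_atomic() mapping (buf_all_zero and block are made-up names for illustration):

/* Sketch: userspace analogue of all_zero(). The kernel walks a page
 * chain with page_chain_for_each()/kmap_atomic(); a flat buffer
 * stands in for that here. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool buf_all_zero(const void *buf, size_t len)
{
	const unsigned long *d = buf;
	size_t i, words = len / sizeof(unsigned long);

	/* Like all_zero(): scan whole machine words; a single nonzero
	 * word means the block carries real payload. */
	for (i = 0; i < words; i++)
		if (d[i])
			return false;
	return true;
}

int main(void)
{
	unsigned long block[512] = { 0 };

	printf("%d\n", buf_all_zero(block, sizeof(block)));	/* 1: report deallocated */
	block[100] = 1;
	printf("%d\n", buf_all_zero(block, sizeof(block)));	/* 0: send real data */
	return 0;
}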
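Similarly, the drbd_csum_bio() hunk relies on a REQ_OP_WRITE_SAME bio carrying its payload in a single segment that is logically repeated across the whole range, so the digest loop may stop after the first segment. A hedged sketch of that idea, with a toy checksum standing in for crypto_ahash and made-up seg/csum_* names:

/* Sketch: why one segment suffices for a write-same checksum. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct seg { const uint8_t *data; size_t len; };

/* Toy rolling checksum in place of crypto_ahash_update(). */
static uint32_t csum_update(uint32_t c, const uint8_t *d, size_t len)
{
	while (len--)
		c = c * 31 + *d++;
	return c;
}

static uint32_t csum_segments(const struct seg *segs, size_t nr,
			      bool write_same)
{
	uint32_t c = 0;
	size_t i;

	for (i = 0; i < nr; i++) {
		c = csum_update(c, segs[i].data, segs[i].len);
		if (write_same)
			break;	/* payload is defined only once */
	}
	return c;
}

int main(void)
{
	static const uint8_t payload[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	struct seg segs[4] = {
		{ payload, 8 }, { payload, 8 }, { payload, 8 }, { payload, 8 },
	};

	/* Digesting only the first segment of a write-same request
	 * equals digesting its single defining payload segment. */
	return csum_segments(segs, 4, true) == csum_segments(segs, 1, false) ? 0 : 1;
}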