summaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_receiver.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-22 05:19:38 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-22 05:19:38 +0300
commit641203549a21ba6a701aecd05c3dfc969ec670cc (patch)
tree5e3d177c380ed811b5bf37e0bf9b8098416a9bc6 /drivers/block/drbd/drbd_receiver.c
parent404a47410c26a115123885977053e9a1a4460929 (diff)
parente93d12ae3be91d18b2a46deebb90a3f516db3d3c (diff)
downloadlinux-641203549a21ba6a701aecd05c3dfc969ec670cc.tar.xz
Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "This is the block driver pull request for 4.5, with the exception of NVMe, which is in a separate branch and will be posted after this one. This pull request contains: - A set of bcache stability fixes, which have been acked by Kent. These have been used and tested for more than a year by the community, so it's about time that they got in. - A set of drbd updates from the drbd team (Andreas, Lars, Philipp) and Markus Elfring, Oleg Drokin. - A set of fixes for xen blkback/front from the usual suspects, (Bob, Konrad) as well as community based fixes from Kiri, Julien, and Peng. - A 2038 time fix for sx8 from Shraddha, with a fix from me. - A small mtip32xx cleanup from Zhu Yanjun. - A null_blk division fix from Arnd" * 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits) null_blk: use sector_div instead of do_div mtip32xx: restrict variables visible in current code module xen/blkfront: Fix crash if backend doesn't follow the right states. xen/blkback: Fix two memory leaks. xen/blkback: make st_ statistics per ring xen/blkfront: Handle non-indirect grant with 64KB pages xen-blkfront: Introduce blkif_ring_get_request xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule() xen/blkback: Free resources if connect_ring failed. xen/blocks: Return -EXX instead of -1 xen/blkback: make pool of persistent grants and free pages per-queue xen/blkback: get the number of hardware queues/rings from blkfront xen/blkback: pseudo support for multi hardware queues/rings xen/blkback: separate ring information out of struct xen_blkif xen/blkfront: correct setting for xen_blkif_max_ring_order xen/blkfront: make persistent grants pool per-queue xen/blkfront: Remove duplicate setting of ->xbdev. xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors. xen/blkfront: negotiate number of queues/rings to be used with backend xen/blkfront: split per device io_lock ...
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--drivers/block/drbd/drbd_receiver.c254
1 files changed, 139 insertions, 115 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index b4b5680ac6ad..1957fe8601dc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
}
}
-static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
+static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
LIST_HEAD(reclaimed);
struct drbd_peer_request *peer_req, *t;
@@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
spin_lock_irq(&device->resource->req_lock);
reclaim_finished_net_peer_reqs(device, &reclaimed);
spin_unlock_irq(&device->resource->req_lock);
-
list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
drbd_free_net_peer_req(device, peer_req);
}
+static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
+{
+ struct drbd_peer_device *peer_device;
+ int vnr;
+
+ rcu_read_lock();
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+ struct drbd_device *device = peer_device->device;
+ if (!atomic_read(&device->pp_in_use_by_net))
+ continue;
+
+ kref_get(&device->kref);
+ rcu_read_unlock();
+ drbd_reclaim_net_peer_reqs(device);
+ kref_put(&device->kref, drbd_destroy_device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
/**
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
* @device: DRBD device.
@@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
if (atomic_read(&device->pp_in_use) < mxb)
page = __drbd_alloc_pages(device, number);
+ /* Try to keep the fast path fast, but occasionally we need
+ * to reclaim the pages we lended to the network stack. */
+ if (page && atomic_read(&device->pp_in_use_by_net) > 512)
+ drbd_reclaim_net_peer_reqs(device);
+
while (page == NULL) {
prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
- drbd_kick_lo_and_reclaim_net(device);
+ drbd_reclaim_net_peer_reqs(device);
if (atomic_read(&device->pp_in_use) < mxb) {
page = __drbd_alloc_pages(device, number);
@@ -1099,7 +1123,15 @@ randomize:
return 0;
}
- drbd_thread_start(&connection->asender);
+ drbd_thread_start(&connection->ack_receiver);
+ /* opencoded create_singlethread_workqueue(),
+ * to be able to use format string arguments */
+ connection->ack_sender =
+ alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
+ if (!connection->ack_sender) {
+ drbd_err(connection, "Failed to create workqueue ack_sender\n");
+ return 0;
+ }
mutex_lock(&connection->resource->conf_update);
/* The discard_my_data flag is a single-shot modifier to the next
@@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection)
struct drbd_peer_device *peer_device;
int vnr;
- if (connection->resource->write_ordering >= WO_bdev_flush) {
+ if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
@@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection)
/* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */
- drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
+ drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
}
put_ldev(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
dc = rcu_dereference(bdev->disk_conf);
- if (wo == WO_bdev_flush && !dc->disk_flushes)
- wo = WO_drain_io;
- if (wo == WO_drain_io && !dc->disk_drain)
- wo = WO_none;
+ if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
+ wo = WO_DRAIN_IO;
+ if (wo == WO_DRAIN_IO && !dc->disk_drain)
+ wo = WO_NONE;
return wo;
}
@@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
enum write_ordering_e pwo;
int vnr;
static char *write_ordering_str[] = {
- [WO_none] = "none",
- [WO_drain_io] = "drain",
- [WO_bdev_flush] = "flush",
+ [WO_NONE] = "none",
+ [WO_DRAIN_IO] = "drain",
+ [WO_BDEV_FLUSH] = "flush",
};
pwo = resource->write_ordering;
- if (wo != WO_bdev_flush)
+ if (wo != WO_BDEV_FLUSH)
wo = min(pwo, wo);
rcu_read_lock();
idr_for_each_entry(&resource->devices, device, vnr) {
@@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
rcu_read_unlock();
resource->write_ordering = wo;
- if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+ if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
@@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
- conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ conn_wait_active_ee_empty(peer_req->peer_device->connection);
/* add it to the active list now,
* so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
@@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection)
rcu_read_unlock();
}
-static struct drbd_peer_device *
-conn_peer_device(struct drbd_connection *connection, int volume_number)
-{
- return idr_find(&connection->peer_devices, volume_number);
-}
-
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
int rv;
@@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
switch (connection->resource->write_ordering) {
- case WO_none:
+ case WO_NONE:
if (rv == FE_RECYCLED)
return 0;
@@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
/* Fall through */
- case WO_bdev_flush:
- case WO_drain_io:
+ case WO_BDEV_FLUSH:
+ case WO_DRAIN_IO:
conn_wait_active_ee_empty(connection);
drbd_flush(connection);
@@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
}
/*
- * e_end_resync_block() is called in asender context via
+ * e_end_resync_block() is called in ack_sender context via
* drbd_finish_peer_reqs().
*/
static int e_end_resync_block(struct drbd_work *w, int unused)
@@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
}
/*
- * e_end_block() is called in asender context via drbd_finish_peer_reqs().
+ * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
*/
static int e_end_block(struct drbd_work *w, int cancel)
{
@@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
} else
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
- drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+ drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
return err;
}
@@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
}
rcu_read_lock();
- tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
+ tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
rcu_read_unlock();
if (!tp)
@@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device,
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
- wake_asender(connection);
+ queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
err = -ENOENT;
goto out;
@@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
if (dp_flags & DP_SEND_RECEIVE_ACK) {
/* I really don't like it that the receiver thread
* sends on the msock, but anyways */
- drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+ drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
}
if (tp) {
@@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
os = ns = drbd_read_state(device);
spin_unlock_irq(&device->resource->req_lock);
- /* If some other part of the code (asender thread, timeout)
+ /* If some other part of the code (ack_receiver thread, timeout)
* already decided to close the connection again,
* we must not "re-establish" it here. */
if (os.conn <= C_TEAR_DOWN)
@@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection)
*/
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
- /* asender does not clean up anything. it must not interfere, either */
- drbd_thread_stop(&connection->asender);
+ /* ack_receiver does not clean up anything. it must not interfere, either */
+ drbd_thread_stop(&connection->ack_receiver);
+ if (connection->ack_sender) {
+ destroy_workqueue(connection->ack_sender);
+ connection->ack_sender = NULL;
+ }
drbd_free_sock(connection);
rcu_read_lock();
@@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
return 0;
}
-static int connection_finish_peer_reqs(struct drbd_connection *connection)
+struct meta_sock_cmd {
+ size_t pkt_size;
+ int (*fn)(struct drbd_connection *connection, struct packet_info *);
+};
+
+static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
- struct drbd_peer_device *peer_device;
- int vnr, not_empty = 0;
+ long t;
+ struct net_conf *nc;
- do {
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- flush_signals(current);
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ t = ping_timeout ? nc->ping_timeo : nc->ping_int;
+ rcu_read_unlock();
- rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- kref_get(&device->kref);
- rcu_read_unlock();
- if (drbd_finish_peer_reqs(device)) {
- kref_put(&device->kref, drbd_destroy_device);
- return 1;
- }
- kref_put(&device->kref, drbd_destroy_device);
- rcu_read_lock();
- }
- set_bit(SIGNAL_ASENDER, &connection->flags);
+ t *= HZ;
+ if (ping_timeout)
+ t /= 10;
- spin_lock_irq(&connection->resource->req_lock);
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- not_empty = !list_empty(&device->done_ee);
- if (not_empty)
- break;
- }
- spin_unlock_irq(&connection->resource->req_lock);
- rcu_read_unlock();
- } while (not_empty);
+ connection->meta.socket->sk->sk_rcvtimeo = t;
+}
- return 0;
+static void set_ping_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 1);
}
-struct asender_cmd {
- size_t pkt_size;
- int (*fn)(struct drbd_connection *connection, struct packet_info *);
-};
+static void set_idle_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 0);
+}
-static struct asender_cmd asender_tbl[] = {
+static struct meta_sock_cmd ack_receiver_tbl[] = {
[P_PING] = { 0, got_Ping },
[P_PING_ACK] = { 0, got_PingAck },
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
@@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = {
[P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
};
-int drbd_asender(struct drbd_thread *thi)
+int drbd_ack_receiver(struct drbd_thread *thi)
{
struct drbd_connection *connection = thi->connection;
- struct asender_cmd *cmd = NULL;
+ struct meta_sock_cmd *cmd = NULL;
struct packet_info pi;
+ unsigned long pre_recv_jif;
int rv;
void *buf = connection->meta.rbuf;
int received = 0;
unsigned int header_size = drbd_header_size(connection);
int expect = header_size;
bool ping_timeout_active = false;
- struct net_conf *nc;
- int ping_timeo, tcp_cork, ping_int;
struct sched_param param = { .sched_priority = 2 };
rv = sched_setscheduler(current, SCHED_RR, &param);
if (rv < 0)
- drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
+ drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- ping_timeo = nc->ping_timeo;
- tcp_cork = nc->tcp_cork;
- ping_int = nc->ping_int;
- rcu_read_unlock();
+ conn_reclaim_net_peer_reqs(connection);
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
if (drbd_send_ping(connection)) {
drbd_err(connection, "drbd_send_ping has failed\n");
goto reconnect;
}
- connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
+ set_ping_timeout(connection);
ping_timeout_active = true;
}
- /* TODO: conditionally cork; it may hurt latency if we cork without
- much to send */
- if (tcp_cork)
- drbd_tcp_cork(connection->meta.socket);
- if (connection_finish_peer_reqs(connection)) {
- drbd_err(connection, "connection_finish_peer_reqs() failed\n");
- goto reconnect;
- }
- /* but unconditionally uncork unless disabled */
- if (tcp_cork)
- drbd_tcp_uncork(connection->meta.socket);
-
- /* short circuit, recv_msg would return EINTR anyways. */
- if (signal_pending(current))
- continue;
-
+ pre_recv_jif = jiffies;
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
- clear_bit(SIGNAL_ASENDER, &connection->flags);
-
- flush_signals(current);
/* Note:
* -EINTR (on meta) we got a signal
@@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi)
* rv < expected: "woken" by signal during receive
* rv == 0 : "connection shut down by peer"
*/
-received_more:
if (likely(rv > 0)) {
received += rv;
buf += rv;
@@ -5584,8 +5579,7 @@ received_more:
} else if (rv == -EAGAIN) {
/* If the data socket received something meanwhile,
* that is good enough: peer is still alive. */
- if (time_after(connection->last_received,
- jiffies - connection->meta.socket->sk->sk_rcvtimeo))
+ if (time_after(connection->last_received, pre_recv_jif))
continue;
if (ping_timeout_active) {
drbd_err(connection, "PingAck did not arrive in time.\n");
@@ -5594,6 +5588,10 @@ received_more:
set_bit(SEND_PING, &connection->flags);
continue;
} else if (rv == -EINTR) {
+ /* maybe drbd_thread_stop(): the while condition will notice.
+ * maybe woken for send_ping: we'll send a ping above,
+ * and change the rcvtimeo */
+ flush_signals(current);
continue;
} else {
drbd_err(connection, "sock_recvmsg returned %d\n", rv);
@@ -5603,8 +5601,8 @@ received_more:
if (received == expect && cmd == NULL) {
if (decode_header(connection, connection->meta.rbuf, &pi))
goto reconnect;
- cmd = &asender_tbl[pi.cmd];
- if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
+ cmd = &ack_receiver_tbl[pi.cmd];
+ if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
cmdname(pi.cmd), pi.cmd);
goto disconnect;
@@ -5627,9 +5625,8 @@ received_more:
connection->last_received = jiffies;
- if (cmd == &asender_tbl[P_PING_ACK]) {
- /* restore idle timeout */
- connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
+ if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
+ set_idle_timeout(connection);
ping_timeout_active = false;
}
@@ -5638,11 +5635,6 @@ received_more:
expect = header_size;
cmd = NULL;
}
- if (test_bit(SEND_PING, &connection->flags))
- continue;
- rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
- if (rv > 0)
- goto received_more;
}
if (0) {
@@ -5654,9 +5646,41 @@ reconnect:
disconnect:
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
}
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- drbd_info(connection, "asender terminated\n");
+ drbd_info(connection, "ack_receiver terminated\n");
return 0;
}
+
+void drbd_send_acks_wf(struct work_struct *ws)
+{
+ struct drbd_peer_device *peer_device =
+ container_of(ws, struct drbd_peer_device, send_acks_work);
+ struct drbd_connection *connection = peer_device->connection;
+ struct drbd_device *device = peer_device->device;
+ struct net_conf *nc;
+ int tcp_cork, err;
+
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ tcp_cork = nc->tcp_cork;
+ rcu_read_unlock();
+
+ if (tcp_cork)
+ drbd_tcp_cork(connection->meta.socket);
+
+ err = drbd_finish_peer_reqs(device);
+ kref_put(&device->kref, drbd_destroy_device);
+ /* get is in drbd_endio_write_sec_final(). That is necessary to keep the
+ struct work_struct send_acks_work alive, which is in the peer_device object */
+
+ if (err) {
+ conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+ return;
+ }
+
+ if (tcp_cork)
+ drbd_tcp_uncork(connection->meta.socket);
+
+ return;
+}