summary refs log tree commit diff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r-- drivers/net/hyperv/hyperv_net.h 19
-rw-r--r-- drivers/net/hyperv/netvsc.c 256
-rw-r--r-- drivers/net/hyperv/netvsc_drv.c 261
-rw-r--r-- drivers/net/hyperv/rndis_filter.c 115
4 files changed, 375 insertions, 276 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index db23cb36ae5c..262b2ea576a3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -196,6 +196,7 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
+int netvsc_poll(struct napi_struct *napi, int budget);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
int rndis_filter_device_add(struct hv_device *dev,
@@ -632,7 +633,7 @@ struct nvsp_message {
#define NETVSC_PACKET_SIZE 4096
-#define VRSS_SEND_TAB_SIZE 16
+#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
#define VRSS_CHANNEL_DEFAULT 8
@@ -685,7 +686,7 @@ struct net_device_context {
/* point back to our device context */
struct hv_device *device_ctx;
/* netvsc_device */
- struct netvsc_device *nvdev;
+ struct netvsc_device __rcu *nvdev;
/* reconfigure work */
struct delayed_work dwork;
/* last reconfig time */
@@ -707,9 +708,6 @@ struct net_device_context {
u32 speed;
struct netvsc_ethtool_stats eth_stats;
- /* the device is going away */
- bool start_remove;
-
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
@@ -722,6 +720,8 @@ struct net_device_context {
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
+ const struct vmpacket_descriptor *desc;
+ struct napi_struct napi;
struct multi_send_data msd;
struct multi_recv_comp mrc;
atomic_t queue_sends;
@@ -760,8 +760,8 @@ struct netvsc_device {
u32 max_chn;
u32 num_chn;
- spinlock_t sc_lock; /* Protects num_sc_offered variable */
- u32 num_sc_offered;
+
+ refcount_t sc_offered;
/* Holds rndis device info */
void *extension;
@@ -776,6 +776,8 @@ struct netvsc_device {
atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
+
+ struct rcu_head rcu;
};
static inline struct netvsc_device *
@@ -1424,9 +1426,6 @@ struct rndis_message {
((void *) rndis_msg)
-#define __struct_bcount(x)
-
-
#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \
sizeof(union rndis_message_container))
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 15ef713d96c0..15749d359e60 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -80,8 +80,10 @@ static struct netvsc_device *alloc_net_device(void)
return net_device;
}
-static void free_netvsc_device(struct netvsc_device *nvdev)
+static void free_netvsc_device(struct rcu_head *head)
{
+ struct netvsc_device *nvdev
+ = container_of(head, struct netvsc_device, rcu);
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
@@ -90,14 +92,9 @@ static void free_netvsc_device(struct netvsc_device *nvdev)
kfree(nvdev);
}
-
-static inline bool netvsc_channel_idle(const struct netvsc_device *net_device,
- u16 q_idx)
+static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
- const struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
-
- return atomic_read(&net_device->num_outstanding_recvs) == 0 &&
- atomic_read(&nvchan->queue_sends) == 0;
+ call_rcu(&nvdev->rcu, free_netvsc_device);
}
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
@@ -138,6 +135,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+ /* If the failure is because the channel is rescinded,
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This allows us to clean up properly
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
/*
* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
@@ -198,6 +202,15 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+
+ /* If the failure is because the channel is rescinded,
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This allows us to clean up properly
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
+
/* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
*/
@@ -555,10 +568,11 @@ void netvsc_device_remove(struct hv_device *device)
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct netvsc_device *net_device = net_device_ctx->nvdev;
+ int i;
netvsc_disconnect_vsp(device);
- net_device_ctx->nvdev = NULL;
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
/*
* At this point, no one should be accessing net_device
@@ -569,8 +583,12 @@ void netvsc_device_remove(struct hv_device *device)
/* Now, we can close the channel safely */
vmbus_close(device->channel);
+ /* And disassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
+
/* Release all resources */
- free_netvsc_device(net_device);
+ free_netvsc_device_rcu(net_device);
}
#define RING_AVAIL_PERCENT_HIWATER 20
@@ -599,11 +617,11 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -627,7 +645,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
tx_stats->bytes += packet->total_bytes;
u64_stats_update_end(&tx_stats->syncp);
- dev_consume_skb_any(skb);
+ napi_consume_skb(skb, budget);
}
queue_sends =
@@ -637,7 +655,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
wake_up(&net_device->wait_drain);
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
- !net_device_ctx->start_remove &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
queue_sends < 1))
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
@@ -646,14 +663,12 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
static void netvsc_send_completion(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct nvsp_message *nvsp_packet;
+ struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
struct net_device *ndev = hv_get_drvdata(device);
- nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
- (packet->offset8 << 3));
-
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -667,7 +682,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
netvsc_send_tx_complete(net_device, incoming_channel,
- device, packet);
+ device, desc, budget);
break;
default:
@@ -709,8 +724,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
packet->page_buf_cnt;
/* Add padding */
- if (skb && skb->xmit_more && remain &&
- !packet->cp_partial) {
+ if (skb->xmit_more && remain && !packet->cp_partial) {
padding = net_device->pkt_align - remain;
rndis_msg->msg_len += padding;
packet->total_data_buflen += padding;
@@ -868,9 +882,7 @@ int netvsc_send(struct hv_device *device,
if (msdp->pkt)
msd_len = msdp->pkt->total_data_buflen;
- try_batch = (skb != NULL) && msd_len > 0 && msdp->count <
- net_device->max_pkt;
-
+ try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
if (try_batch && msd_len + pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = msdp->pkt->send_buf_index;
@@ -880,7 +892,7 @@ int netvsc_send(struct hv_device *device,
section_index = msdp->pkt->send_buf_index;
packet->cp_partial = true;
- } else if ((skb != NULL) && pktlen + net_device->pkt_align <
+ } else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
if (section_index != NETVSC_INVALID_INDEX) {
@@ -1065,28 +1077,29 @@ static inline struct recv_comp_data *get_recv_comp_slot(
return rcd;
}
-static void netvsc_receive(struct net_device *ndev,
+static int netvsc_receive(struct net_device *ndev,
struct netvsc_device *net_device,
struct net_device_context *net_device_ctx,
struct hv_device *device,
struct vmbus_channel *channel,
- struct vmtransfer_page_packet_header *vmxferpage_packet,
+ const struct vmpacket_descriptor *desc,
struct nvsp_message *nvsp)
{
+ const struct vmtransfer_page_packet_header *vmxferpage_packet
+ = container_of(desc, const struct vmtransfer_page_packet_header, d);
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
char *recv_buf = net_device->recv_buf;
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
int ret;
- struct recv_comp_data *rcd;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
netif_err(net_device_ctx, rx_err, ndev,
"Unknown nvsp packet type received %u\n",
nvsp->hdr.msg_type);
- return;
+ return 0;
}
if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
@@ -1094,7 +1107,7 @@ static void netvsc_receive(struct net_device *ndev,
"Invalid xfer page set id - expecting %x got %x\n",
NETVSC_RECEIVE_BUFFER_ID,
vmxferpage_packet->xfer_pageset_id);
- return;
+ return 0;
}
count = vmxferpage_packet->range_cnt;
@@ -1110,26 +1123,26 @@ static void netvsc_receive(struct net_device *ndev,
channel, data, buflen);
}
- if (!net_device->chan_table[q_idx].mrc.buf) {
+ if (net_device->chan_table[q_idx].mrc.buf) {
+ struct recv_comp_data *rcd;
+
+ rcd = get_recv_comp_slot(net_device, channel, q_idx);
+ if (rcd) {
+ rcd->tid = vmxferpage_packet->d.trans_id;
+ rcd->status = status;
+ } else {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, vmxferpage_packet->d.trans_id);
+ }
+ } else {
ret = netvsc_send_recv_completion(channel,
vmxferpage_packet->d.trans_id,
status);
if (ret)
netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
q_idx, vmxferpage_packet->d.trans_id, ret);
- return;
}
-
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
-
- if (!rcd) {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- return;
- }
-
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
+ return count;
}
static void netvsc_send_table(struct hv_device *hdev,
@@ -1175,28 +1188,25 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
}
}
-static void netvsc_process_raw_pkt(struct hv_device *device,
- struct vmbus_channel *channel,
- struct netvsc_device *net_device,
- struct net_device *ndev,
- u64 request_id,
- struct vmpacket_descriptor *desc)
+static int netvsc_process_raw_pkt(struct hv_device *device,
+ struct vmbus_channel *channel,
+ struct netvsc_device *net_device,
+ struct net_device *ndev,
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct nvsp_message *nvmsg
- = (struct nvsp_message *)((unsigned long)desc
- + (desc->offset8 << 3));
+ struct nvsp_message *nvmsg = hv_pkt_data(desc);
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(net_device, channel, device, desc);
+ netvsc_send_completion(net_device, channel, device,
+ desc, budget);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(ndev, net_device, net_device_ctx,
- device, channel,
- (struct vmtransfer_page_packet_header *)desc,
- nvmsg);
+ return netvsc_receive(ndev, net_device, net_device_ctx,
+ device, channel, desc, nvmsg);
break;
case VM_PKT_DATA_INBAND:
@@ -1205,53 +1215,74 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
default:
netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
- desc->type, request_id);
+ desc->type, desc->trans_id);
break;
}
+
+ return 0;
}
-void netvsc_channel_cb(void *context)
+static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = context;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
- struct hv_device *device;
- struct netvsc_device *net_device;
- struct vmpacket_descriptor *desc;
- struct net_device *ndev;
- bool need_to_commit = false;
+ struct vmbus_channel *primary = channel->primary_channel;
- if (channel->primary_channel != NULL)
- device = channel->primary_channel->device_obj;
- else
- device = channel->device_obj;
+ return primary ? primary->device_obj : channel->device_obj;
+}
- ndev = hv_get_drvdata(device);
- if (unlikely(!ndev))
- return;
+/* Network processing softirq
+ * Process data in incoming ring buffer from host
+ * Stops when ring is empty or budget is met or exceeded.
+ */
+int netvsc_poll(struct napi_struct *napi, int budget)
+{
+ struct netvsc_channel *nvchan
+ = container_of(napi, struct netvsc_channel, napi);
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_device *device = netvsc_channel_to_device(channel);
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ struct net_device *ndev = hv_get_drvdata(device);
+ struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ int work_done = 0;
- net_device = net_device_to_netvsc_device(ndev);
- if (unlikely(!net_device))
- return;
+ /* If starting a new interval */
+ if (!nvchan->desc)
+ nvchan->desc = hv_pkt_iter_first(channel);
- if (unlikely(net_device->destroy &&
- netvsc_channel_idle(net_device, q_idx)))
- return;
+ while (nvchan->desc && work_done < budget) {
+ work_done += netvsc_process_raw_pkt(device, channel, net_device,
+ ndev, nvchan->desc, budget);
+ nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
+ }
- /* commit_rd_index() -> hv_signal_on_read() needs this. */
- init_cached_read_index(channel);
+ /* If receive ring was exhausted
+ * and not doing busy poll
+ * then re-enable host interrupts
+ * and reschedule if ring is not empty.
+ */
+ if (work_done < budget &&
+ napi_complete_done(napi, work_done) &&
+ hv_end_read(&channel->inbound) != 0)
+ napi_reschedule(napi);
- while ((desc = get_next_pkt_raw(channel)) != NULL) {
- netvsc_process_raw_pkt(device, channel, net_device,
- ndev, desc->trans_id, desc);
+ netvsc_chk_recv_comp(net_device, channel, q_idx);
- put_pkt_raw(channel, desc);
- need_to_commit = true;
- }
+ /* Driver may overshoot since multiple packets per descriptor */
+ return min(work_done, budget);
+}
- if (need_to_commit)
- commit_rd_index(channel);
+/* Call back when data is available in host ring buffer.
+ * Processing is deferred until network softirq (NAPI)
+ */
+void netvsc_channel_cb(void *context)
+{
+ struct netvsc_channel *nvchan = context;
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ if (napi_schedule_prep(&nvchan->napi)) {
+ /* disable interrupts from host */
+ hv_begin_read(&nvchan->channel->inbound);
+
+ __napi_schedule(&nvchan->napi);
+ }
}
/*
@@ -1273,10 +1304,29 @@ int netvsc_device_add(struct hv_device *device,
net_device->ring_size = ring_size;
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(device->channel, HV_CALL_ISR);
+
+ /* If we're reopening the device we may have multiple queues, fill the
+ * chn_table with the default channel to use it before subchannels are
+ * opened.
+ * Initialize the channel state before we open;
+ * we can be interrupted as soon as we open the channel.
+ */
+
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+ struct netvsc_channel *nvchan = &net_device->chan_table[i];
+
+ nvchan->channel = device->channel;
+ }
+
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, device->channel);
+ netvsc_channel_cb,
+ net_device->chan_table);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -1286,19 +1336,15 @@ int netvsc_device_add(struct hv_device *device,
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
- /* If we're reopening the device we may have multiple queues, fill the
- * chn_table with the default channel to use it before subchannels are
- * opened.
- */
- for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- net_device->chan_table[i].channel = device->channel;
+ /* Enable NAPI handler for init callbacks */
+ netif_napi_add(ndev, &net_device->chan_table[0].napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+ napi_enable(&net_device->chan_table[0].napi);
/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
* populated.
*/
- wmb();
-
- net_device_ctx->nvdev = net_device;
+ rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device);
@@ -1311,11 +1357,13 @@ int netvsc_device_add(struct hv_device *device,
return ret;
close:
+ netif_napi_del(&net_device->chan_table[0].napi);
+
/* Now, we can close the channel safely */
vmbus_close(device->channel);
cleanup:
- free_netvsc_device(net_device);
+ free_netvsc_device(&net_device->rcu);
return ret;
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5ede87f30463..4421a6d00375 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -62,7 +62,7 @@ static void do_set_multicast(struct work_struct *w)
container_of(w, struct net_device_context, work);
struct hv_device *device_obj = ndevctx->device_ctx;
struct net_device *ndev = hv_get_drvdata(device_obj);
- struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
struct rndis_device *rdev;
if (!nvdev)
@@ -116,7 +116,7 @@ static int netvsc_open(struct net_device *net)
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret;
u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
@@ -191,6 +191,54 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
+/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
+ * hash for non-TCP traffic with only IP numbers.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
+{
+ struct flow_keys flow;
+ u32 hash;
+ static u32 hashrnd __read_mostly;
+
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+ return 0;
+
+ if (flow.basic.ip_proto == IPPROTO_TCP) {
+ return skb_get_hash(skb);
+ } else {
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
+ else
+ hash = 0;
+
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+ }
+
+ return hash;
+}
+
+static inline int netvsc_get_tx_queue(struct net_device *ndev,
+ struct sk_buff *skb, int old_idx)
+{
+ const struct net_device_context *ndc = netdev_priv(ndev);
+ struct sock *sk = skb->sk;
+ int q_idx;
+
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ (VRSS_SEND_TAB_SIZE - 1)];
+
+ /* If queue index changed record the new value */
+ if (q_idx != old_idx &&
+ sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, q_idx);
+
+ return q_idx;
+}
+
/*
* Select queue for transmit.
*
@@ -205,24 +253,22 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
unsigned int num_tx_queues = ndev->real_num_tx_queues;
- struct sock *sk = skb->sk;
- int q_idx = sk_tx_queue_get(sk);
-
- if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
- u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
- int new_idx;
-
- new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
+ int q_idx = sk_tx_queue_get(skb->sk);
- if (q_idx != new_idx && sk &&
- sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_idx);
-
- q_idx = new_idx;
+ if (q_idx < 0 || skb->ooo_okay) {
+ /* If forwarding a packet, we use the recorded queue when
+ * available for better cache locality.
+ */
+ if (skb_rx_queue_recorded(skb))
+ q_idx = skb_get_rx_queue(skb);
+ else
+ q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
}
+ while (unlikely(q_idx >= num_tx_queues))
+ q_idx -= num_tx_queues;
+
return q_idx;
}
@@ -584,13 +630,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
+ struct napi_struct *napi,
const struct ndis_tcp_ip_checksum_info *csum_info,
const struct ndis_pkt_8021q_info *vlan,
void *data, u32 buflen)
{
struct sk_buff *skb;
- skb = netdev_alloc_skb_ip_align(net, buflen);
+ skb = napi_alloc_skb(napi, buflen);
if (!skb)
return skb;
@@ -636,12 +683,12 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_pkt_8021q_info *vlan)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device;
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
struct net_device *vf_netdev;
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
-
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
@@ -654,13 +701,20 @@ int netvsc_recv_callback(struct net_device *net,
* interface in the guest.
*/
rcu_read_lock();
+ net_device = rcu_dereference(net_device_ctx->nvdev);
+ if (unlikely(!net_device))
+ goto drop;
+
+ nvchan = &net_device->chan_table[q_idx];
vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
if (vf_netdev && (vf_netdev->flags & IFF_UP))
net = vf_netdev;
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
+ skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
+ csum_info, vlan, data, len);
if (unlikely(!skb)) {
+drop:
++net->stats.rx_dropped;
rcu_read_unlock();
return NVSP_STAT_FAIL;
@@ -674,7 +728,7 @@ int netvsc_recv_callback(struct net_device *net,
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = &net_device->chan_table[q_idx].rx_stats;
+ rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
rx_stats->bytes += len;
@@ -685,12 +739,7 @@ int netvsc_recv_callback(struct net_device *net,
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
- /*
- * Pass the skb back up. Network stack will deallocate the skb when it
- * is done.
- * TODO - use NAPI?
- */
- netif_receive_skb(skb);
+ napi_gro_receive(&nvchan->napi, skb);
rcu_read_unlock();
return 0;
@@ -707,7 +756,7 @@ static void netvsc_get_channels(struct net_device *net,
struct ethtool_channels *channel)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
if (nvdev) {
channel->max_combined = nvdev->max_chn;
@@ -744,8 +793,9 @@ static int netvsc_set_channels(struct net_device *net,
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
unsigned int count = channels->combined_count;
+ bool was_running;
int ret;
/* We do not support separate count for rx, tx, or other */
@@ -756,7 +806,7 @@ static int netvsc_set_channels(struct net_device *net,
if (count > net->num_tx_queues || count > net->num_rx_queues)
return -EINVAL;
- if (net_device_ctx->start_remove || !nvdev || nvdev->destroy)
+ if (!nvdev || nvdev->destroy)
return -ENODEV;
if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
@@ -765,11 +815,13 @@ static int netvsc_set_channels(struct net_device *net,
if (count > nvdev->max_chn)
return -EINVAL;
- ret = netvsc_close(net);
- if (ret)
- return ret;
+ was_running = netif_running(net);
+ if (was_running) {
+ ret = netvsc_close(net);
+ if (ret)
+ return ret;
+ }
- net_device_ctx->start_remove = true;
rndis_filter_device_remove(dev, nvdev);
ret = netvsc_set_queues(net, dev, count);
@@ -778,8 +830,8 @@ static int netvsc_set_channels(struct net_device *net,
else
netvsc_set_queues(net, dev, nvdev->num_chn);
- netvsc_open(net);
- net_device_ctx->start_remove = false;
+ if (was_running)
+ ret = netvsc_open(net);
/* We may have missed link change notifications */
schedule_delayed_work(&net_device_ctx->dwork, 0);
@@ -787,18 +839,19 @@ static int netvsc_set_channels(struct net_device *net,
return ret;
}
-static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd)
+static bool
+netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
{
- struct ethtool_cmd diff1 = *cmd;
- struct ethtool_cmd diff2 = {};
+ struct ethtool_link_ksettings diff1 = *cmd;
+ struct ethtool_link_ksettings diff2 = {};
- ethtool_cmd_speed_set(&diff1, 0);
- diff1.duplex = 0;
+ diff1.base.speed = 0;
+ diff1.base.duplex = 0;
/* advertising and cmd are usually set */
- diff1.advertising = 0;
- diff1.cmd = 0;
+ ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
+ diff1.base.cmd = 0;
/* We set port to PORT_OTHER */
- diff2.port = PORT_OTHER;
+ diff2.base.port = PORT_OTHER;
return !memcmp(&diff1, &diff2, sizeof(diff1));
}
@@ -808,33 +861,35 @@ static void netvsc_init_settings(struct net_device *dev)
struct net_device_context *ndc = netdev_priv(dev);
ndc->speed = SPEED_UNKNOWN;
- ndc->duplex = DUPLEX_UNKNOWN;
+ ndc->duplex = DUPLEX_FULL;
}
-static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
- ethtool_cmd_speed_set(cmd, ndc->speed);
- cmd->duplex = ndc->duplex;
- cmd->port = PORT_OTHER;
+ cmd->base.speed = ndc->speed;
+ cmd->base.duplex = ndc->duplex;
+ cmd->base.port = PORT_OTHER;
return 0;
}
-static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
u32 speed;
- speed = ethtool_cmd_speed(cmd);
+ speed = cmd->base.speed;
if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->duplex) ||
+ !ethtool_validate_duplex(cmd->base.duplex) ||
!netvsc_validate_ethtool_ss_cmd(cmd))
return -EINVAL;
ndc->speed = speed;
- ndc->duplex = cmd->duplex;
+ ndc->duplex = cmd->base.duplex;
return 0;
}
@@ -842,24 +897,27 @@ static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
- struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
- int ret;
+ bool was_running;
+ int ret = 0;
- if (ndevctx->start_remove || !nvdev || nvdev->destroy)
+ if (!nvdev || nvdev->destroy)
return -ENODEV;
- ret = netvsc_close(ndev);
- if (ret)
- goto out;
+ was_running = netif_running(ndev);
+ if (was_running) {
+ ret = netvsc_close(ndev);
+ if (ret)
+ return ret;
+ }
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
device_info.max_num_vrss_chns = nvdev->num_chn;
- ndevctx->start_remove = true;
rndis_filter_device_remove(hdev, nvdev);
/* 'nvdev' has been freed in rndis_filter_device_remove() ->
@@ -872,9 +930,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
rndis_filter_device_add(hdev, &device_info);
-out:
- netvsc_open(ndev);
- ndevctx->start_remove = false;
+ if (was_running)
+ ret = netvsc_open(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
@@ -886,7 +943,7 @@ static void netvsc_get_stats64(struct net_device *net,
struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = ndev_ctx->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
int i;
if (!nvdev)
@@ -971,7 +1028,10 @@ static const struct {
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
switch (string_set) {
case ETH_SS_STATS:
@@ -985,13 +1045,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
const struct netvsc_stats *qstats;
unsigned int start;
u64 packets, bytes;
int i, j;
+ if (!nvdev)
+ return;
+
for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
@@ -1020,10 +1083,13 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
u8 *p = data;
int i;
+ if (!nvdev)
+ return;
+
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
@@ -1075,7 +1141,10 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
u32 *rules)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = ndc->nvdev;
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
@@ -1111,13 +1180,17 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = ndc->nvdev;
- struct rndis_device *rndis_dev = ndev->extension;
+ struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
int i;
+ if (!ndev)
+ return -ENODEV;
+
if (hfunc)
*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
+ rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
indir[i] = rndis_dev->ind_table[i];
@@ -1133,13 +1206,17 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = ndc->nvdev;
- struct rndis_device *rndis_dev = ndev->extension;
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
int i;
+ if (!ndev)
+ return -ENODEV;
+
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
+ rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
if (indir[i] >= dev->num_rx_queues)
@@ -1168,13 +1245,13 @@ static const struct ethtool_ops ethtool_ops = {
.get_channels = netvsc_get_channels,
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
- .get_settings = netvsc_get_settings,
- .set_settings = netvsc_set_settings,
.get_rxnfc = netvsc_get_rxnfc,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
+ .get_link_ksettings = netvsc_get_link_ksettings,
+ .set_link_ksettings = netvsc_set_link_ksettings,
};
static const struct net_device_ops device_ops = {
@@ -1210,10 +1287,10 @@ static void netvsc_link_change(struct work_struct *w)
unsigned long flags, next_reconfig, delay;
rtnl_lock();
- if (ndev_ctx->start_remove)
+ net_device = rtnl_dereference(ndev_ctx->nvdev);
+ if (!net_device)
goto out_unlock;
- net_device = ndev_ctx->nvdev;
rdev = net_device->extension;
next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
@@ -1354,7 +1431,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
@@ -1380,7 +1457,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
@@ -1414,7 +1491,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
netvsc_switch_datapath(ndev, false);
@@ -1474,8 +1551,6 @@ static int netvsc_probe(struct hv_device *dev,
hv_set_drvdata(dev, net);
- net_device_ctx->start_remove = false;
-
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
@@ -1492,8 +1567,7 @@ static int netvsc_probe(struct hv_device *dev,
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT,
- num_online_cpus());
+ device_info.num_chn = VRSS_CHANNEL_DEFAULT;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
@@ -1509,6 +1583,7 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
+ /* RCU not necessary here, device not registered */
nvdev = net_device_ctx->nvdev;
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
@@ -1544,26 +1619,20 @@ static int netvsc_remove(struct hv_device *dev)
ndev_ctx = netdev_priv(net);
- /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels()
- * removing the device.
- */
- rtnl_lock();
- ndev_ctx->start_remove = true;
- rtnl_unlock();
+ netif_device_detach(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
cancel_work_sync(&ndev_ctx->work);
- /* Stop outbound asap */
- netif_tx_disable(net);
-
- unregister_netdev(net);
-
/*
* Call to the vsc driver to let it know that the device is being
- * removed
+ * removed. Also blocks mtu and channel changes.
*/
+ rtnl_lock();
rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+ rtnl_unlock();
+
+ unregister_netdev(net);
hv_set_drvdata(dev, NULL);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 19356f56b7b1..ab92c3c95951 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -819,16 +819,14 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
{
struct rndis_request *request;
struct rndis_set_request *set;
- struct rndis_set_complete *set_complete;
int ret;
request = get_rndis_request(dev, RNDIS_MSG_SET,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
- if (!request) {
- ret = -ENOMEM;
- goto cleanup;
- }
+ if (!request)
+ return -ENOMEM;
+
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
@@ -840,15 +838,11 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
- if (ret != 0)
- goto cleanup;
+ if (ret == 0)
+ wait_for_completion(&request->wait_event);
- wait_for_completion(&request->wait_event);
+ put_rndis_request(dev, request);
- set_complete = &request->response_msg.msg.set_complete;
-cleanup:
- if (request)
- put_rndis_request(dev, request);
return ret;
}
@@ -926,8 +920,6 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
struct netvsc_device *nvdev = net_device_ctx->nvdev;
- struct hv_device *hdev = net_device_ctx->device_ctx;
- ulong flags;
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -945,9 +937,10 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
dev->state = RNDIS_DEV_UNINITIALIZED;
cleanup:
- spin_lock_irqsave(&hdev->channel->inbound_lock, flags);
nvdev->destroy = true;
- spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags);
+
+ /* Force flag to be ordered before waiting */
+ wmb();
/* Wait for all send completions */
wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
@@ -996,26 +989,38 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
hv_get_drvdata(new_sc->primary_channel->device_obj);
struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
int ret;
- unsigned long flags;
if (chn_index >= nvscdev->num_chn)
return;
- nvscdev->chan_table[chn_index].mrc.buf
+ nvchan = nvscdev->chan_table + chn_index;
+ nvchan->mrc.buf
= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
+ if (!nvchan->mrc.buf)
+ return;
+
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(new_sc, HV_CALL_ISR);
+
+ /* Set the channel before opening.*/
+ nvchan->channel = new_sc;
+ netif_napi_add(ndev, &nvchan->napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+
ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
nvscdev->ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, new_sc);
-
+ netvsc_channel_cb, nvchan);
if (ret == 0)
- nvscdev->chan_table[chn_index].channel = new_sc;
+ napi_enable(&nvchan->napi);
+ else
+ netdev_err(ndev, "sub channel open failed (%d)\n", ret);
- spin_lock_irqsave(&nvscdev->sc_lock, flags);
- nvscdev->num_sc_offered--;
- spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
- if (nvscdev->num_sc_offered == 0)
+ if (refcount_dec_and_test(&nvscdev->sc_offered))
complete(&nvscdev->channel_init_wait);
}
@@ -1032,12 +1037,9 @@ int rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
unsigned int gso_max_size = GSO_MAX_SIZE;
- u32 mtu, size;
- u32 num_rss_qs;
- u32 sc_delta;
+ u32 mtu, size, num_rss_qs;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
- unsigned long flags;
int i, ret;
rndis_device = get_rndis_device();
@@ -1060,7 +1062,7 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->max_chn = 1;
net_device->num_chn = 1;
- spin_lock_init(&net_device->sc_lock);
+ refcount_set(&net_device->sc_offered, 0);
net_device->extension = rndis_device;
rndis_device->ndev = net;
@@ -1174,34 +1176,30 @@ int rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
- net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
-
- num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
-
/*
* We will limit the VRSS channels to the number CPUs in the NUMA node
* the primary channel is currently bound to.
+ *
+ * This also guarantees that num_possible_rss_qs <= num_online_cpus
*/
node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
- num_possible_rss_qs = cpumask_weight(node_cpu_mask);
+ num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
+ rsscap.num_recv_que);
- /* We will use the given number of channels if available. */
- if (device_info->num_chn && device_info->num_chn < net_device->max_chn)
- net_device->num_chn = device_info->num_chn;
- else
- net_device->num_chn = min(num_possible_rss_qs, num_rss_qs);
+ net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
- num_rss_qs = net_device->num_chn - 1;
+ /* We will use the given number of channels if available. */
+ net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
for (i = 0; i < ITAB_NUM; i++)
rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
net_device->num_chn);
- net_device->num_sc_offered = num_rss_qs;
-
- if (net_device->num_chn == 1)
- goto out;
+ num_rss_qs = net_device->num_chn - 1;
+ if (num_rss_qs == 0)
+ return 0;
+ refcount_set(&net_device->sc_offered, num_rss_qs);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
init_packet = &net_device->channel_init_pkt;
@@ -1217,32 +1215,23 @@ int rndis_filter_device_add(struct hv_device *dev,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
goto out;
- wait_for_completion(&net_device->channel_init_wait);
- if (init_packet->msg.v5_msg.subchn_comp.status !=
- NVSP_STAT_SUCCESS) {
+ if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
ret = -ENODEV;
goto out;
}
+ wait_for_completion(&net_device->channel_init_wait);
+
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
- ret = rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
- net_device->num_chn);
-
- /*
- * Set the number of sub-channels to be received.
- */
- spin_lock_irqsave(&net_device->sc_lock, flags);
- sc_delta = num_rss_qs - (net_device->num_chn - 1);
- net_device->num_sc_offered -= sc_delta;
- spin_unlock_irqrestore(&net_device->sc_lock, flags);
-
+ /* ignore failures from setting rss parameters, still have channels */
+ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
+ net_device->num_chn);
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
- net_device->num_sc_offered = 0;
}
return 0; /* return 0 because primary channel can be used alone */
@@ -1257,12 +1246,6 @@ void rndis_filter_device_remove(struct hv_device *dev,
{
struct rndis_device *rndis_dev = net_dev->extension;
- /* If not all subchannel offers are complete, wait for them until
- * completion to avoid race.
- */
- if (net_dev->num_sc_offered > 0)
- wait_for_completion(&net_dev->channel_init_wait);
-
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);