summaryrefslogtreecommitdiff
path: root/drivers/net/hyperv/netvsc_drv.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-13 05:57:02 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-13 05:57:02 +0300
commitaee3bfa3307cd0da2126bdc0ea359dabea5ee8f7 (patch)
tree3d35c69e8fa835098bb90f77f30abed120681651 /drivers/net/hyperv/netvsc_drv.c
parentc597b6bcd5c624534afc3df65cdc42bb05173bca (diff)
parent415b6f19e87e350b13585591859d4fdf50772229 (diff)
downloadlinux-aee3bfa3307cd0da2126bdc0ea359dabea5ee8f7.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from Davic Miller: 1) Support busy polling generically, for all NAPI drivers. From Eric Dumazet. 2) Add byte/packet counter support to nft_ct, from Floriani Westphal. 3) Add RSS/XPS support to mvneta driver, from Gregory Clement. 4) Implement IPV6_HDRINCL socket option for raw sockets, from Hannes Frederic Sowa. 5) Add support for T6 adapter to cxgb4 driver, from Hariprasad Shenai. 6) Add support for VLAN device bridging to mlxsw switch driver, from Ido Schimmel. 7) Add driver for Netronome NFP4000/NFP6000, from Jakub Kicinski. 8) Provide hwmon interface to mlxsw switch driver, from Jiri Pirko. 9) Reorganize wireless drivers into per-vendor directories just like we do for ethernet drivers. From Kalle Valo. 10) Provide a way for administrators "destroy" connected sockets via the SOCK_DESTROY socket netlink diag operation. From Lorenzo Colitti. 11) Add support to add/remove multicast routes via netlink, from Nikolay Aleksandrov. 12) Make TCP keepalive settings per-namespace, from Nikolay Borisov. 13) Add forwarding and packet duplication facilities to nf_tables, from Pablo Neira Ayuso. 14) Dead route support in MPLS, from Roopa Prabhu. 15) TSO support for thunderx chips, from Sunil Goutham. 16) Add driver for IBM's System i/p VNIC protocol, from Thomas Falcon. 17) Rationalize, consolidate, and more completely document the checksum offloading facilities in the networking stack. From Tom Herbert. 18) Support aborting an ongoing scan in mac80211/cfg80211, from Vidyullatha Kanchanapally. 19) Use per-bucket spinlock for bpf hash facility, from Tom Leiming. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1375 commits) net: bnxt: always return values from _bnxt_get_max_rings net: bpf: reject invalid shifts phonet: properly unshare skbs in phonet_rcv() dwc_eth_qos: Fix dma address for multi-fragment skbs phy: remove an unneeded condition mdio: remove an unneed condition mdio_bus: NULL dereference on allocation error net: Fix typo in netdev_intersect_features net: freescale: mac-fec: Fix build error from phy_device API change net: freescale: ucc_geth: Fix build error from phy_device API change bonding: Prevent IPv6 link local address on enslaved devices IB/mlx5: Add flow steering support net/mlx5_core: Export flow steering API net/mlx5_core: Make ipv4/ipv6 location more clear net/mlx5_core: Enable flow steering support for the IB driver net/mlx5_core: Initialize namespaces only when supported by device net/mlx5_core: Set priority attributes net/mlx5_core: Connect flow tables net/mlx5_core: Introduce modify flow table command net/mlx5_core: Managing root flow table ...
Diffstat (limited to 'drivers/net/hyperv/netvsc_drv.c')
-rw-r--r--drivers/net/hyperv/netvsc_drv.c235
1 files changed, 130 insertions, 105 deletions
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 409b48e1e589..1c8db9afdcda 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -42,6 +42,7 @@
#define RING_SIZE_MIN 64
+#define LINKCHANGE_INT (2 * HZ)
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@ -272,17 +273,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
}
- return q_idx;
-}
-
-void netvsc_xmit_completion(void *context)
-{
- struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
- struct sk_buff *skb = (struct sk_buff *)
- (unsigned long)packet->send_completion_tid;
+ if (!nvsc_dev->chn_table[q_idx])
+ q_idx = 0;
- if (skb)
- dev_kfree_skb_any(skb);
+ return q_idx;
}
static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
@@ -320,9 +314,10 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
}
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
- struct hv_netvsc_packet *packet)
+ struct hv_netvsc_packet *packet,
+ struct hv_page_buffer **page_buf)
{
- struct hv_page_buffer *pb = packet->page_buf;
+ struct hv_page_buffer *pb = *page_buf;
u32 slots_used = 0;
char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
@@ -432,8 +427,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
u32 net_trans_info;
u32 hash;
u32 skb_length;
- u32 pkt_sz;
struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
+ struct hv_page_buffer *pb = page_buf;
struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
/* We will atmost need two pages to describe the rndis
@@ -460,42 +455,34 @@ check_size:
goto check_size;
}
- pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE;
-
- ret = skb_cow_head(skb, pkt_sz);
+ /*
+ * Place the rndis header in the skb head room and
+ * the skb->cb will be used for hv_netvsc_packet
+ * structure.
+ */
+ ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
if (ret) {
netdev_err(net, "unable to alloc hv_netvsc_packet\n");
ret = -ENOMEM;
goto drop;
}
- /* Use the headroom for building up the packet */
- packet = (struct hv_netvsc_packet *)skb->head;
+ /* Use the skb control buffer for building up the packet */
+ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
+ FIELD_SIZEOF(struct sk_buff, cb));
+ packet = (struct hv_netvsc_packet *)skb->cb;
- packet->status = 0;
- packet->xmit_more = skb->xmit_more;
-
- packet->vlan_tci = skb->vlan_tci;
- packet->page_buf = page_buf;
packet->q_idx = skb_get_queue_mapping(skb);
- packet->is_data_pkt = true;
packet->total_data_buflen = skb->len;
- packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
- sizeof(struct hv_netvsc_packet));
-
- memset(packet->rndis_msg, 0, RNDIS_AND_PPI_SIZE);
+ rndis_msg = (struct rndis_message *)skb->head;
- /* Set the completion routine */
- packet->send_completion = netvsc_xmit_completion;
- packet->send_completion_ctx = packet;
- packet->send_completion_tid = (unsigned long)skb;
+ memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
- isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
+ isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;
/* Add the rndis header */
- rndis_msg = packet->rndis_msg;
rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
rndis_msg->msg_len = packet->total_data_buflen;
rndis_pkt = &rndis_msg->msg.pkt;
@@ -521,8 +508,8 @@ check_size:
IEEE_8021Q_INFO);
vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
ppi->ppi_offset);
- vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
- vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
+ vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
+ vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
}
@@ -617,9 +604,10 @@ do_send:
rndis_msg->msg_len += rndis_msg_size;
packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
- skb, packet);
+ skb, packet, &pb);
- ret = netvsc_send(net_device_ctx->device_ctx, packet);
+ ret = netvsc_send(net_device_ctx->device_ctx, packet,
+ rndis_msg, &pb, skb);
drop:
if (ret == 0) {
@@ -647,37 +635,33 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct net_device *net;
struct net_device_context *ndev_ctx;
struct netvsc_device *net_device;
- struct rndis_device *rdev;
-
- net_device = hv_get_drvdata(device_obj);
- rdev = net_device->extension;
+ struct netvsc_reconfig *event;
+ unsigned long flags;
- switch (indicate->status) {
- case RNDIS_STATUS_MEDIA_CONNECT:
- rdev->link_state = false;
- break;
- case RNDIS_STATUS_MEDIA_DISCONNECT:
- rdev->link_state = true;
- break;
- case RNDIS_STATUS_NETWORK_CHANGE:
- rdev->link_change = true;
- break;
- default:
+ /* Handle link change statuses only */
+ if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
+ indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
+ indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
return;
- }
+ net_device = hv_get_drvdata(device_obj);
net = net_device->ndev;
if (!net || net->reg_state != NETREG_REGISTERED)
return;
ndev_ctx = netdev_priv(net);
- if (!rdev->link_state) {
- schedule_delayed_work(&ndev_ctx->dwork, 0);
- schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
- } else {
- schedule_delayed_work(&ndev_ctx->dwork, 0);
- }
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ return;
+ event->event = indicate->status;
+
+ spin_lock_irqsave(&ndev_ctx->lock, flags);
+ list_add_tail(&event->list, &ndev_ctx->reconfig_events);
+ spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+
+ schedule_delayed_work(&ndev_ctx->dwork, 0);
}
/*
@@ -686,7 +670,10 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
*/
int netvsc_recv_callback(struct hv_device *device_obj,
struct hv_netvsc_packet *packet,
- struct ndis_tcp_ip_checksum_info *csum_info)
+ void **data,
+ struct ndis_tcp_ip_checksum_info *csum_info,
+ struct vmbus_channel *channel,
+ u16 vlan_tci)
{
struct net_device *net;
struct net_device_context *net_device_ctx;
@@ -695,8 +682,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
if (!net || net->reg_state != NETREG_REGISTERED) {
- packet->status = NVSP_STAT_FAIL;
- return 0;
+ return NVSP_STAT_FAIL;
}
net_device_ctx = netdev_priv(net);
rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
@@ -705,15 +691,14 @@ int netvsc_recv_callback(struct hv_device *device_obj,
skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
if (unlikely(!skb)) {
++net->stats.rx_dropped;
- packet->status = NVSP_STAT_FAIL;
- return 0;
+ return NVSP_STAT_FAIL;
}
/*
* Copy to skb. This copy is needed here since the memory pointed by
* hv_netvsc_packet cannot be deallocated
*/
- memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
+ memcpy(skb_put(skb, packet->total_data_buflen), *data,
packet->total_data_buflen);
skb->protocol = eth_type_trans(skb, net);
@@ -728,11 +713,11 @@ int netvsc_recv_callback(struct hv_device *device_obj,
skb->ip_summed = CHECKSUM_NONE;
}
- if (packet->vlan_tci & VLAN_TAG_PRESENT)
+ if (vlan_tci & VLAN_TAG_PRESENT)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
- packet->vlan_tci);
+ vlan_tci);
- skb_record_rx_queue(skb, packet->channel->
+ skb_record_rx_queue(skb, channel->
offermsg.offer.sub_channel_index);
u64_stats_update_begin(&rx_stats->syncp);
@@ -1009,12 +994,9 @@ static const struct net_device_ops device_ops = {
};
/*
- * Send GARP packet to network peers after migrations.
- * After Quick Migration, the network is not immediately operational in the
- * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
- * another netif_notify_peers() into a delayed work, otherwise GARP packet
- * will not be sent after quick migration, and cause network disconnection.
- * Also, we update the carrier status here.
+ * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE emulate link
+ * down/up sequence. In case of RNDIS_STATUS_MEDIA_CONNECT when carrier is
+ * present send GARP packet to network peers with netif_notify_peers().
*/
static void netvsc_link_change(struct work_struct *w)
{
@@ -1022,36 +1004,89 @@ static void netvsc_link_change(struct work_struct *w)
struct net_device *net;
struct netvsc_device *net_device;
struct rndis_device *rdev;
- bool notify, refresh = false;
- char *argv[] = { "/etc/init.d/network", "restart", NULL };
- char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
-
- rtnl_lock();
+ struct netvsc_reconfig *event = NULL;
+ bool notify = false, reschedule = false;
+ unsigned long flags, next_reconfig, delay;
ndev_ctx = container_of(w, struct net_device_context, dwork.work);
net_device = hv_get_drvdata(ndev_ctx->device_ctx);
rdev = net_device->extension;
net = net_device->ndev;
- if (rdev->link_state) {
- netif_carrier_off(net);
- notify = false;
- } else {
- netif_carrier_on(net);
- notify = true;
- if (rdev->link_change) {
- rdev->link_change = false;
- refresh = true;
+ next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
+ if (time_is_after_jiffies(next_reconfig)) {
+ /* link_watch only sends one notification with current state
+ * per second, avoid doing reconfig more frequently. Handle
+ * wrap around.
+ */
+ delay = next_reconfig - jiffies;
+ delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT;
+ schedule_delayed_work(&ndev_ctx->dwork, delay);
+ return;
+ }
+ ndev_ctx->last_reconfig = jiffies;
+
+ spin_lock_irqsave(&ndev_ctx->lock, flags);
+ if (!list_empty(&ndev_ctx->reconfig_events)) {
+ event = list_first_entry(&ndev_ctx->reconfig_events,
+ struct netvsc_reconfig, list);
+ list_del(&event->list);
+ reschedule = !list_empty(&ndev_ctx->reconfig_events);
+ }
+ spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+
+ if (!event)
+ return;
+
+ rtnl_lock();
+
+ switch (event->event) {
+ /* Only the following events are possible due to the check in
+ * netvsc_linkstatus_callback()
+ */
+ case RNDIS_STATUS_MEDIA_CONNECT:
+ if (rdev->link_state) {
+ rdev->link_state = false;
+ netif_carrier_on(net);
+ netif_tx_wake_all_queues(net);
+ } else {
+ notify = true;
+ }
+ kfree(event);
+ break;
+ case RNDIS_STATUS_MEDIA_DISCONNECT:
+ if (!rdev->link_state) {
+ rdev->link_state = true;
+ netif_carrier_off(net);
+ netif_tx_stop_all_queues(net);
+ }
+ kfree(event);
+ break;
+ case RNDIS_STATUS_NETWORK_CHANGE:
+ /* Only makes sense if carrier is present */
+ if (!rdev->link_state) {
+ rdev->link_state = true;
+ netif_carrier_off(net);
+ netif_tx_stop_all_queues(net);
+ event->event = RNDIS_STATUS_MEDIA_CONNECT;
+ spin_lock_irqsave(&ndev_ctx->lock, flags);
+ list_add_tail(&event->list, &ndev_ctx->reconfig_events);
+ spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+ reschedule = true;
}
+ break;
}
rtnl_unlock();
- if (refresh)
- call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-
if (notify)
netdev_notify_peers(net);
+
+ /* link_watch only sends one notification with current state per
+ * second, handle next reconfig event in 2 seconds.
+ */
+ if (reschedule)
+ schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
}
static void netvsc_free_netdev(struct net_device *netdev)
@@ -1071,16 +1106,12 @@ static int netvsc_probe(struct hv_device *dev,
struct netvsc_device_info device_info;
struct netvsc_device *nvdev;
int ret;
- u32 max_needed_headroom;
net = alloc_etherdev_mq(sizeof(struct net_device_context),
num_online_cpus());
if (!net)
return -ENOMEM;
- max_needed_headroom = sizeof(struct hv_netvsc_packet) +
- RNDIS_AND_PPI_SIZE;
-
netif_carrier_off(net);
net_device_ctx = netdev_priv(net);
@@ -1106,6 +1137,9 @@ static int netvsc_probe(struct hv_device *dev,
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
+ spin_lock_init(&net_device_ctx->lock);
+ INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+
net->netdev_ops = &device_ops;
net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
@@ -1116,13 +1150,6 @@ static int netvsc_probe(struct hv_device *dev,
net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
- /*
- * Request additional head room in the skb.
- * We will use this space to build the rndis
- * heaser and other state we need to maintain.
- */
- net->needed_headroom = max_needed_headroom;
-
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
@@ -1145,8 +1172,6 @@ static int netvsc_probe(struct hv_device *dev,
pr_err("Unable to register netdev.\n");
rndis_filter_device_remove(dev);
netvsc_free_netdev(net);
- } else {
- schedule_delayed_work(&net_device_ctx->dwork, 0);
}
return ret;