diff options
Diffstat (limited to 'drivers/net/ethernet/ibm/ibmvnic.c')
-rw-r--r-- | drivers/net/ethernet/ibm/ibmvnic.c | 631 |
1 files changed, 405 insertions, 226 deletions
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index da15913879f8..a2191392ca4f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -84,8 +84,6 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *); static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq); static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); static int enable_scrq_irq(struct ibmvnic_adapter *, @@ -306,9 +304,11 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, int count = pool->size - atomic_read(&pool->available); u64 handle = adapter->rx_scrq[pool->index]->handle; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_sub_crq_queue *rx_scrq; + union sub_crq *sub_crq; int buffers_added = 0; unsigned long lpar_rc; - union sub_crq sub_crq; struct sk_buff *skb; unsigned int offset; dma_addr_t dma_addr; @@ -320,8 +320,10 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, if (!pool->active) return; + rx_scrq = adapter->rx_scrq[pool->index]; + ind_bufp = &rx_scrq->ind_buf; for (i = 0; i < count; ++i) { - skb = alloc_skb(pool->buff_size, GFP_ATOMIC); + skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); if (!skb) { dev_err(dev, "Couldn't replenish rx buff\n"); adapter->replenish_no_mem++; @@ -346,12 +348,13 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, pool->rx_buff[index].pool_index = pool->index; pool->rx_buff[index].size = pool->buff_size; - memset(&sub_crq, 0, sizeof(sub_crq)); - sub_crq.rx_add.first = IBMVNIC_CRQ_CMD; - sub_crq.rx_add.correlator = + sub_crq = &ind_bufp->indir_arr[ind_bufp->index++]; + memset(sub_crq, 0, sizeof(*sub_crq)); + sub_crq->rx_add.first = IBMVNIC_CRQ_CMD; + sub_crq->rx_add.correlator = cpu_to_be64((u64)&pool->rx_buff[index]); - sub_crq.rx_add.ioba = cpu_to_be32(dma_addr); - sub_crq.rx_add.map_id = pool->long_term_buff.map_id; + sub_crq->rx_add.ioba = cpu_to_be32(dma_addr); + sub_crq->rx_add.map_id = pool->long_term_buff.map_id; /* The length field of the sCRQ is defined to be 24 bits so the * buffer size needs to be left shifted by a byte before it is @@ -361,15 +364,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, #ifdef __LITTLE_ENDIAN__ shift = 8; #endif - sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift); - - lpar_rc = send_subcrq(adapter, handle, &sub_crq); - if (lpar_rc != H_SUCCESS) - goto failure; - - buffers_added++; - adapter->replenish_add_buff_success++; + sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); pool->next_free = (pool->next_free + 1) % pool->size; + if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + i == count - 1) { + lpar_rc = + send_subcrq_indirect(adapter, handle, + (u64)ind_bufp->indir_dma, + (u64)ind_bufp->index); + if (lpar_rc != H_SUCCESS) + goto failure; + buffers_added += ind_bufp->index; + adapter->replenish_add_buff_success += ind_bufp->index; + ind_bufp->index = 0; + } } atomic_add(buffers_added, &pool->available); return; @@ -377,13 +385,22 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); - pool->free_map[pool->next_free] = index; - pool->rx_buff[index].skb = NULL; + for (i = ind_bufp->index - 1; i >= 0; --i) { + struct ibmvnic_rx_buff *rx_buff; - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; + pool->next_free = pool->next_free == 0 ? + pool->size - 1 : pool->next_free - 1; + sub_crq = &ind_bufp->indir_arr[i]; + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(sub_crq->rx_add.correlator); + index = (int)(rx_buff - pool->rx_buff); + pool->free_map[pool->next_free] = index; + dev_kfree_skb_any(pool->rx_buff[index].skb); + pool->rx_buff[index].skb = NULL; + } + adapter->replenish_add_buff_failure += ind_bufp->index; atomic_add(buffers_added, &pool->available); - + ind_bufp->index = 0; if (lpar_rc == H_CLOSED || adapter->failover_pending) { /* Disable buffer pool replenishment and report carrier off if * queue is closed or pending failover. @@ -404,6 +421,8 @@ static void replenish_pools(struct ibmvnic_adapter *adapter) if (adapter->rx_pool[i].active) replenish_rx_pool(adapter, &adapter->rx_pool[i]); } + + netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); } static void release_stats_buffers(struct ibmvnic_adapter *adapter) @@ -483,7 +502,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) if (rx_pool->buff_size != buff_size) { free_long_term_buff(adapter, &rx_pool->long_term_buff); - rx_pool->buff_size = buff_size; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rc = alloc_long_term_buff(adapter, &rx_pool->long_term_buff, rx_pool->size * @@ -577,7 +596,7 @@ static int init_rx_pools(struct net_device *netdev) rx_pool->size = adapter->req_rx_add_entries_per_subcrq; rx_pool->index = i; - rx_pool->buff_size = buff_size; + rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rx_pool->active = 1; rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), @@ -730,6 +749,7 @@ static int init_tx_pools(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int tx_subcrqs; + u64 buff_size; int i, rc; tx_subcrqs = adapter->num_active_tx_scrqs; @@ -746,9 +766,11 @@ static int init_tx_pools(struct net_device *netdev) adapter->num_active_tx_pools = tx_subcrqs; for (i = 0; i < tx_subcrqs; i++) { + buff_size = adapter->req_mtu + VLAN_HLEN; + buff_size = ALIGN(buff_size, L1_CACHE_BYTES); rc = init_one_tx_pool(netdev, &adapter->tx_pool[i], adapter->req_tx_entries_per_subcrq, - adapter->req_mtu + VLAN_HLEN); + buff_size); if (rc) { release_tx_pools(adapter); return rc; @@ -834,7 +856,7 @@ static void release_napi(struct ibmvnic_adapter *adapter) static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); int retry_count = 0; int retries = 10; bool retry; @@ -850,10 +872,8 @@ static int ibmvnic_login(struct net_device *netdev) adapter->init_done_rc = 0; reinit_completion(&adapter->init_done); rc = send_login(adapter); - if (rc) { - netdev_warn(netdev, "Unable to login\n"); + if (rc) return rc; - } if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -911,6 +931,7 @@ static int ibmvnic_login(struct net_device *netdev) __ibmvnic_set_mac(netdev, adapter->mac_addr); + netdev_dbg(netdev, "[S:%d] Login succeeded\n", adapter->state); return 0; } @@ -940,7 +961,7 @@ static void release_resources(struct ibmvnic_adapter *adapter) static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) { struct net_device *netdev = adapter->netdev; - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); union ibmvnic_crq crq; bool resend; int rc; @@ -1148,6 +1169,7 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); @@ -1341,6 +1363,10 @@ static int ibmvnic_close(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; + netdev_dbg(netdev, "[S:%d FOP:%d FRR:%d] Closing\n", + adapter->state, adapter->failover_pending, + adapter->force_reset_recovery); + /* If device failover is pending, just set device state and return. * Device operation will be handled by reset routine. */ @@ -1478,17 +1504,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. */ -static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, +static void build_hdr_descs_arr(struct sk_buff *skb, + union sub_crq *indir_arr, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; + u8 hdr_data[140] = {0}; int tot_len; - u8 *hdr_data = txbuff->hdr_data; - tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, - txbuff->hdr_data); + tot_len = build_hdr_data(hdr_field, skb, hdr_len, + hdr_data); *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, - txbuff->indir_arr + 1); + indir_arr + 1); } static int ibmvnic_xmit_workarounds(struct sk_buff *skb, @@ -1506,17 +1533,95 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb, return 0; } +static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + struct ibmvnic_tx_buff *tx_buff; + struct ibmvnic_tx_pool *tx_pool; + union sub_crq tx_scrq_entry; + int queue_num; + int entries; + int index; + int i; + + ind_bufp = &tx_scrq->ind_buf; + entries = (u64)ind_bufp->index; + queue_num = tx_scrq->pool_index; + + for (i = entries - 1; i >= 0; --i) { + tx_scrq_entry = ind_bufp->indir_arr[i]; + if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) + continue; + index = be32_to_cpu(tx_scrq_entry.v1.correlator); + if (index & IBMVNIC_TSO_POOL_MASK) { + tx_pool = &adapter->tso_pool[queue_num]; + index &= ~IBMVNIC_TSO_POOL_MASK; + } else { + tx_pool = &adapter->tx_pool[queue_num]; + } + tx_pool->free_map[tx_pool->consumer_index] = index; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_buff = &tx_pool->tx_buff[index]; + adapter->netdev->stats.tx_packets--; + adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; + adapter->tx_stats_buffers[queue_num].packets--; + adapter->tx_stats_buffers[queue_num].bytes -= + tx_buff->skb->len; + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; + adapter->netdev->stats.tx_dropped++; + } + ind_bufp->index = 0; + if (atomic_sub_return(entries, &tx_scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } +} + +static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, + struct ibmvnic_sub_crq_queue *tx_scrq) +{ + struct ibmvnic_ind_xmit_queue *ind_bufp; + u64 dma_addr; + u64 entries; + u64 handle; + int rc; + + ind_bufp = &tx_scrq->ind_buf; + dma_addr = (u64)ind_bufp->indir_dma; + entries = (u64)ind_bufp->index; + handle = tx_scrq->handle; + + if (!entries) + return 0; + rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); + if (rc) + ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); + else + ind_bufp->index = 0; + return 0; +} + static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; + struct ibmvnic_ind_xmit_queue *ind_bufp; struct ibmvnic_tx_buff *tx_buff = NULL; struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; + netdev_tx_t ret = NETDEV_TX_OK; unsigned int tx_map_failed = 0; + union sub_crq indir_arr[16]; unsigned int tx_dropped = 0; unsigned int tx_packets = 0; unsigned int tx_bytes = 0; @@ -1529,8 +1634,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned char *dst; int index = 0; u8 proto = 0; - u64 handle; - netdev_tx_t ret = NETDEV_TX_OK; + + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; if (test_bit(0, &adapter->resetting)) { if (!netif_subqueue_stopped(netdev, skb)) @@ -1540,6 +1647,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1547,6 +1655,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_dropped++; tx_send_failed++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } if (skb_is_gso(skb)) @@ -1554,10 +1663,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) else tx_pool = &adapter->tx_pool[queue_num]; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); - handle = tx_scrq->handle; - index = tx_pool->free_map[tx_pool->consumer_index]; if (index == IBMVNIC_INVALID_MAP) { @@ -1565,6 +1670,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1600,11 +1706,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; - tx_buff->data_dma[0] = data_dma_addr; - tx_buff->data_len[0] = skb->len; tx_buff->index = index; tx_buff->pool_index = queue_num; - tx_buff->last_frag = true; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -1649,55 +1752,29 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); hdrs += 2; } - /* determine if l2/3/4 headers are sent to firmware */ - if ((*hdrs >> 7) & 1) { - build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); - tx_crq.v1.n_crq_elem = num_entries; - tx_buff->num_entries = num_entries; - tx_buff->indir_arr[0] = tx_crq; - tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr, - sizeof(tx_buff->indir_arr), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, tx_buff->indir_dma)) { - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (!firmware_has_feature(FW_FEATURE_CMO)) - dev_err(dev, "tx: unable to map descriptor array\n"); - tx_map_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; - } - lpar_rc = send_subcrq_indirect(adapter, handle, - (u64)tx_buff->indir_dma, - (u64)num_entries); - dma_unmap_single(dev, tx_buff->indir_dma, - sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); - } else { - tx_buff->num_entries = num_entries; - lpar_rc = send_subcrq(adapter, handle, - &tx_crq); - } - if (lpar_rc != H_SUCCESS) { - if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) - dev_err_ratelimited(dev, "tx: send failed\n"); - dev_kfree_skb_any(skb); - tx_buff->skb = NULL; - if (lpar_rc == H_CLOSED || adapter->failover_pending) { - /* Disable TX and report carrier off if queue is closed - * or pending failover. - * Firmware guarantees that a signal will be sent to the - * driver, triggering a reset or some other action. - */ - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - } + if ((*hdrs >> 7) & 1) + build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); - tx_send_failed++; - tx_dropped++; - ret = NETDEV_TX_OK; - goto tx_err_out; + tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; + /* flush buffer if current entry can not fit */ + if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_flush_err; + } + + indir_arr[0] = tx_crq; + memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], + num_entries * sizeof(struct ibmvnic_generic_scrq)); + ind_bufp->index += num_entries; + if (__netdev_tx_sent_queue(txq, skb->len, + netdev_xmit_more() && + ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); + if (lpar_rc != H_SUCCESS) + goto tx_err; } if (atomic_add_return(num_entries, &tx_scrq->used) @@ -1712,14 +1789,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_OK; goto out; -tx_err_out: - /* roll back consumer index and map array*/ - if (tx_pool->consumer_index == 0) - tx_pool->consumer_index = - tx_pool->num_buffers - 1; - else - tx_pool->consumer_index--; - tx_pool->free_map[tx_pool->consumer_index] = index; +tx_flush_err: + dev_kfree_skb_any(skb); + tx_buff->skb = NULL; + tx_pool->consumer_index = tx_pool->consumer_index == 0 ? + tx_pool->num_buffers - 1 : + tx_pool->consumer_index - 1; + tx_dropped++; +tx_err: + if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) + dev_err_ratelimited(dev, "tx: send failed\n"); + + if (lpar_rc == H_CLOSED || adapter->failover_pending) { + /* Disable TX and report carrier off if queue is closed + * or pending failover. + * Firmware guarantees that a signal will be sent to the + * driver, triggering a reset or some other action. + */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } out: netdev->stats.tx_dropped += tx_dropped; netdev->stats.tx_bytes += tx_bytes; @@ -1857,7 +1946,7 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, if (reset_state == VNIC_OPEN) { rc = __ibmvnic_close(netdev); if (rc) - return rc; + goto out; } release_resources(adapter); @@ -1875,24 +1964,25 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, } rc = ibmvnic_reset_init(adapter, true); - if (rc) - return IBMVNIC_INIT_FAILED; + if (rc) { + rc = IBMVNIC_INIT_FAILED; + goto out; + } /* If the adapter was in PROBE state prior to the reset, * exit here. */ if (reset_state == VNIC_PROBED) - return 0; + goto out; rc = ibmvnic_login(netdev); if (rc) { - adapter->state = reset_state; - return rc; + goto out; } rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); @@ -1902,8 +1992,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, return 0; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } /* refresh device's multicast list */ ibmvnic_set_multi(netdev); @@ -1912,7 +2004,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - return 0; +out: + if (rc) + adapter->state = reset_state; + return rc; } /** @@ -1927,8 +2022,10 @@ static int do_reset(struct ibmvnic_adapter *adapter, struct net_device *netdev = adapter->netdev; int i, rc; - netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, + "[S:%d FOP:%d] Reset reason %d, reset_state %d\n", + adapter->state, adapter->failover_pending, + rwi->reset_reason, reset_state); rtnl_lock(); /* @@ -2015,7 +2112,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = ibmvnic_login(netdev); if (rc) { - adapter->state = reset_state; goto out; } @@ -2074,14 +2170,22 @@ static int do_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason != VNIC_RESET_FAILOVER) + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) { call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); + } rc = 0; out: + /* restore the adapter state if reset failed */ + if (rc) + adapter->state = reset_state; rtnl_unlock(); + netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n", + adapter->state, adapter->failover_pending, rc); return rc; } @@ -2112,40 +2216,48 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, if (rc) { netdev_err(adapter->netdev, "Couldn't initialize crq. rc=%d\n", rc); - return rc; + goto out; } rc = ibmvnic_reset_init(adapter, false); if (rc) - return rc; + goto out; /* If the adapter was in PROBE state prior to the reset, * exit here. */ if (reset_state == VNIC_PROBED) - return 0; + goto out; rc = ibmvnic_login(netdev); - if (rc) { - adapter->state = VNIC_PROBED; - return 0; - } + if (rc) + goto out; rc = init_resources(adapter); if (rc) - return rc; + goto out; ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) - return 0; + goto out; rc = __ibmvnic_open(netdev); - if (rc) - return IBMVNIC_OPEN_FAILED; + if (rc) { + rc = IBMVNIC_OPEN_FAILED; + goto out; + } - return 0; + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); +out: + /* restore adapter state if reset failed */ + if (rc) + adapter->state = reset_state; + netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Hard reset done, rc %d\n", + adapter->state, adapter->failover_pending, rc); + return rc; } static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) @@ -2167,17 +2279,6 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) return rwi; } -static void free_all_rwi(struct ibmvnic_adapter *adapter) -{ - struct ibmvnic_rwi *rwi; - - rwi = get_next_rwi(adapter); - while (rwi) { - kfree(rwi); - rwi = get_next_rwi(adapter); - } -} - static void __ibmvnic_reset(struct work_struct *work) { struct ibmvnic_rwi *rwi; @@ -2209,7 +2310,6 @@ static void __ibmvnic_reset(struct work_struct *work) if (!saved_state) { reset_state = adapter->state; - adapter->state = VNIC_RESETTING; saved_state = true; } spin_unlock_irqrestore(&adapter->state_lock, flags); @@ -2236,20 +2336,23 @@ static void __ibmvnic_reset(struct work_struct *work) rc = do_hard_reset(adapter, rwi, reset_state); rtnl_unlock(); } + if (rc) { + /* give backing device time to settle down */ + netdev_dbg(adapter->netdev, + "[S:%d] Hard reset failed, waiting 60 secs\n", + adapter->state); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(60 * HZ); + } } else if (!(rwi->reset_reason == VNIC_RESET_FATAL && adapter->from_passive_init)) { rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); - if (rc == IBMVNIC_OPEN_FAILED) { - if (list_empty(&adapter->rwi_list)) - adapter->state = VNIC_CLOSED; - else - adapter->state = reset_state; - rc = 0; - } else if (rc && rc != IBMVNIC_INIT_FAILED && - !adapter->force_reset_recovery) - break; + adapter->last_reset_time = jiffies; + + if (rc) + netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); rwi = get_next_rwi(adapter); @@ -2263,12 +2366,12 @@ static void __ibmvnic_reset(struct work_struct *work) complete(&adapter->reset_done); } - if (rc) { - netdev_dbg(adapter->netdev, "Reset failed\n"); - free_all_rwi(adapter); - } - clear_bit_unlock(0, &adapter->resetting); + + netdev_dbg(adapter->netdev, + "[S:%d FRR:%d WFR:%d] Done processing resets\n", + adapter->state, adapter->force_reset_recovery, + adapter->wait_for_reset); } static void __ibmvnic_delayed_reset(struct work_struct *work) @@ -2314,7 +2417,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { - netdev_dbg(netdev, "Skipping matching reset\n"); + netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", + reason); spin_unlock_irqrestore(&adapter->rwi_lock, flags); ret = EBUSY; goto err; @@ -2350,6 +2454,18 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); + if (test_bit(0, &adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + /* No queuing up reset until at least 5 seconds (default watchdog val) + * after last reset + */ + if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { + netdev_dbg(dev, "Not yet time to tx timeout.\n"); + return; + } ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); } @@ -2368,10 +2484,17 @@ static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, static int ibmvnic_poll(struct napi_struct *napi, int budget) { - struct net_device *netdev = napi->dev; - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int scrq_num = (int)(napi - adapter->napi); - int frames_processed = 0; + struct ibmvnic_sub_crq_queue *rx_scrq; + struct ibmvnic_adapter *adapter; + struct net_device *netdev; + int frames_processed; + int scrq_num; + + netdev = napi->dev; + adapter = netdev_priv(netdev); + scrq_num = (int)(napi - adapter->napi); + frames_processed = 0; + rx_scrq = adapter->rx_scrq[scrq_num]; restart_poll: while (frames_processed < budget) { @@ -2384,14 +2507,20 @@ restart_poll: if (unlikely(test_bit(0, &adapter->resetting) && adapter->reset_reason != VNIC_RESET_NON_FATAL)) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); + enable_scrq_irq(adapter, rx_scrq); napi_complete_done(napi, frames_processed); return frames_processed; } - if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num])) + if (!pending_scrq(adapter, rx_scrq)) break; - next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]); + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); + next = ibmvnic_next_scrq(adapter, rx_scrq); rx_buff = (struct ibmvnic_rx_buff *)be64_to_cpu(next-> rx_comp.correlator); @@ -2448,16 +2577,21 @@ restart_poll: frames_processed++; } - if (adapter->state != VNIC_CLOSING) + if (adapter->state != VNIC_CLOSING && + ((atomic_read(&adapter->rx_pool[scrq_num].available) < + adapter->req_rx_add_entries_per_subcrq / 2) || + frames_processed < budget)) replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); - if (frames_processed < budget) { - enable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - napi_complete_done(napi, frames_processed); - if (pending_scrq(adapter, adapter->rx_scrq[scrq_num]) && - napi_reschedule(napi)) { - disable_scrq_irq(adapter, adapter->rx_scrq[scrq_num]); - goto restart_poll; + if (napi_complete_done(napi, frames_processed)) { + enable_scrq_irq(adapter, rx_scrq); + if (pending_scrq(adapter, rx_scrq)) { + rmb(); + if (napi_reschedule(napi)) { + disable_scrq_irq(adapter, rx_scrq); + goto restart_poll; + } + } } } return frames_processed; @@ -2849,15 +2983,28 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, { int rc; + if (!scrq) { + netdev_dbg(adapter->netdev, + "Invalid scrq reset. irq (%d) or msgs (%p).\n", + scrq->irq, scrq->msgs); + return -EINVAL; + } + if (scrq->irq) { free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); scrq->irq = 0; } - memset(scrq->msgs, 0, 4 * PAGE_SIZE); - atomic_set(&scrq->used, 0); - scrq->cur = 0; + if (scrq->msgs) { + memset(scrq->msgs, 0, 4 * PAGE_SIZE); + atomic_set(&scrq->used, 0); + scrq->cur = 0; + scrq->ind_buf.index = 0; + } else { + netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); + return -EINVAL; + } rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); @@ -2868,6 +3015,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); @@ -2909,6 +3059,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } } + dma_free_coherent(dev, + IBMVNIC_IND_ARR_SZ, + scrq->ind_buf.indir_arr, + scrq->ind_buf.indir_dma); + dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); free_pages((unsigned long)scrq->msgs, 2); @@ -2955,6 +3110,17 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); + scrq->ind_buf.index = 0; + + scrq->ind_buf.indir_arr = + dma_alloc_coherent(dev, + IBMVNIC_IND_ARR_SZ, + &scrq->ind_buf.indir_dma, + GFP_KERNEL); + + if (!scrq->ind_buf.indir_arr) + goto indir_failed; + spin_lock_init(&scrq->lock); netdev_dbg(adapter->netdev, @@ -2963,6 +3129,12 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter return scrq; +indir_failed: + do { + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, + adapter->vdev->unit_address, + scrq->crq_num); + } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc)); reg_failed: dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -3077,22 +3249,30 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_pool *tx_pool; struct ibmvnic_tx_buff *txbuff; + struct netdev_queue *txq; union sub_crq *next; int index; - int i, j; + int i; restart_loop: while (pending_scrq(adapter, scrq)) { unsigned int pool = scrq->pool_index; int num_entries = 0; + int total_bytes = 0; + int num_packets = 0; + + /* The queue entry at the current index is peeked at above + * to determine that there is a valid descriptor awaiting + * processing. We want to be sure that the current slot + * holds a valid descriptor before reading its contents. + */ + dma_rmb(); next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) { + if (next->tx_comp.rcs[i]) dev_err(dev, "tx error %x\n", next->tx_comp.rcs[i]); - continue; - } index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3102,21 +3282,16 @@ restart_loop: } txbuff = &tx_pool->tx_buff[index]; - - for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) { - if (!txbuff->data_dma[j]) - continue; - - txbuff->data_dma[j] = 0; - } - - if (txbuff->last_frag) { - dev_kfree_skb_any(txbuff->skb); + num_packets++; + num_entries += txbuff->num_entries; + if (txbuff->skb) { + total_bytes += txbuff->skb->len; + dev_consume_skb_irq(txbuff->skb); txbuff->skb = NULL; + } else { + netdev_warn(adapter->netdev, + "TX completion received with NULL socket buffer\n"); } - - num_entries += txbuff->num_entries; - tx_pool->free_map[tx_pool->producer_index] = index; tx_pool->producer_index = (tx_pool->producer_index + 1) % @@ -3125,6 +3300,9 @@ restart_loop: /* remove tx_comp scrq*/ next->tx_comp.first = 0; + txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); + netdev_tx_completed_queue(txq, num_packets, total_bytes); + if (atomic_sub_return(num_entries, &scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, @@ -3486,6 +3664,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); + /* Ensure that the entire buffer descriptor has been + * loaded before reading its contents + */ + dma_rmb(); + return entry; } @@ -3524,38 +3707,6 @@ static void print_subcrq_error(struct device *dev, int rc, const char *func) } } -static int send_subcrq(struct ibmvnic_adapter *adapter, u64 remote_handle, - union sub_crq *sub_crq) -{ - unsigned int ua = adapter->vdev->unit_address; - struct device *dev = &adapter->vdev->dev; - u64 *u64_crq = (u64 *)sub_crq; - int rc; - - netdev_dbg(adapter->netdev, - "Sending sCRQ %016lx: %016lx %016lx %016lx %016lx\n", - (unsigned long int)cpu_to_be64(remote_handle), - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1]), - (unsigned long int)cpu_to_be64(u64_crq[2]), - (unsigned long int)cpu_to_be64(u64_crq[3])); - - /* Make sure the hypervisor sees the complete request */ - mb(); - - rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua, - cpu_to_be64(remote_handle), - cpu_to_be64(u64_crq[0]), - cpu_to_be64(u64_crq[1]), - cpu_to_be64(u64_crq[2]), - cpu_to_be64(u64_crq[3])); - - if (rc) - print_subcrq_error(dev, rc, __func__); - - return rc; -} - static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, u64 remote_handle, u64 ioba, u64 num_entries) { @@ -3707,15 +3858,16 @@ static int send_login(struct ibmvnic_adapter *adapter) struct ibmvnic_login_rsp_buffer *login_rsp_buffer; struct ibmvnic_login_buffer *login_buffer; struct device *dev = &adapter->vdev->dev; + struct vnic_login_client_data *vlcd; dma_addr_t rsp_buffer_token; dma_addr_t buffer_token; size_t rsp_buffer_size; union ibmvnic_crq crq; + int client_data_len; size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; - int client_data_len; - struct vnic_login_client_data *vlcd; + int rc; int i; if (!adapter->tx_scrq || !adapter->rx_scrq) { @@ -3819,16 +3971,25 @@ static int send_login(struct ibmvnic_adapter *adapter) crq.login.cmd = LOGIN; crq.login.ioba = cpu_to_be32(buffer_token); crq.login.len = cpu_to_be32(buffer_size); - ibmvnic_send_crq(adapter, &crq); + + adapter->login_pending = true; + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + adapter->login_pending = false; + netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); + goto buf_rsp_map_failed; + } return 0; buf_rsp_map_failed: kfree(login_rsp_buffer); + adapter->login_rsp_buf = NULL; buf_rsp_alloc_failed: dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); buf_map_failed: kfree(login_buffer); + adapter->login_buf = NULL; buf_alloc_failed: return -1; } @@ -4371,6 +4532,15 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, u64 *size_array; int i; + /* CHECK: Test/set of login_pending does not need to be atomic + * because only ibmvnic_tasklet tests/clears this. + */ + if (!adapter->login_pending) { + netdev_warn(netdev, "Ignoring unexpected login response\n"); + return 0; + } + adapter->login_pending = false; + dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_TO_DEVICE); dma_unmap_single(dev, adapter->login_rsp_buf_token, @@ -4400,7 +4570,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, adapter->req_rx_add_queues != be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); - ibmvnic_remove(adapter->vdev); + ibmvnic_reset(adapter, VNIC_RESET_FATAL); return -EIO; } size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + @@ -4742,6 +4912,11 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, case IBMVNIC_CRQ_INIT: dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; + /* Discard any stale login responses from prev reset. + * CHECK: should we clear even on INIT_COMPLETE? + */ + adapter->login_pending = false; + if (!completion_done(&adapter->init_done)) { complete(&adapter->init_done); adapter->init_done_rc = -EIO; @@ -4958,6 +5133,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; @@ -5076,7 +5254,7 @@ map_failed: static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; - unsigned long timeout = msecs_to_jiffies(30000); + unsigned long timeout = msecs_to_jiffies(20000); u64 old_num_rx_queues, old_num_tx_queues; int rc; @@ -5171,6 +5349,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->login_pending = false; ether_addr_copy(adapter->mac_addr, mac_addr_p); ether_addr_copy(netdev->dev_addr, adapter->mac_addr); @@ -5234,7 +5413,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->state = VNIC_PROBED; adapter->wait_for_reset = false; - + adapter->last_reset_time = jiffies; return 0; ibmvnic_register_fail: @@ -5262,7 +5441,7 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (adapter->state == VNIC_RESETTING) { + if (test_bit(0, &adapter->resetting)) { spin_unlock_irqrestore(&adapter->state_lock, flags); return -EBUSY; } |