diff options
Diffstat (limited to 'drivers/net/ethernet/ibm')
31 files changed, 18157 insertions, 0 deletions
diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig new file mode 100644 index 000000000000..9e16f3fa97b2 --- /dev/null +++ b/drivers/net/ethernet/ibm/Kconfig @@ -0,0 +1,48 @@ +# +# IBM device configuration. +# + +config NET_VENDOR_IBM + bool "IBM devices" + default y + depends on MCA || PPC_PSERIES || PPC_PSERIES || PPC_DCR || \ + (IBMEBUS && INET && SPARSEMEM) + ---help--- + If you have a network (Ethernet) card belonging to this class, say Y + and read the Ethernet-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about IBM devices. If you say Y, you will be asked for + your specific card in the following questions. + +if NET_VENDOR_IBM + +config IBMVETH + tristate "IBM LAN Virtual Ethernet support" + depends on PPC_PSERIES + ---help--- + This driver supports virtual ethernet adapters on newer IBM iSeries + and pSeries systems. + + To compile this driver as a module, choose M here. The module will + be called ibmveth. + +config ISERIES_VETH + tristate "iSeries Virtual Ethernet driver support" + depends on PPC_ISERIES + +source "drivers/net/ethernet/ibm/emac/Kconfig" + +config EHEA + tristate "eHEA Ethernet support" + depends on IBMEBUS && INET && SPARSEMEM + select INET_LRO + ---help--- + This driver supports the IBM pSeries eHEA ethernet adapter. + + To compile the driver as a module, choose M here. The module + will be called ehea. + +endif # NET_VENDOR_IBM diff --git a/drivers/net/ethernet/ibm/Makefile b/drivers/net/ethernet/ibm/Makefile new file mode 100644 index 000000000000..5a7d4e9ac803 --- /dev/null +++ b/drivers/net/ethernet/ibm/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for th IBM network device drivers. +# + +obj-$(CONFIG_IBMVETH) += ibmveth.o +obj-$(CONFIG_ISERIES_VETH) += iseries_veth.o +obj-$(CONFIG_IBM_EMAC) += emac/ +obj-$(CONFIG_EHEA) += ehea/ diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile new file mode 100644 index 000000000000..775d9969b5c2 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the eHEA ethernet device driver for IBM eServer System p +# +ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o +obj-$(CONFIG_EHEA) += ehea.o + diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h new file mode 100644 index 000000000000..0b8e6a97a980 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea.h @@ -0,0 +1,505 @@ +/* + * linux/drivers/net/ehea/ehea.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EHEA_H__ +#define __EHEA_H__ + +#include <linux/module.h> +#include <linux/ethtool.h> +#include <linux/vmalloc.h> +#include <linux/if_vlan.h> +#include <linux/inet_lro.h> + +#include <asm/ibmebus.h> +#include <asm/abs_addr.h> +#include <asm/io.h> + +#define DRV_NAME "ehea" +#define DRV_VERSION "EHEA_0107" + +/* eHEA capability flags */ +#define DLPAR_PORT_ADD_REM 1 +#define DLPAR_MEM_ADD 2 +#define DLPAR_MEM_REM 4 +#define EHEA_CAPABILITIES (DLPAR_PORT_ADD_REM | DLPAR_MEM_ADD | DLPAR_MEM_REM) + +#define EHEA_MSG_DEFAULT (NETIF_MSG_LINK | NETIF_MSG_TIMER \ + | NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) + +#define EHEA_MAX_ENTRIES_RQ1 32767 +#define EHEA_MAX_ENTRIES_RQ2 16383 +#define EHEA_MAX_ENTRIES_RQ3 16383 +#define EHEA_MAX_ENTRIES_SQ 32767 +#define EHEA_MIN_ENTRIES_QP 127 + +#define EHEA_SMALL_QUEUES +#define EHEA_NUM_TX_QP 1 +#define EHEA_LRO_MAX_AGGR 64 + +#ifdef EHEA_SMALL_QUEUES +#define EHEA_MAX_CQE_COUNT 1023 +#define EHEA_DEF_ENTRIES_SQ 1023 +#define EHEA_DEF_ENTRIES_RQ1 4095 +#define EHEA_DEF_ENTRIES_RQ2 1023 +#define EHEA_DEF_ENTRIES_RQ3 1023 +#else +#define EHEA_MAX_CQE_COUNT 4080 +#define EHEA_DEF_ENTRIES_SQ 4080 +#define EHEA_DEF_ENTRIES_RQ1 8160 +#define EHEA_DEF_ENTRIES_RQ2 2040 +#define EHEA_DEF_ENTRIES_RQ3 2040 +#endif + +#define EHEA_MAX_ENTRIES_EQ 20 + +#define EHEA_SG_SQ 2 +#define EHEA_SG_RQ1 1 +#define EHEA_SG_RQ2 0 +#define EHEA_SG_RQ3 0 + +#define EHEA_MAX_PACKET_SIZE 9022 /* for jumbo frames */ +#define EHEA_RQ2_PKT_SIZE 1522 +#define EHEA_L_PKT_SIZE 256 /* low latency */ + +#define MAX_LRO_DESCRIPTORS 8 + +/* Send completion signaling */ + +/* Protection Domain Identifier */ +#define EHEA_PD_ID 0xaabcdeff + +#define EHEA_RQ2_THRESHOLD 1 +#define EHEA_RQ3_THRESHOLD 9 /* use RQ3 threshold of 1522 bytes */ + +#define EHEA_SPEED_10G 10000 +#define EHEA_SPEED_1G 1000 +#define EHEA_SPEED_100M 100 +#define EHEA_SPEED_10M 10 +#define EHEA_SPEED_AUTONEG 0 + +/* Broadcast/Multicast registration types */ +#define EHEA_BCMC_SCOPE_ALL 0x08 +#define EHEA_BCMC_SCOPE_SINGLE 0x00 +#define EHEA_BCMC_MULTICAST 0x04 +#define EHEA_BCMC_BROADCAST 0x00 +#define EHEA_BCMC_UNTAGGED 0x02 +#define EHEA_BCMC_TAGGED 0x00 +#define EHEA_BCMC_VLANID_ALL 0x01 +#define EHEA_BCMC_VLANID_SINGLE 0x00 + +#define EHEA_CACHE_LINE 128 + +/* Memory Regions */ +#define EHEA_MR_ACC_CTRL 0x00800000 + +#define EHEA_BUSMAP_START 0x8000000000000000ULL +#define EHEA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHEA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHEA_TOP_INDEX_SHIFT (EHEA_DIR_INDEX_SHIFT * 2) +#define EHEA_MAP_ENTRIES (1 << EHEA_DIR_INDEX_SHIFT) +#define EHEA_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHEA_INDEX_MASK (EHEA_MAP_ENTRIES - 1) + + +#define EHEA_WATCH_DOG_TIMEOUT 10*HZ + +/* utility functions */ + +void ehea_dump(void *adr, int len, char *msg); + +#define EHEA_BMASK(pos, length) (((pos) << 16) + (length)) + +#define EHEA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) + +#define EHEA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +#define EHEA_BMASK_MASK(mask) \ + (0xffffffffffffffffULL >> ((64 - (mask)) & 0xffff)) + +#define EHEA_BMASK_SET(mask, value) \ + ((EHEA_BMASK_MASK(mask) & ((u64)(value))) << EHEA_BMASK_SHIFTPOS(mask)) + +#define EHEA_BMASK_GET(mask, value) \ + (EHEA_BMASK_MASK(mask) & (((u64)(value)) >> EHEA_BMASK_SHIFTPOS(mask))) + +/* + * Generic ehea page + */ +struct ehea_page { + u8 entries[PAGE_SIZE]; +}; + +/* + * Generic queue in linux kernel virtual memory + */ +struct hw_queue { + u64 current_q_offset; /* current queue entry */ + struct ehea_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 reserved; /* 64 bit alignment */ +}; + +/* + * For pSeries this is a 64bit memory address where + * I/O memory is mapped into CPU address space + */ +struct h_epa { + void __iomem *addr; +}; + +struct h_epa_user { + u64 addr; +}; + +struct h_epas { + struct h_epa kernel; /* kernel space accessible resource, + set to 0 if unused */ + struct h_epa_user user; /* user space accessible resource + set to 0 if unused */ +}; + +/* + * Memory map data structures + */ +struct ehea_dir_bmap +{ + u64 ent[EHEA_MAP_ENTRIES]; +}; +struct ehea_top_bmap +{ + struct ehea_dir_bmap *dir[EHEA_MAP_ENTRIES]; +}; +struct ehea_bmap +{ + struct ehea_top_bmap *top[EHEA_MAP_ENTRIES]; +}; + +struct ehea_qp; +struct ehea_cq; +struct ehea_eq; +struct ehea_port; +struct ehea_av; + +/* + * Queue attributes passed to ehea_create_qp() + */ +struct ehea_qp_init_attr { + /* input parameter */ + u32 qp_token; /* queue token */ + u8 low_lat_rq1; + u8 signalingtype; /* cqe generation flag */ + u8 rq_count; /* num of receive queues */ + u8 eqe_gen; /* eqe generation flag */ + u16 max_nr_send_wqes; /* max number of send wqes */ + u16 max_nr_rwqes_rq1; /* max number of receive wqes */ + u16 max_nr_rwqes_rq2; + u16 max_nr_rwqes_rq3; + u8 wqe_size_enc_sq; + u8 wqe_size_enc_rq1; + u8 wqe_size_enc_rq2; + u8 wqe_size_enc_rq3; + u8 swqe_imm_data_len; /* immediate data length for swqes */ + u16 port_nr; + u16 rq2_threshold; + u16 rq3_threshold; + u64 send_cq_handle; + u64 recv_cq_handle; + u64 aff_eq_handle; + + /* output parameter */ + u32 qp_nr; + u16 act_nr_send_wqes; + u16 act_nr_rwqes_rq1; + u16 act_nr_rwqes_rq2; + u16 act_nr_rwqes_rq3; + u8 act_wqe_size_enc_sq; + u8 act_wqe_size_enc_rq1; + u8 act_wqe_size_enc_rq2; + u8 act_wqe_size_enc_rq3; + u32 nr_sq_pages; + u32 nr_rq1_pages; + u32 nr_rq2_pages; + u32 nr_rq3_pages; + u32 liobn_sq; + u32 liobn_rq1; + u32 liobn_rq2; + u32 liobn_rq3; +}; + +/* + * Event Queue attributes, passed as parameter + */ +struct ehea_eq_attr { + u32 type; + u32 max_nr_of_eqes; + u8 eqe_gen; /* generate eqe flag */ + u64 eq_handle; + u32 act_nr_of_eqes; + u32 nr_pages; + u32 ist1; /* Interrupt service token */ + u32 ist2; + u32 ist3; + u32 ist4; +}; + + +/* + * Event Queue + */ +struct ehea_eq { + struct ehea_adapter *adapter; + struct hw_queue hw_queue; + u64 fw_handle; + struct h_epas epas; + spinlock_t spinlock; + struct ehea_eq_attr attr; +}; + +/* + * HEA Queues + */ +struct ehea_qp { + struct ehea_adapter *adapter; + u64 fw_handle; /* QP handle for firmware calls */ + struct hw_queue hw_squeue; + struct hw_queue hw_rqueue1; + struct hw_queue hw_rqueue2; + struct hw_queue hw_rqueue3; + struct h_epas epas; + struct ehea_qp_init_attr init_attr; +}; + +/* + * Completion Queue attributes + */ +struct ehea_cq_attr { + /* input parameter */ + u32 max_nr_of_cqes; + u32 cq_token; + u64 eq_handle; + + /* output parameter */ + u32 act_nr_of_cqes; + u32 nr_pages; +}; + +/* + * Completion Queue + */ +struct ehea_cq { + struct ehea_adapter *adapter; + u64 fw_handle; + struct hw_queue hw_queue; + struct h_epas epas; + struct ehea_cq_attr attr; +}; + +/* + * Memory Region + */ +struct ehea_mr { + struct ehea_adapter *adapter; + u64 handle; + u64 vaddr; + u32 lkey; +}; + +/* + * Port state information + */ +struct port_stats { + int poll_receive_errors; + int queue_stopped; + int err_tcp_cksum; + int err_ip_cksum; + int err_frame_crc; +}; + +#define EHEA_IRQ_NAME_SIZE 20 + +/* + * Queue SKB Array + */ +struct ehea_q_skb_arr { + struct sk_buff **arr; /* skb array for queue */ + int len; /* array length */ + int index; /* array index */ + int os_skbs; /* rq2/rq3 only: outstanding skbs */ +}; + +/* + * Port resources + */ +struct ehea_port_res { + struct napi_struct napi; + struct port_stats p_stats; + struct ehea_mr send_mr; /* send memory region */ + struct ehea_mr recv_mr; /* receive memory region */ + spinlock_t xmit_lock; + struct ehea_port *port; + char int_recv_name[EHEA_IRQ_NAME_SIZE]; + char int_send_name[EHEA_IRQ_NAME_SIZE]; + struct ehea_qp *qp; + struct ehea_cq *send_cq; + struct ehea_cq *recv_cq; + struct ehea_eq *eq; + struct ehea_q_skb_arr rq1_skba; + struct ehea_q_skb_arr rq2_skba; + struct ehea_q_skb_arr rq3_skba; + struct ehea_q_skb_arr sq_skba; + int sq_skba_size; + spinlock_t netif_queue; + int queue_stopped; + int swqe_refill_th; + atomic_t swqe_avail; + int swqe_ll_count; + u32 swqe_id_counter; + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets; + u64 rx_bytes; + u32 poll_counter; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[MAX_LRO_DESCRIPTORS]; + int sq_restart_flag; +}; + + +#define EHEA_MAX_PORTS 16 + +#define EHEA_NUM_PORTRES_FW_HANDLES 6 /* QP handle, SendCQ handle, + RecvCQ handle, EQ handle, + SendMR handle, RecvMR handle */ +#define EHEA_NUM_PORT_FW_HANDLES 1 /* EQ handle */ +#define EHEA_NUM_ADAPTER_FW_HANDLES 2 /* MR handle, NEQ handle */ + +struct ehea_adapter { + u64 handle; + struct platform_device *ofdev; + struct ehea_port *port[EHEA_MAX_PORTS]; + struct ehea_eq *neq; /* notification event queue */ + struct tasklet_struct neq_tasklet; + struct ehea_mr mr; + u32 pd; /* protection domain */ + u64 max_mc_mac; /* max number of multicast mac addresses */ + int active_ports; + struct list_head list; +}; + + +struct ehea_mc_list { + struct list_head list; + u64 macaddr; +}; + +/* kdump support */ +struct ehea_fw_handle_entry { + u64 adh; /* Adapter Handle */ + u64 fwh; /* Firmware Handle */ +}; + +struct ehea_fw_handle_array { + struct ehea_fw_handle_entry *arr; + int num_entries; + struct mutex lock; +}; + +struct ehea_bcmc_reg_entry { + u64 adh; /* Adapter Handle */ + u32 port_id; /* Logical Port Id */ + u8 reg_type; /* Registration Type */ + u64 macaddr; +}; + +struct ehea_bcmc_reg_array { + struct ehea_bcmc_reg_entry *arr; + int num_entries; + spinlock_t lock; +}; + +#define EHEA_PORT_UP 1 +#define EHEA_PORT_DOWN 0 +#define EHEA_PHY_LINK_UP 1 +#define EHEA_PHY_LINK_DOWN 0 +#define EHEA_MAX_PORT_RES 16 +struct ehea_port { + struct ehea_adapter *adapter; /* adapter that owns this port */ + struct net_device *netdev; + struct net_device_stats stats; + struct ehea_port_res port_res[EHEA_MAX_PORT_RES]; + struct platform_device ofdev; /* Open Firmware Device */ + struct ehea_mc_list *mc_list; /* Multicast MAC addresses */ + struct ehea_eq *qp_eq; + struct work_struct reset_task; + struct delayed_work stats_work; + struct mutex port_lock; + char int_aff_name[EHEA_IRQ_NAME_SIZE]; + int allmulti; /* Indicates IFF_ALLMULTI state */ + int promisc; /* Indicates IFF_PROMISC state */ + int num_tx_qps; + int num_add_tx_qps; + int num_mcs; + int resets; + unsigned long flags; + u64 mac_addr; + u32 logical_port_id; + u32 port_speed; + u32 msg_enable; + u32 sig_comp_iv; + u32 state; + u32 lro_max_aggr; + u8 phy_link; + u8 full_duplex; + u8 autoneg; + u8 num_def_qps; + wait_queue_head_t swqe_avail_wq; + wait_queue_head_t restart_wq; +}; + +struct port_res_cfg { + int max_entries_rcq; + int max_entries_scq; + int max_entries_sq; + int max_entries_rq1; + int max_entries_rq2; + int max_entries_rq3; +}; + +enum ehea_flag_bits { + __EHEA_STOP_XFER, + __EHEA_DISABLE_PORT_RESET +}; + +void ehea_set_ethtool_ops(struct net_device *netdev); +int ehea_sense_port_attr(struct ehea_port *port); +int ehea_set_portspeed(struct ehea_port *port, u32 port_speed); + +#endif /* __EHEA_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c new file mode 100644 index 000000000000..7f642aef5e82 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c @@ -0,0 +1,295 @@ +/* + * linux/drivers/net/ehea/ehea_ethtool.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ehea.h" +#include "ehea_phyp.h" + +static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct ehea_port *port = netdev_priv(dev); + u32 speed; + int ret; + + ret = ehea_sense_port_attr(port); + + if (ret) + return ret; + + if (netif_carrier_ok(dev)) { + switch (port->port_speed) { + case EHEA_SPEED_10M: + speed = SPEED_10; + break; + case EHEA_SPEED_100M: + speed = SPEED_100; + break; + case EHEA_SPEED_1G: + speed = SPEED_1000; + break; + case EHEA_SPEED_10G: + speed = SPEED_10000; + break; + default: + speed = -1; + break; /* BUG */ + } + cmd->duplex = port->full_duplex == 1 ? + DUPLEX_FULL : DUPLEX_HALF; + } else { + speed = ~0; + cmd->duplex = -1; + } + ethtool_cmd_speed_set(cmd, speed); + + if (cmd->speed == SPEED_10000) { + cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + cmd->port = PORT_FIBRE; + } else { + cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full + | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full + | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg + | SUPPORTED_TP); + cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg + | ADVERTISED_TP); + cmd->port = PORT_TP; + } + + cmd->autoneg = port->autoneg == 1 ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + return 0; +} + +static int ehea_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct ehea_port *port = netdev_priv(dev); + int ret = 0; + u32 sp; + + if (cmd->autoneg == AUTONEG_ENABLE) { + sp = EHEA_SPEED_AUTONEG; + goto doit; + } + + switch (cmd->speed) { + case SPEED_10: + if (cmd->duplex == DUPLEX_FULL) + sp = H_SPEED_10M_F; + else + sp = H_SPEED_10M_H; + break; + + case SPEED_100: + if (cmd->duplex == DUPLEX_FULL) + sp = H_SPEED_100M_F; + else + sp = H_SPEED_100M_H; + break; + + case SPEED_1000: + if (cmd->duplex == DUPLEX_FULL) + sp = H_SPEED_1G_F; + else + ret = -EINVAL; + break; + + case SPEED_10000: + if (cmd->duplex == DUPLEX_FULL) + sp = H_SPEED_10G_F; + else + ret = -EINVAL; + break; + + default: + ret = -EINVAL; + break; + } + + if (ret) + goto out; +doit: + ret = ehea_set_portspeed(port, sp); + + if (!ret) + netdev_info(dev, + "Port speed successfully set: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? "Full" : "Half"); +out: + return ret; +} + +static int ehea_nway_reset(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + int ret; + + ret = ehea_set_portspeed(port, EHEA_SPEED_AUTONEG); + + if (!ret) + netdev_info(port->netdev, + "Port speed successfully set: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? "Full" : "Half"); + return ret; +} + +static void ehea_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_VERSION, sizeof(info->version)); +} + +static u32 ehea_get_msglevel(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + return port->msg_enable; +} + +static void ehea_set_msglevel(struct net_device *dev, u32 value) +{ + struct ehea_port *port = netdev_priv(dev); + port->msg_enable = value; +} + +static char ehea_ethtool_stats_keys[][ETH_GSTRING_LEN] = { + {"sig_comp_iv"}, + {"swqe_refill_th"}, + {"port resets"}, + {"Receive errors"}, + {"TCP cksum errors"}, + {"IP cksum errors"}, + {"Frame cksum errors"}, + {"num SQ stopped"}, + {"SQ stopped"}, + {"PR0 free_swqes"}, + {"PR1 free_swqes"}, + {"PR2 free_swqes"}, + {"PR3 free_swqes"}, + {"PR4 free_swqes"}, + {"PR5 free_swqes"}, + {"PR6 free_swqes"}, + {"PR7 free_swqes"}, + {"LRO aggregated"}, + {"LRO flushed"}, + {"LRO no_desc"}, +}; + +static void ehea_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) { + memcpy(data, &ehea_ethtool_stats_keys, + sizeof(ehea_ethtool_stats_keys)); + } +} + +static int ehea_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ehea_ethtool_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ehea_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i, k, tmp; + struct ehea_port *port = netdev_priv(dev); + + for (i = 0; i < ehea_get_sset_count(dev, ETH_SS_STATS); i++) + data[i] = 0; + i = 0; + + data[i++] = port->sig_comp_iv; + data[i++] = port->port_res[0].swqe_refill_th; + data[i++] = port->resets; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.poll_receive_errors; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_tcp_cksum; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_ip_cksum; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.err_frame_crc; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].p_stats.queue_stopped; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp |= port->port_res[k].queue_stopped; + data[i++] = tmp; + + for (k = 0; k < 8; k++) + data[i++] = atomic_read(&port->port_res[k].swqe_avail); + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp |= port->port_res[k].lro_mgr.stats.aggregated; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp |= port->port_res[k].lro_mgr.stats.flushed; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp |= port->port_res[k].lro_mgr.stats.no_desc; + data[i++] = tmp; + +} + +const struct ethtool_ops ehea_ethtool_ops = { + .get_settings = ehea_get_settings, + .get_drvinfo = ehea_get_drvinfo, + .get_msglevel = ehea_get_msglevel, + .set_msglevel = ehea_set_msglevel, + .get_link = ethtool_op_get_link, + .get_strings = ehea_get_strings, + .get_sset_count = ehea_get_sset_count, + .get_ethtool_stats = ehea_get_ethtool_stats, + .set_settings = ehea_set_settings, + .nway_reset = ehea_nway_reset, /* Restart autonegotiation */ +}; + +void ehea_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops); +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_hw.h b/drivers/net/ethernet/ibm/ehea/ehea_hw.h new file mode 100644 index 000000000000..567981b4b2cc --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_hw.h @@ -0,0 +1,292 @@ +/* + * linux/drivers/net/ehea/ehea_hw.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EHEA_HW_H__ +#define __EHEA_HW_H__ + +#define QPX_SQA_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ1A_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ2A_VALUE EHEA_BMASK_IBM(48, 63) +#define QPX_RQ3A_VALUE EHEA_BMASK_IBM(48, 63) + +#define QPTEMM_OFFSET(x) offsetof(struct ehea_qptemm, x) + +struct ehea_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rq1a; + u64 qpx_rq1c; + u64 qpx_st; + u64 qpx_aerr; + u64 qpx_tenure; + u64 qpx_reserved1[(0x098 - 0x058) / 8]; + u64 qpx_portp; + u64 qpx_reserved2[(0x100 - 0x0A0) / 8]; + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_reserved3[(0x140 - 0x118) / 8]; + u64 qpx_sqwsize; + u64 qpx_reserved4[(0x170 - 0x148) / 8]; + u64 qpx_sqsize; + u64 qpx_reserved5[(0x1B0 - 0x178) / 8]; + u64 qpx_sigt; + u64 qpx_wqecnt; + u64 qpx_rq1hp; + u64 qpx_rq1ptp; + u64 qpx_rq1size; + u64 qpx_reserved6[(0x220 - 0x1D8) / 8]; + u64 qpx_rq1wsize; + u64 qpx_reserved7[(0x240 - 0x228) / 8]; + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; + u64 reserved49; + u64 qpx_ram; + u64 qpx_reserved8[(0x300 - 0x270) / 8]; + u64 qpx_rq2a; + u64 qpx_rq2c; + u64 qpx_rq2hp; + u64 qpx_rq2ptp; + u64 qpx_rq2size; + u64 qpx_rq2wsize; + u64 qpx_rq2th; + u64 qpx_rq3a; + u64 qpx_rq3c; + u64 qpx_rq3hp; + u64 qpx_rq3ptp; + u64 qpx_rq3size; + u64 qpx_rq3wsize; + u64 qpx_rq3th; + u64 qpx_lpn; + u64 qpx_reserved9[(0x400 - 0x378) / 8]; + u64 reserved_ext[(0x500 - 0x400) / 8]; + u64 reserved2[(0x1000 - 0x500) / 8]; +}; + +#define MRx_HCR_LPARID_VALID EHEA_BMASK_IBM(0, 0) + +#define MRMWMM_OFFSET(x) offsetof(struct ehea_mrmwmm, x) + +struct ehea_mrmwmm { + u64 mrx_hcr; + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + u64 reserved4[(0x200 - 0x40) / 8]; + u64 mrx_ctl[64]; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct ehea_qpedmm, x) + +struct ehea_qpedmm { + + u64 reserved0[(0x400) / 8]; + u64 qpedx_phh; + u64 qpedx_ppsgp; + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + u64 qpedx_ppdgu; + u64 qpedx_aph; + u64 qpedx_apsgp; + u64 qpedx_apsgu; + u64 qpedx_apdgp; + u64 qpedx_apdgu; + u64 qpedx_apav; + u64 qpedx_apsav; + u64 qpedx_hcr; + u64 reserved1[4]; + u64 qpedx_rrl0; + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + u64 reserved2; + u64 qpedx_rrl1; + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + u64 reserved3; + u64 qpedx_rrl2; + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + u64 reserved4; + u64 qpedx_rrl3; + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define CQX_FECADDER EHEA_BMASK_IBM(32, 63) +#define CQX_FEC_CQE_CNT EHEA_BMASK_IBM(32, 63) +#define CQX_N1_GENERATE_COMP_EVENT EHEA_BMASK_IBM(0, 0) +#define CQX_EP_EVENT_PENDING EHEA_BMASK_IBM(0, 0) + +#define CQTEMM_OFFSET(x) offsetof(struct ehea_cqtemm, x) + +struct ehea_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; + u64 cqx_ep; + u64 cqx_eq; + u64 reserved1; + u64 cqx_n0; + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +}; + +#define EQTEMM_OFFSET(x) offsetof(struct ehea_eqtemm, x) + +struct ehea_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + u64 eqx_herr; + u64 eqx_aer; + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; + u64 eqx_it; +}; + +/* + * These access functions will be changed when the dissuccsion about + * the new access methods for POWER has settled. + */ + +static inline u64 epa_load(struct h_epa epa, u32 offset) +{ + return __raw_readq((void __iomem *)(epa.addr + offset)); +} + +static inline void epa_store(struct h_epa epa, u32 offset, u64 value) +{ + __raw_writeq(value, (void __iomem *)(epa.addr + offset)); + epa_load(epa, offset); /* synchronize explicitly to eHEA */ +} + +static inline void epa_store_acc(struct h_epa epa, u32 offset, u64 value) +{ + __raw_writeq(value, (void __iomem *)(epa.addr + offset)); +} + +#define epa_store_eq(epa, offset, value)\ + epa_store(epa, EQTEMM_OFFSET(offset), value) +#define epa_load_eq(epa, offset)\ + epa_load(epa, EQTEMM_OFFSET(offset)) + +#define epa_store_cq(epa, offset, value)\ + epa_store(epa, CQTEMM_OFFSET(offset), value) +#define epa_load_cq(epa, offset)\ + epa_load(epa, CQTEMM_OFFSET(offset)) + +#define epa_store_qp(epa, offset, value)\ + epa_store(epa, QPTEMM_OFFSET(offset), value) +#define epa_load_qp(epa, offset)\ + epa_load(epa, QPTEMM_OFFSET(offset)) + +#define epa_store_qped(epa, offset, value)\ + epa_store(epa, QPEDMM_OFFSET(offset), value) +#define epa_load_qped(epa, offset)\ + epa_load(epa, QPEDMM_OFFSET(offset)) + +#define epa_store_mrmw(epa, offset, value)\ + epa_store(epa, MRMWMM_OFFSET(offset), value) +#define epa_load_mrmw(epa, offset)\ + epa_load(epa, MRMWMM_OFFSET(offset)) + +#define epa_store_base(epa, offset, value)\ + epa_store(epa, HCAGR_OFFSET(offset), value) +#define epa_load_base(epa, offset)\ + epa_load(epa, HCAGR_OFFSET(offset)) + +static inline void ehea_update_sqa(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_sqa), + EHEA_BMASK_SET(QPX_SQA_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq3a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq3a), + EHEA_BMASK_SET(QPX_RQ1A_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq2a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq2a), + EHEA_BMASK_SET(QPX_RQ2A_VALUE, nr_wqes)); +} + +static inline void ehea_update_rq1a(struct ehea_qp *qp, u16 nr_wqes) +{ + struct h_epa epa = qp->epas.kernel; + epa_store_acc(epa, QPTEMM_OFFSET(qpx_rq1a), + EHEA_BMASK_SET(QPX_RQ3A_VALUE, nr_wqes)); +} + +static inline void ehea_update_feca(struct ehea_cq *cq, u32 nr_cqes) +{ + struct h_epa epa = cq->epas.kernel; + epa_store_acc(epa, CQTEMM_OFFSET(cqx_feca), + EHEA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void ehea_reset_cq_n1(struct ehea_cq *cq) +{ + struct h_epa epa = cq->epas.kernel; + epa_store_cq(epa, cqx_n1, + EHEA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, 1)); +} + +static inline void ehea_reset_cq_ep(struct ehea_cq *my_cq) +{ + struct h_epa epa = my_cq->epas.kernel; + epa_store_acc(epa, CQTEMM_OFFSET(cqx_ep), + EHEA_BMASK_SET(CQX_EP_EVENT_PENDING, 0)); +} + +#endif /* __EHEA_HW_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c new file mode 100644 index 000000000000..c821cb653999 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -0,0 +1,3781 @@ +/* + * linux/drivers/net/ehea/ehea_main.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/if_ether.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/memory.h> +#include <asm/kexec.h> +#include <linux/mutex.h> +#include <linux/prefetch.h> + +#include <net/ip.h> + +#include "ehea.h" +#include "ehea_qmr.h" +#include "ehea_phyp.h" + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); +MODULE_DESCRIPTION("IBM eServer HEA Driver"); +MODULE_VERSION(DRV_VERSION); + + +static int msg_level = -1; +static int rq1_entries = EHEA_DEF_ENTRIES_RQ1; +static int rq2_entries = EHEA_DEF_ENTRIES_RQ2; +static int rq3_entries = EHEA_DEF_ENTRIES_RQ3; +static int sq_entries = EHEA_DEF_ENTRIES_SQ; +static int use_mcs; +static int use_lro; +static int lro_max_aggr = EHEA_LRO_MAX_AGGR; +static int num_tx_qps = EHEA_NUM_TX_QP; +static int prop_carrier_state; + +module_param(msg_level, int, 0); +module_param(rq1_entries, int, 0); +module_param(rq2_entries, int, 0); +module_param(rq3_entries, int, 0); +module_param(sq_entries, int, 0); +module_param(prop_carrier_state, int, 0); +module_param(use_mcs, int, 0); +module_param(use_lro, int, 0); +module_param(lro_max_aggr, int, 0); +module_param(num_tx_qps, int, 0); + +MODULE_PARM_DESC(num_tx_qps, "Number of TX-QPS"); +MODULE_PARM_DESC(msg_level, "msg_level"); +MODULE_PARM_DESC(prop_carrier_state, "Propagate carrier state of physical " + "port to stack. 1:yes, 0:no. Default = 0 "); +MODULE_PARM_DESC(rq3_entries, "Number of entries for Receive Queue 3 " + "[2^x - 1], x = [6..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ3) ")"); +MODULE_PARM_DESC(rq2_entries, "Number of entries for Receive Queue 2 " + "[2^x - 1], x = [6..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ2) ")"); +MODULE_PARM_DESC(rq1_entries, "Number of entries for Receive Queue 1 " + "[2^x - 1], x = [6..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_RQ1) ")"); +MODULE_PARM_DESC(sq_entries, " Number of entries for the Send Queue " + "[2^x - 1], x = [6..14]. Default = " + __MODULE_STRING(EHEA_DEF_ENTRIES_SQ) ")"); +MODULE_PARM_DESC(use_mcs, " 0:NAPI, 1:Multiple receive queues, Default = 0 "); + +MODULE_PARM_DESC(lro_max_aggr, " LRO: Max packets to be aggregated. Default = " + __MODULE_STRING(EHEA_LRO_MAX_AGGR)); +MODULE_PARM_DESC(use_lro, " Large Receive Offload, 1: enable, 0: disable, " + "Default = 0"); + +static int port_name_cnt; +static LIST_HEAD(adapter_list); +static unsigned long ehea_driver_flags; +static DEFINE_MUTEX(dlpar_mem_lock); +struct ehea_fw_handle_array ehea_fw_handles; +struct ehea_bcmc_reg_array ehea_bcmc_regs; + + +static int __devinit ehea_probe_adapter(struct platform_device *dev, + const struct of_device_id *id); + +static int __devexit ehea_remove(struct platform_device *dev); + +static struct of_device_id ehea_device_table[] = { + { + .name = "lhea", + .compatible = "IBM,lhea", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehea_device_table); + +static struct of_platform_driver ehea_driver = { + .driver = { + .name = "ehea", + .owner = THIS_MODULE, + .of_match_table = ehea_device_table, + }, + .probe = ehea_probe_adapter, + .remove = ehea_remove, +}; + +void ehea_dump(void *adr, int len, char *msg) +{ + int x; + unsigned char *deb = adr; + for (x = 0; x < len; x += 16) { + pr_info("%s adr=%p ofs=%04x %016llx %016llx\n", + msg, deb, x, *((u64 *)&deb[0]), *((u64 *)&deb[8])); + deb += 16; + } +} + +void ehea_schedule_port_reset(struct ehea_port *port) +{ + if (!test_bit(__EHEA_DISABLE_PORT_RESET, &port->flags)) + schedule_work(&port->reset_task); +} + +static void ehea_update_firmware_handles(void) +{ + struct ehea_fw_handle_entry *arr = NULL; + struct ehea_adapter *adapter; + int num_adapters = 0; + int num_ports = 0; + int num_portres = 0; + int i = 0; + int num_fw_handles, k, l; + + /* Determine number of handles */ + mutex_lock(&ehea_fw_handles.lock); + + list_for_each_entry(adapter, &adapter_list, list) { + num_adapters++; + + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + num_ports++; + num_portres += port->num_def_qps + port->num_add_tx_qps; + } + } + + num_fw_handles = num_adapters * EHEA_NUM_ADAPTER_FW_HANDLES + + num_ports * EHEA_NUM_PORT_FW_HANDLES + + num_portres * EHEA_NUM_PORTRES_FW_HANDLES; + + if (num_fw_handles) { + arr = kcalloc(num_fw_handles, sizeof(*arr), GFP_KERNEL); + if (!arr) + goto out; /* Keep the existing array */ + } else + goto out_update; + + list_for_each_entry(adapter, &adapter_list, list) { + if (num_adapters == 0) + break; + + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP) || + (num_ports == 0)) + continue; + + for (l = 0; + l < port->num_def_qps + port->num_add_tx_qps; + l++) { + struct ehea_port_res *pr = &port->port_res[l]; + + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->qp->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->send_cq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->recv_cq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->eq->fw_handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->send_mr.handle; + arr[i].adh = adapter->handle; + arr[i++].fwh = pr->recv_mr.handle; + } + arr[i].adh = adapter->handle; + arr[i++].fwh = port->qp_eq->fw_handle; + num_ports--; + } + + arr[i].adh = adapter->handle; + arr[i++].fwh = adapter->neq->fw_handle; + + if (adapter->mr.handle) { + arr[i].adh = adapter->handle; + arr[i++].fwh = adapter->mr.handle; + } + num_adapters--; + } + +out_update: + kfree(ehea_fw_handles.arr); + ehea_fw_handles.arr = arr; + ehea_fw_handles.num_entries = i; +out: + mutex_unlock(&ehea_fw_handles.lock); +} + +static void ehea_update_bcmc_registrations(void) +{ + unsigned long flags; + struct ehea_bcmc_reg_entry *arr = NULL; + struct ehea_adapter *adapter; + struct ehea_mc_list *mc_entry; + int num_registrations = 0; + int i = 0; + int k; + + spin_lock_irqsave(&ehea_bcmc_regs.lock, flags); + + /* Determine number of registrations */ + list_for_each_entry(adapter, &adapter_list, list) + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + num_registrations += 2; /* Broadcast registrations */ + + list_for_each_entry(mc_entry, &port->mc_list->list,list) + num_registrations += 2; + } + + if (num_registrations) { + arr = kcalloc(num_registrations, sizeof(*arr), GFP_ATOMIC); + if (!arr) + goto out; /* Keep the existing array */ + } else + goto out_update; + + list_for_each_entry(adapter, &adapter_list, list) { + for (k = 0; k < EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter->port[k]; + + if (!port || (port->state != EHEA_PORT_UP)) + continue; + + if (num_registrations == 0) + goto out_update; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_BROADCAST | + EHEA_BCMC_UNTAGGED; + arr[i++].macaddr = port->mac_addr; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_BROADCAST | + EHEA_BCMC_VLANID_ALL; + arr[i++].macaddr = port->mac_addr; + num_registrations -= 2; + + list_for_each_entry(mc_entry, + &port->mc_list->list, list) { + if (num_registrations == 0) + goto out_update; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_SCOPE_ALL | + EHEA_BCMC_MULTICAST | + EHEA_BCMC_UNTAGGED; + arr[i++].macaddr = mc_entry->macaddr; + + arr[i].adh = adapter->handle; + arr[i].port_id = port->logical_port_id; + arr[i].reg_type = EHEA_BCMC_SCOPE_ALL | + EHEA_BCMC_MULTICAST | + EHEA_BCMC_VLANID_ALL; + arr[i++].macaddr = mc_entry->macaddr; + num_registrations -= 2; + } + } + } + +out_update: + kfree(ehea_bcmc_regs.arr); + ehea_bcmc_regs.arr = arr; + ehea_bcmc_regs.num_entries = i; +out: + spin_unlock_irqrestore(&ehea_bcmc_regs.lock, flags); +} + +static struct net_device_stats *ehea_get_stats(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct net_device_stats *stats = &port->stats; + u64 rx_packets = 0, tx_packets = 0, rx_bytes = 0, tx_bytes = 0; + int i; + + for (i = 0; i < port->num_def_qps; i++) { + rx_packets += port->port_res[i].rx_packets; + rx_bytes += port->port_res[i].rx_bytes; + } + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + tx_packets += port->port_res[i].tx_packets; + tx_bytes += port->port_res[i].tx_bytes; + } + + stats->tx_packets = tx_packets; + stats->rx_bytes = rx_bytes; + stats->tx_bytes = tx_bytes; + stats->rx_packets = rx_packets; + + return &port->stats; +} + +static void ehea_update_stats(struct work_struct *work) +{ + struct ehea_port *port = + container_of(work, struct ehea_port, stats_work.work); + struct net_device *dev = port->netdev; + struct net_device_stats *stats = &port->stats; + struct hcp_ehea_port_cb2 *cb2; + u64 hret; + + cb2 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb2) { + netdev_err(dev, "No mem for cb2. Some interface statistics were not updated\n"); + goto resched; + } + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB2, H_PORT_CB2_ALL, cb2); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_port failed\n"); + goto out_herr; + } + + if (netif_msg_hw(port)) + ehea_dump(cb2, sizeof(*cb2), "net_device_stats"); + + stats->multicast = cb2->rxmcp; + stats->rx_errors = cb2->rxuerr; + +out_herr: + free_page((unsigned long)cb2); +resched: + schedule_delayed_work(&port->stats_work, msecs_to_jiffies(1000)); +} + +static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes) +{ + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct net_device *dev = pr->port->netdev; + int max_index_mask = pr->rq1_skba.len - 1; + int fill_wqes = pr->rq1_skba.os_skbs + nr_of_wqes; + int adder = 0; + int i; + + pr->rq1_skba.os_skbs = 0; + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + if (nr_of_wqes > 0) + pr->rq1_skba.index = index; + pr->rq1_skba.os_skbs = fill_wqes; + return; + } + + for (i = 0; i < fill_wqes; i++) { + if (!skb_arr_rq1[index]) { + skb_arr_rq1[index] = netdev_alloc_skb(dev, + EHEA_L_PKT_SIZE); + if (!skb_arr_rq1[index]) { + netdev_info(dev, "Unable to allocate enough skb in the array\n"); + pr->rq1_skba.os_skbs = fill_wqes - i; + break; + } + } + index--; + index &= max_index_mask; + adder++; + } + + if (adder == 0) + return; + + /* Ring doorbell */ + ehea_update_rq1a(pr->qp, adder); +} + +static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a) +{ + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct net_device *dev = pr->port->netdev; + int i; + + if (nr_rq1a > pr->rq1_skba.len) { + netdev_err(dev, "NR_RQ1A bigger than skb array len\n"); + return; + } + + for (i = 0; i < nr_rq1a; i++) { + skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); + if (!skb_arr_rq1[i]) { + netdev_info(dev, "Not enough memory to allocate skb array\n"); + break; + } + } + /* Ring doorbell */ + ehea_update_rq1a(pr->qp, i - 1); +} + +static int ehea_refill_rq_def(struct ehea_port_res *pr, + struct ehea_q_skb_arr *q_skba, int rq_nr, + int num_wqes, int wqe_type, int packet_size) +{ + struct net_device *dev = pr->port->netdev; + struct ehea_qp *qp = pr->qp; + struct sk_buff **skb_arr = q_skba->arr; + struct ehea_rwqe *rwqe; + int i, index, max_index_mask, fill_wqes; + int adder = 0; + int ret = 0; + + fill_wqes = q_skba->os_skbs + num_wqes; + q_skba->os_skbs = 0; + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + q_skba->os_skbs = fill_wqes; + return ret; + } + + index = q_skba->index; + max_index_mask = q_skba->len - 1; + for (i = 0; i < fill_wqes; i++) { + u64 tmp_addr; + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev, packet_size); + if (!skb) { + q_skba->os_skbs = fill_wqes - i; + if (q_skba->os_skbs == q_skba->len - 2) { + netdev_info(pr->port->netdev, + "rq%i ran dry - no mem for skb\n", + rq_nr); + ret = -ENOMEM; + } + break; + } + + skb_arr[index] = skb; + tmp_addr = ehea_map_vaddr(skb->data); + if (tmp_addr == -1) { + dev_kfree_skb(skb); + q_skba->os_skbs = fill_wqes - i; + ret = 0; + break; + } + + rwqe = ehea_get_next_rwqe(qp, rq_nr); + rwqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, wqe_type) + | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, index); + rwqe->sg_list[0].l_key = pr->recv_mr.lkey; + rwqe->sg_list[0].vaddr = tmp_addr; + rwqe->sg_list[0].len = packet_size; + rwqe->data_segments = 1; + + index++; + index &= max_index_mask; + adder++; + } + + q_skba->index = index; + if (adder == 0) + goto out; + + /* Ring doorbell */ + iosync(); + if (rq_nr == 2) + ehea_update_rq2a(pr->qp, adder); + else + ehea_update_rq3a(pr->qp, adder); +out: + return ret; +} + + +static int ehea_refill_rq2(struct ehea_port_res *pr, int nr_of_wqes) +{ + return ehea_refill_rq_def(pr, &pr->rq2_skba, 2, + nr_of_wqes, EHEA_RWQE2_TYPE, + EHEA_RQ2_PKT_SIZE); +} + + +static int ehea_refill_rq3(struct ehea_port_res *pr, int nr_of_wqes) +{ + return ehea_refill_rq_def(pr, &pr->rq3_skba, 3, + nr_of_wqes, EHEA_RWQE3_TYPE, + EHEA_MAX_PACKET_SIZE); +} + +static inline int ehea_check_cqe(struct ehea_cqe *cqe, int *rq_num) +{ + *rq_num = (cqe->type & EHEA_CQE_TYPE_RQ) >> 5; + if ((cqe->status & EHEA_CQE_STAT_ERR_MASK) == 0) + return 0; + if (((cqe->status & EHEA_CQE_STAT_ERR_TCP) != 0) && + (cqe->header_length == 0)) + return 0; + return -EINVAL; +} + +static inline void ehea_fill_skb(struct net_device *dev, + struct sk_buff *skb, struct ehea_cqe *cqe) +{ + int length = cqe->num_bytes_transfered - 4; /*remove CRC */ + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + + /* The packet was not an IPV4 packet so a complemented checksum was + calculated. The value is found in the Internet Checksum field. */ + if (cqe->status & EHEA_CQE_BLIND_CKSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold(~cqe->inet_checksum_value); + } else + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +static inline struct sk_buff *get_skb_by_index(struct sk_buff **skb_array, + int arr_len, + struct ehea_cqe *cqe) +{ + int skb_index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id); + struct sk_buff *skb; + void *pref; + int x; + + x = skb_index + 1; + x &= (arr_len - 1); + + pref = skb_array[x]; + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + + pref = (skb_array[x]->data); + prefetch(pref); + prefetch(pref + EHEA_CACHE_LINE); + prefetch(pref + EHEA_CACHE_LINE * 2); + prefetch(pref + EHEA_CACHE_LINE * 3); + } + + skb = skb_array[skb_index]; + skb_array[skb_index] = NULL; + return skb; +} + +static inline struct sk_buff *get_skb_by_index_ll(struct sk_buff **skb_array, + int arr_len, int wqe_index) +{ + struct sk_buff *skb; + void *pref; + int x; + + x = wqe_index + 1; + x &= (arr_len - 1); + + pref = skb_array[x]; + if (pref) { + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + + pref = (skb_array[x]->data); + prefetchw(pref); + prefetchw(pref + EHEA_CACHE_LINE); + } + + skb = skb_array[wqe_index]; + skb_array[wqe_index] = NULL; + return skb; +} + +static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq, + struct ehea_cqe *cqe, int *processed_rq2, + int *processed_rq3) +{ + struct sk_buff *skb; + + if (cqe->status & EHEA_CQE_STAT_ERR_TCP) + pr->p_stats.err_tcp_cksum++; + if (cqe->status & EHEA_CQE_STAT_ERR_IP) + pr->p_stats.err_ip_cksum++; + if (cqe->status & EHEA_CQE_STAT_ERR_CRC) + pr->p_stats.err_frame_crc++; + + if (rq == 2) { + *processed_rq2 += 1; + skb = get_skb_by_index(pr->rq2_skba.arr, pr->rq2_skba.len, cqe); + dev_kfree_skb(skb); + } else if (rq == 3) { + *processed_rq3 += 1; + skb = get_skb_by_index(pr->rq3_skba.arr, pr->rq3_skba.len, cqe); + dev_kfree_skb(skb); + } + + if (cqe->status & EHEA_CQE_STAT_FAT_ERR_MASK) { + if (netif_msg_rx_err(pr->port)) { + pr_err("Critical receive error for QP %d. Resetting port.\n", + pr->qp->init_attr.qp_nr); + ehea_dump(cqe, sizeof(*cqe), "CQE"); + } + ehea_schedule_port_reset(pr->port); + return 1; + } + + return 0; +} + +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + struct ehea_cqe *cqe = priv; + unsigned int ip_len; + struct iphdr *iph; + + /* non tcp/udp packets */ + if (!cqe->header_length) + return -1; + + /* non tcp packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if ip header and tcp header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ehea_proc_skb(struct ehea_port_res *pr, struct ehea_cqe *cqe, + struct sk_buff *skb) +{ + if (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT) + __vlan_hwaccel_put_tag(skb, cqe->vlan_tag); + + if (skb->dev->features & NETIF_F_LRO) + lro_receive_skb(&pr->lro_mgr, skb, cqe); + else + netif_receive_skb(skb); +} + +static int ehea_proc_rwqes(struct net_device *dev, + struct ehea_port_res *pr, + int budget) +{ + struct ehea_port *port = pr->port; + struct ehea_qp *qp = pr->qp; + struct ehea_cqe *cqe; + struct sk_buff *skb; + struct sk_buff **skb_arr_rq1 = pr->rq1_skba.arr; + struct sk_buff **skb_arr_rq2 = pr->rq2_skba.arr; + struct sk_buff **skb_arr_rq3 = pr->rq3_skba.arr; + int skb_arr_rq1_len = pr->rq1_skba.len; + int skb_arr_rq2_len = pr->rq2_skba.len; + int skb_arr_rq3_len = pr->rq3_skba.len; + int processed, processed_rq1, processed_rq2, processed_rq3; + u64 processed_bytes = 0; + int wqe_index, last_wqe_index, rq, port_reset; + + processed = processed_rq1 = processed_rq2 = processed_rq3 = 0; + last_wqe_index = 0; + + cqe = ehea_poll_rq1(qp, &wqe_index); + while ((processed < budget) && cqe) { + ehea_inc_rq1(qp); + processed_rq1++; + processed++; + if (netif_msg_rx_status(port)) + ehea_dump(cqe, sizeof(*cqe), "CQE"); + + last_wqe_index = wqe_index; + rmb(); + if (!ehea_check_cqe(cqe, &rq)) { + if (rq == 1) { + /* LL RQ1 */ + skb = get_skb_by_index_ll(skb_arr_rq1, + skb_arr_rq1_len, + wqe_index); + if (unlikely(!skb)) { + netif_info(port, rx_err, dev, + "LL rq1: skb=NULL\n"); + + skb = netdev_alloc_skb(dev, + EHEA_L_PKT_SIZE); + if (!skb) { + netdev_err(dev, "Not enough memory to allocate skb\n"); + break; + } + } + skb_copy_to_linear_data(skb, ((char *)cqe) + 64, + cqe->num_bytes_transfered - 4); + ehea_fill_skb(dev, skb, cqe); + } else if (rq == 2) { + /* RQ2 */ + skb = get_skb_by_index(skb_arr_rq2, + skb_arr_rq2_len, cqe); + if (unlikely(!skb)) { + netif_err(port, rx_err, dev, + "rq2: skb=NULL\n"); + break; + } + ehea_fill_skb(dev, skb, cqe); + processed_rq2++; + } else { + /* RQ3 */ + skb = get_skb_by_index(skb_arr_rq3, + skb_arr_rq3_len, cqe); + if (unlikely(!skb)) { + netif_err(port, rx_err, dev, + "rq3: skb=NULL\n"); + break; + } + ehea_fill_skb(dev, skb, cqe); + processed_rq3++; + } + + processed_bytes += skb->len; + ehea_proc_skb(pr, cqe, skb); + } else { + pr->p_stats.poll_receive_errors++; + port_reset = ehea_treat_poll_error(pr, rq, cqe, + &processed_rq2, + &processed_rq3); + if (port_reset) + break; + } + cqe = ehea_poll_rq1(qp, &wqe_index); + } + if (dev->features & NETIF_F_LRO) + lro_flush_all(&pr->lro_mgr); + + pr->rx_packets += processed; + pr->rx_bytes += processed_bytes; + + ehea_refill_rq1(pr, last_wqe_index, processed_rq1); + ehea_refill_rq2(pr, processed_rq2); + ehea_refill_rq3(pr, processed_rq3); + + return processed; +} + +#define SWQE_RESTART_CHECK 0xdeadbeaff00d0000ull + +static void reset_sq_restart_flag(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; + pr->sq_restart_flag = 0; + } + wake_up(&port->restart_wq); +} + +static void check_sqs(struct ehea_port *port) +{ + struct ehea_swqe *swqe; + int swqe_index; + int i, k; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; + int ret; + k = 0; + swqe = ehea_get_swqe(pr->qp, &swqe_index); + memset(swqe, 0, SWQE_HEADER_SIZE); + atomic_dec(&pr->swqe_avail); + + swqe->tx_control |= EHEA_SWQE_PURGE; + swqe->wr_id = SWQE_RESTART_CHECK; + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT; + swqe->immediate_data_length = 80; + + ehea_post_swqe(pr->qp, swqe); + + ret = wait_event_timeout(port->restart_wq, + pr->sq_restart_flag == 0, + msecs_to_jiffies(100)); + + if (!ret) { + pr_err("HW/SW queues out of sync\n"); + ehea_schedule_port_reset(pr->port); + return; + } + } +} + + +static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota) +{ + struct sk_buff *skb; + struct ehea_cq *send_cq = pr->send_cq; + struct ehea_cqe *cqe; + int quota = my_quota; + int cqe_counter = 0; + int swqe_av = 0; + int index; + unsigned long flags; + + cqe = ehea_poll_cq(send_cq); + while (cqe && (quota > 0)) { + ehea_inc_cq(send_cq); + + cqe_counter++; + rmb(); + + if (cqe->wr_id == SWQE_RESTART_CHECK) { + pr->sq_restart_flag = 1; + swqe_av++; + break; + } + + if (cqe->status & EHEA_CQE_STAT_ERR_MASK) { + pr_err("Bad send completion status=0x%04X\n", + cqe->status); + + if (netif_msg_tx_err(pr->port)) + ehea_dump(cqe, sizeof(*cqe), "Send CQE"); + + if (cqe->status & EHEA_CQE_STAT_RESET_MASK) { + pr_err("Resetting port\n"); + ehea_schedule_port_reset(pr->port); + break; + } + } + + if (netif_msg_tx_done(pr->port)) + ehea_dump(cqe, sizeof(*cqe), "CQE"); + + if (likely(EHEA_BMASK_GET(EHEA_WR_ID_TYPE, cqe->wr_id) + == EHEA_SWQE2_TYPE)) { + + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, cqe->wr_id); + skb = pr->sq_skba.arr[index]; + dev_kfree_skb(skb); + pr->sq_skba.arr[index] = NULL; + } + + swqe_av += EHEA_BMASK_GET(EHEA_WR_ID_REFILL, cqe->wr_id); + quota--; + + cqe = ehea_poll_cq(send_cq); + } + + ehea_update_feca(send_cq, cqe_counter); + atomic_add(swqe_av, &pr->swqe_avail); + + spin_lock_irqsave(&pr->netif_queue, flags); + + if (pr->queue_stopped && (atomic_read(&pr->swqe_avail) + >= pr->swqe_refill_th)) { + netif_wake_queue(pr->port->netdev); + pr->queue_stopped = 0; + } + spin_unlock_irqrestore(&pr->netif_queue, flags); + wake_up(&pr->port->swqe_avail_wq); + + return cqe; +} + +#define EHEA_NAPI_POLL_NUM_BEFORE_IRQ 16 +#define EHEA_POLL_MAX_CQES 65535 + +static int ehea_poll(struct napi_struct *napi, int budget) +{ + struct ehea_port_res *pr = container_of(napi, struct ehea_port_res, + napi); + struct net_device *dev = pr->port->netdev; + struct ehea_cqe *cqe; + struct ehea_cqe *cqe_skb = NULL; + int force_irq, wqe_index; + int rx = 0; + + force_irq = (pr->poll_counter > EHEA_NAPI_POLL_NUM_BEFORE_IRQ); + cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); + + if (!force_irq) + rx += ehea_proc_rwqes(dev, pr, budget - rx); + + while ((rx != budget) || force_irq) { + pr->poll_counter = 0; + force_irq = 0; + napi_complete(napi); + ehea_reset_cq_ep(pr->recv_cq); + ehea_reset_cq_ep(pr->send_cq); + ehea_reset_cq_n1(pr->recv_cq); + ehea_reset_cq_n1(pr->send_cq); + rmb(); + cqe = ehea_poll_rq1(pr->qp, &wqe_index); + cqe_skb = ehea_poll_cq(pr->send_cq); + + if (!cqe && !cqe_skb) + return rx; + + if (!napi_reschedule(napi)) + return rx; + + cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); + rx += ehea_proc_rwqes(dev, pr, budget - rx); + } + + pr->poll_counter++; + return rx; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ehea_netpoll(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + int i; + + for (i = 0; i < port->num_def_qps; i++) + napi_schedule(&port->port_res[i].napi); +} +#endif + +static irqreturn_t ehea_recv_irq_handler(int irq, void *param) +{ + struct ehea_port_res *pr = param; + + napi_schedule(&pr->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param) +{ + struct ehea_port *port = param; + struct ehea_eqe *eqe; + struct ehea_qp *qp; + u32 qp_token; + u64 resource_type, aer, aerr; + int reset_port = 0; + + eqe = ehea_poll_eq(port->qp_eq); + + while (eqe) { + qp_token = EHEA_BMASK_GET(EHEA_EQE_QP_TOKEN, eqe->entry); + pr_err("QP aff_err: entry=0x%llx, token=0x%x\n", + eqe->entry, qp_token); + + qp = port->port_res[qp_token].qp; + + resource_type = ehea_error_data(port->adapter, qp->fw_handle, + &aer, &aerr); + + if (resource_type == EHEA_AER_RESTYPE_QP) { + if ((aer & EHEA_AER_RESET_MASK) || + (aerr & EHEA_AERR_RESET_MASK)) + reset_port = 1; + } else + reset_port = 1; /* Reset in case of CQ or EQ error */ + + eqe = ehea_poll_eq(port->qp_eq); + } + + if (reset_port) { + pr_err("Resetting port\n"); + ehea_schedule_port_reset(port); + } + + return IRQ_HANDLED; +} + +static struct ehea_port *ehea_get_port(struct ehea_adapter *adapter, + int logical_port) +{ + int i; + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) + if (adapter->port[i]->logical_port_id == logical_port) + return adapter->port[i]; + return NULL; +} + +int ehea_sense_port_attr(struct ehea_port *port) +{ + int ret; + u64 hret; + struct hcp_ehea_port_cb0 *cb0; + + /* may be called via ehea_neq_tasklet() */ + cb0 = (void *)get_zeroed_page(GFP_ATOMIC); + if (!cb0) { + pr_err("no mem for cb0\n"); + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, H_PORT_CB0, + EHEA_BMASK_SET(H_PORT_CB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_free; + } + + /* MAC address */ + port->mac_addr = cb0->port_mac_addr << 16; + + if (!is_valid_ether_addr((u8 *)&port->mac_addr)) { + ret = -EADDRNOTAVAIL; + goto out_free; + } + + /* Port speed */ + switch (cb0->port_speed) { + case H_SPEED_10M_H: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 0; + break; + case H_SPEED_10M_F: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 1; + break; + case H_SPEED_100M_H: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 0; + break; + case H_SPEED_100M_F: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 1; + break; + case H_SPEED_1G_F: + port->port_speed = EHEA_SPEED_1G; + port->full_duplex = 1; + break; + case H_SPEED_10G_F: + port->port_speed = EHEA_SPEED_10G; + port->full_duplex = 1; + break; + default: + port->port_speed = 0; + port->full_duplex = 0; + break; + } + + port->autoneg = 1; + port->num_mcs = cb0->num_default_qps; + + /* Number of default QPs */ + if (use_mcs) + port->num_def_qps = cb0->num_default_qps; + else + port->num_def_qps = 1; + + if (!port->num_def_qps) { + ret = -EINVAL; + goto out_free; + } + + port->num_tx_qps = num_tx_qps; + + if (port->num_def_qps >= port->num_tx_qps) + port->num_add_tx_qps = 0; + else + port->num_add_tx_qps = port->num_tx_qps - port->num_def_qps; + + ret = 0; +out_free: + if (ret || netif_msg_probe(port)) + ehea_dump(cb0, sizeof(*cb0), "ehea_sense_port_attr"); + free_page((unsigned long)cb0); +out: + return ret; +} + +int ehea_set_portspeed(struct ehea_port *port, u32 port_speed) +{ + struct hcp_ehea_port_cb4 *cb4; + u64 hret; + int ret = 0; + + cb4 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb4) { + pr_err("no mem for cb4\n"); + ret = -ENOMEM; + goto out; + } + + cb4->port_speed = port_speed; + + netif_carrier_off(port->netdev); + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, H_PORT_CB4_SPEED, cb4); + if (hret == H_SUCCESS) { + port->autoneg = port_speed == EHEA_SPEED_AUTONEG ? 1 : 0; + + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, H_PORT_CB4_SPEED, + cb4); + if (hret == H_SUCCESS) { + switch (cb4->port_speed) { + case H_SPEED_10M_H: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 0; + break; + case H_SPEED_10M_F: + port->port_speed = EHEA_SPEED_10M; + port->full_duplex = 1; + break; + case H_SPEED_100M_H: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 0; + break; + case H_SPEED_100M_F: + port->port_speed = EHEA_SPEED_100M; + port->full_duplex = 1; + break; + case H_SPEED_1G_F: + port->port_speed = EHEA_SPEED_1G; + port->full_duplex = 1; + break; + case H_SPEED_10G_F: + port->port_speed = EHEA_SPEED_10G; + port->full_duplex = 1; + break; + default: + port->port_speed = 0; + port->full_duplex = 0; + break; + } + } else { + pr_err("Failed sensing port speed\n"); + ret = -EIO; + } + } else { + if (hret == H_AUTHORITY) { + pr_info("Hypervisor denied setting port speed\n"); + ret = -EPERM; + } else { + ret = -EIO; + pr_err("Failed setting port speed\n"); + } + } + if (!prop_carrier_state || (port->phy_link == EHEA_PHY_LINK_UP)) + netif_carrier_on(port->netdev); + + free_page((unsigned long)cb4); +out: + return ret; +} + +static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) +{ + int ret; + u8 ec; + u8 portnum; + struct ehea_port *port; + struct net_device *dev; + + ec = EHEA_BMASK_GET(NEQE_EVENT_CODE, eqe); + portnum = EHEA_BMASK_GET(NEQE_PORTNUM, eqe); + port = ehea_get_port(adapter, portnum); + dev = port->netdev; + + switch (ec) { + case EHEA_EC_PORTSTATE_CHG: /* port state change */ + + if (!port) { + netdev_err(dev, "unknown portnum %x\n", portnum); + break; + } + + if (EHEA_BMASK_GET(NEQE_PORT_UP, eqe)) { + if (!netif_carrier_ok(dev)) { + ret = ehea_sense_port_attr(port); + if (ret) { + netdev_err(dev, "failed resensing port attributes\n"); + break; + } + + netif_info(port, link, dev, + "Logical port up: %dMbps %s Duplex\n", + port->port_speed, + port->full_duplex == 1 ? + "Full" : "Half"); + + netif_carrier_on(dev); + netif_wake_queue(dev); + } + } else + if (netif_carrier_ok(dev)) { + netif_info(port, link, dev, + "Logical port down\n"); + netif_carrier_off(dev); + netif_stop_queue(dev); + } + + if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PORT_UP, eqe)) { + port->phy_link = EHEA_PHY_LINK_UP; + netif_info(port, link, dev, + "Physical port up\n"); + if (prop_carrier_state) + netif_carrier_on(dev); + } else { + port->phy_link = EHEA_PHY_LINK_DOWN; + netif_info(port, link, dev, + "Physical port down\n"); + if (prop_carrier_state) + netif_carrier_off(dev); + } + + if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PRIMARY, eqe)) + netdev_info(dev, + "External switch port is primary port\n"); + else + netdev_info(dev, + "External switch port is backup port\n"); + + break; + case EHEA_EC_ADAPTER_MALFUNC: + netdev_err(dev, "Adapter malfunction\n"); + break; + case EHEA_EC_PORT_MALFUNC: + netdev_info(dev, "Port malfunction\n"); + netif_carrier_off(dev); + netif_stop_queue(dev); + break; + default: + netdev_err(dev, "unknown event code %x, eqe=0x%llX\n", ec, eqe); + break; + } +} + +static void ehea_neq_tasklet(unsigned long data) +{ + struct ehea_adapter *adapter = (struct ehea_adapter *)data; + struct ehea_eqe *eqe; + u64 event_mask; + + eqe = ehea_poll_eq(adapter->neq); + pr_debug("eqe=%p\n", eqe); + + while (eqe) { + pr_debug("*eqe=%lx\n", (unsigned long) eqe->entry); + ehea_parse_eqe(adapter, eqe->entry); + eqe = ehea_poll_eq(adapter->neq); + pr_debug("next eqe=%p\n", eqe); + } + + event_mask = EHEA_BMASK_SET(NELR_PORTSTATE_CHG, 1) + | EHEA_BMASK_SET(NELR_ADAPTER_MALFUNC, 1) + | EHEA_BMASK_SET(NELR_PORT_MALFUNC, 1); + + ehea_h_reset_events(adapter->handle, + adapter->neq->fw_handle, event_mask); +} + +static irqreturn_t ehea_interrupt_neq(int irq, void *param) +{ + struct ehea_adapter *adapter = param; + tasklet_hi_schedule(&adapter->neq_tasklet); + return IRQ_HANDLED; +} + + +static int ehea_fill_port_res(struct ehea_port_res *pr) +{ + int ret; + struct ehea_qp_init_attr *init_attr = &pr->qp->init_attr; + + ehea_init_fill_rq1(pr, pr->rq1_skba.len); + + ret = ehea_refill_rq2(pr, init_attr->act_nr_rwqes_rq2 - 1); + + ret |= ehea_refill_rq3(pr, init_attr->act_nr_rwqes_rq3 - 1); + + return ret; +} + +static int ehea_reg_interrupts(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_port_res *pr; + int i, ret; + + + snprintf(port->int_aff_name, EHEA_IRQ_NAME_SIZE - 1, "%s-aff", + dev->name); + + ret = ibmebus_request_irq(port->qp_eq->attr.ist1, + ehea_qp_aff_irq_handler, + IRQF_DISABLED, port->int_aff_name, port); + if (ret) { + netdev_err(dev, "failed registering irq for qp_aff_irq_handler:ist=%X\n", + port->qp_eq->attr.ist1); + goto out_free_qpeq; + } + + netif_info(port, ifup, dev, + "irq_handle 0x%X for function qp_aff_irq_handler registered\n", + port->qp_eq->attr.ist1); + + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + pr = &port->port_res[i]; + snprintf(pr->int_send_name, EHEA_IRQ_NAME_SIZE - 1, + "%s-queue%d", dev->name, i); + ret = ibmebus_request_irq(pr->eq->attr.ist1, + ehea_recv_irq_handler, + IRQF_DISABLED, pr->int_send_name, + pr); + if (ret) { + netdev_err(dev, "failed registering irq for ehea_queue port_res_nr:%d, ist=%X\n", + i, pr->eq->attr.ist1); + goto out_free_req; + } + netif_info(port, ifup, dev, + "irq_handle 0x%X for function ehea_queue_int %d registered\n", + pr->eq->attr.ist1, i); + } +out: + return ret; + + +out_free_req: + while (--i >= 0) { + u32 ist = port->port_res[i].eq->attr.ist1; + ibmebus_free_irq(ist, &port->port_res[i]); + } + +out_free_qpeq: + ibmebus_free_irq(port->qp_eq->attr.ist1, port); + i = port->num_def_qps; + + goto out; + +} + +static void ehea_free_interrupts(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_port_res *pr; + int i; + + /* send */ + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + pr = &port->port_res[i]; + ibmebus_free_irq(pr->eq->attr.ist1, pr); + netif_info(port, intr, dev, + "free send irq for res %d with handle 0x%X\n", + i, pr->eq->attr.ist1); + } + + /* associated events */ + ibmebus_free_irq(port->qp_eq->attr.ist1, port); + netif_info(port, intr, dev, + "associated event interrupt for handle 0x%X freed\n", + port->qp_eq->attr.ist1); +} + +static int ehea_configure_port(struct ehea_port *port) +{ + int ret, i; + u64 hret, mask; + struct hcp_ehea_port_cb0 *cb0; + + ret = -ENOMEM; + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) + goto out; + + cb0->port_rc = EHEA_BMASK_SET(PXLY_RC_VALID, 1) + | EHEA_BMASK_SET(PXLY_RC_IP_CHKSUM, 1) + | EHEA_BMASK_SET(PXLY_RC_TCP_UDP_CHKSUM, 1) + | EHEA_BMASK_SET(PXLY_RC_VLAN_XTRACT, 1) + | EHEA_BMASK_SET(PXLY_RC_VLAN_TAG_FILTER, + PXLY_RC_VLAN_FILTER) + | EHEA_BMASK_SET(PXLY_RC_JUMBO_FRAME, 1); + + for (i = 0; i < port->num_mcs; i++) + if (use_mcs) + cb0->default_qpn_arr[i] = + port->port_res[i].qp->init_attr.qp_nr; + else + cb0->default_qpn_arr[i] = + port->port_res[0].qp->init_attr.qp_nr; + + if (netif_msg_ifup(port)) + ehea_dump(cb0, sizeof(*cb0), "ehea_configure_port"); + + mask = EHEA_BMASK_SET(H_PORT_CB0_PRC, 1) + | EHEA_BMASK_SET(H_PORT_CB0_DEFQPNARRAY, 1); + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB0, mask, cb0); + ret = -EIO; + if (hret != H_SUCCESS) + goto out_free; + + ret = 0; + +out_free: + free_page((unsigned long)cb0); +out: + return ret; +} + +int ehea_gen_smrs(struct ehea_port_res *pr) +{ + int ret; + struct ehea_adapter *adapter = pr->port->adapter; + + ret = ehea_gen_smr(adapter, &adapter->mr, &pr->send_mr); + if (ret) + goto out; + + ret = ehea_gen_smr(adapter, &adapter->mr, &pr->recv_mr); + if (ret) + goto out_free; + + return 0; + +out_free: + ehea_rem_mr(&pr->send_mr); +out: + pr_err("Generating SMRS failed\n"); + return -EIO; +} + +int ehea_rem_smrs(struct ehea_port_res *pr) +{ + if ((ehea_rem_mr(&pr->send_mr)) || + (ehea_rem_mr(&pr->recv_mr))) + return -EIO; + else + return 0; +} + +static int ehea_init_q_skba(struct ehea_q_skb_arr *q_skba, int max_q_entries) +{ + int arr_size = sizeof(void *) * max_q_entries; + + q_skba->arr = vzalloc(arr_size); + if (!q_skba->arr) + return -ENOMEM; + + q_skba->len = max_q_entries; + q_skba->index = 0; + q_skba->os_skbs = 0; + + return 0; +} + +static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, + struct port_res_cfg *pr_cfg, int queue_token) +{ + struct ehea_adapter *adapter = port->adapter; + enum ehea_eq_type eq_type = EHEA_EQ; + struct ehea_qp_init_attr *init_attr = NULL; + int ret = -EIO; + u64 tx_bytes, rx_bytes, tx_packets, rx_packets; + + tx_bytes = pr->tx_bytes; + tx_packets = pr->tx_packets; + rx_bytes = pr->rx_bytes; + rx_packets = pr->rx_packets; + + memset(pr, 0, sizeof(struct ehea_port_res)); + + pr->tx_bytes = rx_bytes; + pr->tx_packets = tx_packets; + pr->rx_bytes = rx_bytes; + pr->rx_packets = rx_packets; + + pr->port = port; + spin_lock_init(&pr->xmit_lock); + spin_lock_init(&pr->netif_queue); + + pr->eq = ehea_create_eq(adapter, eq_type, EHEA_MAX_ENTRIES_EQ, 0); + if (!pr->eq) { + pr_err("create_eq failed (eq)\n"); + goto out_free; + } + + pr->recv_cq = ehea_create_cq(adapter, pr_cfg->max_entries_rcq, + pr->eq->fw_handle, + port->logical_port_id); + if (!pr->recv_cq) { + pr_err("create_cq failed (cq_recv)\n"); + goto out_free; + } + + pr->send_cq = ehea_create_cq(adapter, pr_cfg->max_entries_scq, + pr->eq->fw_handle, + port->logical_port_id); + if (!pr->send_cq) { + pr_err("create_cq failed (cq_send)\n"); + goto out_free; + } + + if (netif_msg_ifup(port)) + pr_info("Send CQ: act_nr_cqes=%d, Recv CQ: act_nr_cqes=%d\n", + pr->send_cq->attr.act_nr_of_cqes, + pr->recv_cq->attr.act_nr_of_cqes); + + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!init_attr) { + ret = -ENOMEM; + pr_err("no mem for ehea_qp_init_attr\n"); + goto out_free; + } + + init_attr->low_lat_rq1 = 1; + init_attr->signalingtype = 1; /* generate CQE if specified in WQE */ + init_attr->rq_count = 3; + init_attr->qp_token = queue_token; + init_attr->max_nr_send_wqes = pr_cfg->max_entries_sq; + init_attr->max_nr_rwqes_rq1 = pr_cfg->max_entries_rq1; + init_attr->max_nr_rwqes_rq2 = pr_cfg->max_entries_rq2; + init_attr->max_nr_rwqes_rq3 = pr_cfg->max_entries_rq3; + init_attr->wqe_size_enc_sq = EHEA_SG_SQ; + init_attr->wqe_size_enc_rq1 = EHEA_SG_RQ1; + init_attr->wqe_size_enc_rq2 = EHEA_SG_RQ2; + init_attr->wqe_size_enc_rq3 = EHEA_SG_RQ3; + init_attr->rq2_threshold = EHEA_RQ2_THRESHOLD; + init_attr->rq3_threshold = EHEA_RQ3_THRESHOLD; + init_attr->port_nr = port->logical_port_id; + init_attr->send_cq_handle = pr->send_cq->fw_handle; + init_attr->recv_cq_handle = pr->recv_cq->fw_handle; + init_attr->aff_eq_handle = port->qp_eq->fw_handle; + + pr->qp = ehea_create_qp(adapter, adapter->pd, init_attr); + if (!pr->qp) { + pr_err("create_qp failed\n"); + ret = -EIO; + goto out_free; + } + + if (netif_msg_ifup(port)) + pr_info("QP: qp_nr=%d\n act_nr_snd_wqe=%d\n nr_rwqe_rq1=%d\n nr_rwqe_rq2=%d\n nr_rwqe_rq3=%d\n", + init_attr->qp_nr, + init_attr->act_nr_send_wqes, + init_attr->act_nr_rwqes_rq1, + init_attr->act_nr_rwqes_rq2, + init_attr->act_nr_rwqes_rq3); + + pr->sq_skba_size = init_attr->act_nr_send_wqes + 1; + + ret = ehea_init_q_skba(&pr->sq_skba, pr->sq_skba_size); + ret |= ehea_init_q_skba(&pr->rq1_skba, init_attr->act_nr_rwqes_rq1 + 1); + ret |= ehea_init_q_skba(&pr->rq2_skba, init_attr->act_nr_rwqes_rq2 + 1); + ret |= ehea_init_q_skba(&pr->rq3_skba, init_attr->act_nr_rwqes_rq3 + 1); + if (ret) + goto out_free; + + pr->swqe_refill_th = init_attr->act_nr_send_wqes / 10; + if (ehea_gen_smrs(pr) != 0) { + ret = -EIO; + goto out_free; + } + + atomic_set(&pr->swqe_avail, init_attr->act_nr_send_wqes - 1); + + kfree(init_attr); + + netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64); + + pr->lro_mgr.max_aggr = pr->port->lro_max_aggr; + pr->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS; + pr->lro_mgr.lro_arr = pr->lro_desc; + pr->lro_mgr.get_skb_header = get_skb_hdr; + pr->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + pr->lro_mgr.dev = port->netdev; + pr->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + pr->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; + + ret = 0; + goto out; + +out_free: + kfree(init_attr); + vfree(pr->sq_skba.arr); + vfree(pr->rq1_skba.arr); + vfree(pr->rq2_skba.arr); + vfree(pr->rq3_skba.arr); + ehea_destroy_qp(pr->qp); + ehea_destroy_cq(pr->send_cq); + ehea_destroy_cq(pr->recv_cq); + ehea_destroy_eq(pr->eq); +out: + return ret; +} + +static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) +{ + int ret, i; + + if (pr->qp) + netif_napi_del(&pr->napi); + + ret = ehea_destroy_qp(pr->qp); + + if (!ret) { + ehea_destroy_cq(pr->send_cq); + ehea_destroy_cq(pr->recv_cq); + ehea_destroy_eq(pr->eq); + + for (i = 0; i < pr->rq1_skba.len; i++) + if (pr->rq1_skba.arr[i]) + dev_kfree_skb(pr->rq1_skba.arr[i]); + + for (i = 0; i < pr->rq2_skba.len; i++) + if (pr->rq2_skba.arr[i]) + dev_kfree_skb(pr->rq2_skba.arr[i]); + + for (i = 0; i < pr->rq3_skba.len; i++) + if (pr->rq3_skba.arr[i]) + dev_kfree_skb(pr->rq3_skba.arr[i]); + + for (i = 0; i < pr->sq_skba.len; i++) + if (pr->sq_skba.arr[i]) + dev_kfree_skb(pr->sq_skba.arr[i]); + + vfree(pr->rq1_skba.arr); + vfree(pr->rq2_skba.arr); + vfree(pr->rq3_skba.arr); + vfree(pr->sq_skba.arr); + ret = ehea_rem_smrs(pr); + } + return ret; +} + +/* + * The write_* functions store information in swqe which is used by + * the hardware to calculate the ip/tcp/udp checksum + */ + +static inline void write_ip_start_end(struct ehea_swqe *swqe, + const struct sk_buff *skb) +{ + swqe->ip_start = skb_network_offset(skb); + swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1); +} + +static inline void write_tcp_offset_end(struct ehea_swqe *swqe, + const struct sk_buff *skb) +{ + swqe->tcp_offset = + (u8)(swqe->ip_end + 1 + offsetof(struct tcphdr, check)); + + swqe->tcp_end = (u16)skb->len - 1; +} + +static inline void write_udp_offset_end(struct ehea_swqe *swqe, + const struct sk_buff *skb) +{ + swqe->tcp_offset = + (u8)(swqe->ip_end + 1 + offsetof(struct udphdr, check)); + + swqe->tcp_end = (u16)skb->len - 1; +} + + +static void write_swqe2_TSO(struct sk_buff *skb, + struct ehea_swqe *swqe, u32 lkey) +{ + struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry; + u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0]; + int skb_data_size = skb_headlen(skb); + int headersize; + + /* Packet is TCP with TSO enabled */ + swqe->tx_control |= EHEA_SWQE_TSO; + swqe->mss = skb_shinfo(skb)->gso_size; + /* copy only eth/ip/tcp headers to immediate data and + * the rest of skb->data to sg1entry + */ + headersize = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); + + skb_data_size = skb_headlen(skb); + + if (skb_data_size >= headersize) { + /* copy immediate data */ + skb_copy_from_linear_data(skb, imm_data, headersize); + swqe->immediate_data_length = headersize; + + if (skb_data_size > headersize) { + /* set sg1entry data */ + sg1entry->l_key = lkey; + sg1entry->len = skb_data_size - headersize; + sg1entry->vaddr = + ehea_map_vaddr(skb->data + headersize); + swqe->descriptors++; + } + } else + pr_err("cannot handle fragmented headers\n"); +} + +static void write_swqe2_nonTSO(struct sk_buff *skb, + struct ehea_swqe *swqe, u32 lkey) +{ + int skb_data_size = skb_headlen(skb); + u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0]; + struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry; + + /* Packet is any nonTSO type + * + * Copy as much as possible skb->data to immediate data and + * the rest to sg1entry + */ + if (skb_data_size >= SWQE2_MAX_IMM) { + /* copy immediate data */ + skb_copy_from_linear_data(skb, imm_data, SWQE2_MAX_IMM); + + swqe->immediate_data_length = SWQE2_MAX_IMM; + + if (skb_data_size > SWQE2_MAX_IMM) { + /* copy sg1entry data */ + sg1entry->l_key = lkey; + sg1entry->len = skb_data_size - SWQE2_MAX_IMM; + sg1entry->vaddr = + ehea_map_vaddr(skb->data + SWQE2_MAX_IMM); + swqe->descriptors++; + } + } else { + skb_copy_from_linear_data(skb, imm_data, skb_data_size); + swqe->immediate_data_length = skb_data_size; + } +} + +static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe, u32 lkey) +{ + struct ehea_vsgentry *sg_list, *sg1entry, *sgentry; + skb_frag_t *frag; + int nfrags, sg1entry_contains_frag_data, i; + + nfrags = skb_shinfo(skb)->nr_frags; + sg1entry = &swqe->u.immdata_desc.sg_entry; + sg_list = (struct ehea_vsgentry *)&swqe->u.immdata_desc.sg_list; + swqe->descriptors = 0; + sg1entry_contains_frag_data = 0; + + if ((dev->features & NETIF_F_TSO) && skb_shinfo(skb)->gso_size) + write_swqe2_TSO(skb, swqe, lkey); + else + write_swqe2_nonTSO(skb, swqe, lkey); + + /* write descriptors */ + if (nfrags > 0) { + if (swqe->descriptors == 0) { + /* sg1entry not yet used */ + frag = &skb_shinfo(skb)->frags[0]; + + /* copy sg1entry data */ + sg1entry->l_key = lkey; + sg1entry->len = frag->size; + sg1entry->vaddr = + ehea_map_vaddr(page_address(frag->page) + + frag->page_offset); + swqe->descriptors++; + sg1entry_contains_frag_data = 1; + } + + for (i = sg1entry_contains_frag_data; i < nfrags; i++) { + + frag = &skb_shinfo(skb)->frags[i]; + sgentry = &sg_list[i - sg1entry_contains_frag_data]; + + sgentry->l_key = lkey; + sgentry->len = frag->size; + sgentry->vaddr = + ehea_map_vaddr(page_address(frag->page) + + frag->page_offset); + swqe->descriptors++; + } + } +} + +static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) +{ + int ret = 0; + u64 hret; + u8 reg_type; + + /* De/Register untagged packets */ + reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_UNTAGGED; + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, port->mac_addr, 0, hcallid); + if (hret != H_SUCCESS) { + pr_err("%sregistering bc address failed (tagged)\n", + hcallid == H_REG_BCMC ? "" : "de"); + ret = -EIO; + goto out_herr; + } + + /* De/Register VLAN packets */ + reg_type = EHEA_BCMC_BROADCAST | EHEA_BCMC_VLANID_ALL; + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, port->mac_addr, 0, hcallid); + if (hret != H_SUCCESS) { + pr_err("%sregistering bc address failed (vlan)\n", + hcallid == H_REG_BCMC ? "" : "de"); + ret = -EIO; + } +out_herr: + return ret; +} + +static int ehea_set_mac_addr(struct net_device *dev, void *sa) +{ + struct ehea_port *port = netdev_priv(dev); + struct sockaddr *mac_addr = sa; + struct hcp_ehea_port_cb0 *cb0; + int ret; + u64 hret; + + if (!is_valid_ether_addr(mac_addr->sa_data)) { + ret = -EADDRNOTAVAIL; + goto out; + } + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + pr_err("no mem for cb0\n"); + ret = -ENOMEM; + goto out; + } + + memcpy(&(cb0->port_mac_addr), &(mac_addr->sa_data[0]), ETH_ALEN); + + cb0->port_mac_addr = cb0->port_mac_addr >> 16; + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, H_PORT_CB0, + EHEA_BMASK_SET(H_PORT_CB0_MAC, 1), cb0); + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_free; + } + + memcpy(dev->dev_addr, mac_addr->sa_data, dev->addr_len); + + /* Deregister old MAC in pHYP */ + if (port->state == EHEA_PORT_UP) { + ret = ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + if (ret) + goto out_upregs; + } + + port->mac_addr = cb0->port_mac_addr << 16; + + /* Register new MAC in pHYP */ + if (port->state == EHEA_PORT_UP) { + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) + goto out_upregs; + } + + ret = 0; + +out_upregs: + ehea_update_bcmc_registrations(); +out_free: + free_page((unsigned long)cb0); +out: + return ret; +} + +static void ehea_promiscuous_error(u64 hret, int enable) +{ + if (hret == H_AUTHORITY) + pr_info("Hypervisor denied %sabling promiscuous mode\n", + enable == 1 ? "en" : "dis"); + else + pr_err("failed %sabling promiscuous mode\n", + enable == 1 ? "en" : "dis"); +} + +static void ehea_promiscuous(struct net_device *dev, int enable) +{ + struct ehea_port *port = netdev_priv(dev); + struct hcp_ehea_port_cb7 *cb7; + u64 hret; + + if (enable == port->promisc) + return; + + cb7 = (void *)get_zeroed_page(GFP_ATOMIC); + if (!cb7) { + pr_err("no mem for cb7\n"); + goto out; + } + + /* Modify Pxs_DUCQPN in CB7 */ + cb7->def_uc_qpn = enable == 1 ? port->port_res[0].qp->fw_handle : 0; + + hret = ehea_h_modify_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB7, H_PORT_CB7_DUCQPN, cb7); + if (hret) { + ehea_promiscuous_error(hret, enable); + goto out; + } + + port->promisc = enable; +out: + free_page((unsigned long)cb7); +} + +static u64 ehea_multicast_reg_helper(struct ehea_port *port, u64 mc_mac_addr, + u32 hcallid) +{ + u64 hret; + u8 reg_type; + + reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST + | EHEA_BCMC_UNTAGGED; + + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, mc_mac_addr, 0, hcallid); + if (hret) + goto out; + + reg_type = EHEA_BCMC_SCOPE_ALL | EHEA_BCMC_MULTICAST + | EHEA_BCMC_VLANID_ALL; + + hret = ehea_h_reg_dereg_bcmc(port->adapter->handle, + port->logical_port_id, + reg_type, mc_mac_addr, 0, hcallid); +out: + return hret; +} + +static int ehea_drop_multicast_list(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_mc_list *mc_entry = port->mc_list; + struct list_head *pos; + struct list_head *temp; + int ret = 0; + u64 hret; + + list_for_each_safe(pos, temp, &(port->mc_list->list)) { + mc_entry = list_entry(pos, struct ehea_mc_list, list); + + hret = ehea_multicast_reg_helper(port, mc_entry->macaddr, + H_DEREG_BCMC); + if (hret) { + pr_err("failed deregistering mcast MAC\n"); + ret = -EIO; + } + + list_del(pos); + kfree(mc_entry); + } + return ret; +} + +static void ehea_allmulti(struct net_device *dev, int enable) +{ + struct ehea_port *port = netdev_priv(dev); + u64 hret; + + if (!port->allmulti) { + if (enable) { + /* Enable ALLMULTI */ + ehea_drop_multicast_list(dev); + hret = ehea_multicast_reg_helper(port, 0, H_REG_BCMC); + if (!hret) + port->allmulti = 1; + else + netdev_err(dev, + "failed enabling IFF_ALLMULTI\n"); + } + } else + if (!enable) { + /* Disable ALLMULTI */ + hret = ehea_multicast_reg_helper(port, 0, H_DEREG_BCMC); + if (!hret) + port->allmulti = 0; + else + netdev_err(dev, + "failed disabling IFF_ALLMULTI\n"); + } +} + +static void ehea_add_multicast_entry(struct ehea_port *port, u8 *mc_mac_addr) +{ + struct ehea_mc_list *ehea_mcl_entry; + u64 hret; + + ehea_mcl_entry = kzalloc(sizeof(*ehea_mcl_entry), GFP_ATOMIC); + if (!ehea_mcl_entry) { + pr_err("no mem for mcl_entry\n"); + return; + } + + INIT_LIST_HEAD(&ehea_mcl_entry->list); + + memcpy(&ehea_mcl_entry->macaddr, mc_mac_addr, ETH_ALEN); + + hret = ehea_multicast_reg_helper(port, ehea_mcl_entry->macaddr, + H_REG_BCMC); + if (!hret) + list_add(&ehea_mcl_entry->list, &port->mc_list->list); + else { + pr_err("failed registering mcast MAC\n"); + kfree(ehea_mcl_entry); + } +} + +static void ehea_set_multicast_list(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct netdev_hw_addr *ha; + int ret; + + if (port->promisc) { + ehea_promiscuous(dev, 1); + return; + } + ehea_promiscuous(dev, 0); + + if (dev->flags & IFF_ALLMULTI) { + ehea_allmulti(dev, 1); + goto out; + } + ehea_allmulti(dev, 0); + + if (!netdev_mc_empty(dev)) { + ret = ehea_drop_multicast_list(dev); + if (ret) { + /* Dropping the current multicast list failed. + * Enabling ALL_MULTI is the best we can do. + */ + ehea_allmulti(dev, 1); + } + + if (netdev_mc_count(dev) > port->adapter->max_mc_mac) { + pr_info("Mcast registration limit reached (0x%llx). Use ALLMULTI!\n", + port->adapter->max_mc_mac); + goto out; + } + + netdev_for_each_mc_addr(ha, dev) + ehea_add_multicast_entry(port, ha->addr); + + } +out: + ehea_update_bcmc_registrations(); +} + +static int ehea_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe, u32 lkey) +{ + if (skb->protocol == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); + + /* IPv4 */ + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IP_CHECKSUM + | EHEA_SWQE_TCP_CHECKSUM + | EHEA_SWQE_IMM_DATA_PRESENT + | EHEA_SWQE_DESCRIPTORS_PRESENT; + + write_ip_start_end(swqe, skb); + + if (iph->protocol == IPPROTO_UDP) { + if ((iph->frag_off & IP_MF) || + (iph->frag_off & IP_OFFSET)) + /* IP fragment, so don't change cs */ + swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM; + else + write_udp_offset_end(swqe, skb); + } else if (iph->protocol == IPPROTO_TCP) { + write_tcp_offset_end(swqe, skb); + } + + /* icmp (big data) and ip segmentation packets (all other ip + packets) do not require any special handling */ + + } else { + /* Other Ethernet Protocol */ + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IMM_DATA_PRESENT + | EHEA_SWQE_DESCRIPTORS_PRESENT; + } + + write_swqe2_data(skb, dev, swqe, lkey); +} + +static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, + struct ehea_swqe *swqe) +{ + int nfrags = skb_shinfo(skb)->nr_frags; + u8 *imm_data = &swqe->u.immdata_nodesc.immediate_data[0]; + skb_frag_t *frag; + int i; + + if (skb->protocol == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); + + /* IPv4 */ + write_ip_start_end(swqe, skb); + + if (iph->protocol == IPPROTO_TCP) { + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IP_CHECKSUM + | EHEA_SWQE_TCP_CHECKSUM + | EHEA_SWQE_IMM_DATA_PRESENT; + + write_tcp_offset_end(swqe, skb); + + } else if (iph->protocol == IPPROTO_UDP) { + if ((iph->frag_off & IP_MF) || + (iph->frag_off & IP_OFFSET)) + /* IP fragment, so don't change cs */ + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IMM_DATA_PRESENT; + else { + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IP_CHECKSUM + | EHEA_SWQE_TCP_CHECKSUM + | EHEA_SWQE_IMM_DATA_PRESENT; + + write_udp_offset_end(swqe, skb); + } + } else { + /* icmp (big data) and + ip segmentation packets (all other ip packets) */ + swqe->tx_control |= EHEA_SWQE_CRC + | EHEA_SWQE_IP_CHECKSUM + | EHEA_SWQE_IMM_DATA_PRESENT; + } + } else { + /* Other Ethernet Protocol */ + swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IMM_DATA_PRESENT; + } + /* copy (immediate) data */ + if (nfrags == 0) { + /* data is in a single piece */ + skb_copy_from_linear_data(skb, imm_data, skb->len); + } else { + /* first copy data from the skb->data buffer ... */ + skb_copy_from_linear_data(skb, imm_data, + skb_headlen(skb)); + imm_data += skb_headlen(skb); + + /* ... then copy data from the fragments */ + for (i = 0; i < nfrags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + memcpy(imm_data, + page_address(frag->page) + frag->page_offset, + frag->size); + imm_data += frag->size; + } + } + swqe->immediate_data_length = skb->len; + dev_kfree_skb(skb); +} + +static inline int ehea_hash_skb(struct sk_buff *skb, int num_qps) +{ + struct tcphdr *tcp; + u32 tmp; + + if ((skb->protocol == htons(ETH_P_IP)) && + (ip_hdr(skb)->protocol == IPPROTO_TCP)) { + tcp = (struct tcphdr *)(skb_network_header(skb) + + (ip_hdr(skb)->ihl * 4)); + tmp = (tcp->source + (tcp->dest << 16)) % 31; + tmp += ip_hdr(skb)->daddr % 31; + return tmp % num_qps; + } else + return 0; +} + +static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_swqe *swqe; + unsigned long flags; + u32 lkey; + int swqe_index; + struct ehea_port_res *pr; + + pr = &port->port_res[ehea_hash_skb(skb, port->num_tx_qps)]; + + if (!spin_trylock(&pr->xmit_lock)) + return NETDEV_TX_BUSY; + + if (pr->queue_stopped) { + spin_unlock(&pr->xmit_lock); + return NETDEV_TX_BUSY; + } + + swqe = ehea_get_swqe(pr->qp, &swqe_index); + memset(swqe, 0, SWQE_HEADER_SIZE); + atomic_dec(&pr->swqe_avail); + + if (vlan_tx_tag_present(skb)) { + swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; + swqe->vlan_tag = vlan_tx_tag_get(skb); + } + + pr->tx_packets++; + pr->tx_bytes += skb->len; + + if (skb->len <= SWQE3_MAX_IMM) { + u32 sig_iv = port->sig_comp_iv; + u32 swqe_num = pr->swqe_id_counter; + ehea_xmit3(skb, dev, swqe); + swqe->wr_id = EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE3_TYPE) + | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, swqe_num); + if (pr->swqe_ll_count >= (sig_iv - 1)) { + swqe->wr_id |= EHEA_BMASK_SET(EHEA_WR_ID_REFILL, + sig_iv); + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + pr->swqe_ll_count = 0; + } else + pr->swqe_ll_count += 1; + } else { + swqe->wr_id = + EHEA_BMASK_SET(EHEA_WR_ID_TYPE, EHEA_SWQE2_TYPE) + | EHEA_BMASK_SET(EHEA_WR_ID_COUNT, pr->swqe_id_counter) + | EHEA_BMASK_SET(EHEA_WR_ID_REFILL, 1) + | EHEA_BMASK_SET(EHEA_WR_ID_INDEX, pr->sq_skba.index); + pr->sq_skba.arr[pr->sq_skba.index] = skb; + + pr->sq_skba.index++; + pr->sq_skba.index &= (pr->sq_skba.len - 1); + + lkey = pr->send_mr.lkey; + ehea_xmit2(skb, dev, swqe, lkey); + swqe->tx_control |= EHEA_SWQE_SIGNALLED_COMPLETION; + } + pr->swqe_id_counter += 1; + + netif_info(port, tx_queued, dev, + "post swqe on QP %d\n", pr->qp->init_attr.qp_nr); + if (netif_msg_tx_queued(port)) + ehea_dump(swqe, 512, "swqe"); + + if (unlikely(test_bit(__EHEA_STOP_XFER, &ehea_driver_flags))) { + netif_stop_queue(dev); + swqe->tx_control |= EHEA_SWQE_PURGE; + } + + ehea_post_swqe(pr->qp, swqe); + + if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) { + spin_lock_irqsave(&pr->netif_queue, flags); + if (unlikely(atomic_read(&pr->swqe_avail) <= 1)) { + pr->p_stats.queue_stopped++; + netif_stop_queue(dev); + pr->queue_stopped = 1; + } + spin_unlock_irqrestore(&pr->netif_queue, flags); + } + dev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ + spin_unlock(&pr->xmit_lock); + + return NETDEV_TX_OK; +} + +static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_ehea_port_cb1 *cb1; + int index; + u64 hret; + + cb1 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb1) { + pr_err("no mem for cb1\n"); + goto out; + } + + hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("query_ehea_port failed\n"); + goto out; + } + + index = (vid / 64); + cb1->vlan_filter[index] |= ((u64)(0x8000000000000000 >> (vid & 0x3F))); + + hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) + pr_err("modify_ehea_port failed\n"); +out: + free_page((unsigned long)cb1); + return; +} + +static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_ehea_port_cb1 *cb1; + int index; + u64 hret; + + cb1 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb1) { + pr_err("no mem for cb1\n"); + goto out; + } + + hret = ehea_h_query_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) { + pr_err("query_ehea_port failed\n"); + goto out; + } + + index = (vid / 64); + cb1->vlan_filter[index] &= ~((u64)(0x8000000000000000 >> (vid & 0x3F))); + + hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, + H_PORT_CB1, H_PORT_CB1_ALL, cb1); + if (hret != H_SUCCESS) + pr_err("modify_ehea_port failed\n"); +out: + free_page((unsigned long)cb1); +} + +int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp) +{ + int ret = -EIO; + u64 hret; + u16 dummy16 = 0; + u64 dummy64 = 0; + struct hcp_modify_qp_cb0 *cb0; + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (1)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_STATE_INITIALIZED; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (1)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (2)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_INITIALIZED; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (2)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (3)\n"); + goto out; + } + + cb0->qp_ctl_reg = H_QP_CR_ENABLED | H_QP_CR_STATE_RDY2SND; + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, 1), cb0, + &dummy64, &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (3)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (4)\n"); + goto out; + } + + ret = 0; +out: + free_page((unsigned long)cb0); + return ret; +} + +static int ehea_port_res_setup(struct ehea_port *port, int def_qps, + int add_tx_qps) +{ + int ret, i; + struct port_res_cfg pr_cfg, pr_cfg_small_rx; + enum ehea_eq_type eq_type = EHEA_EQ; + + port->qp_eq = ehea_create_eq(port->adapter, eq_type, + EHEA_MAX_ENTRIES_EQ, 1); + if (!port->qp_eq) { + ret = -EINVAL; + pr_err("ehea_create_eq failed (qp_eq)\n"); + goto out_kill_eq; + } + + pr_cfg.max_entries_rcq = rq1_entries + rq2_entries + rq3_entries; + pr_cfg.max_entries_scq = sq_entries * 2; + pr_cfg.max_entries_sq = sq_entries; + pr_cfg.max_entries_rq1 = rq1_entries; + pr_cfg.max_entries_rq2 = rq2_entries; + pr_cfg.max_entries_rq3 = rq3_entries; + + pr_cfg_small_rx.max_entries_rcq = 1; + pr_cfg_small_rx.max_entries_scq = sq_entries; + pr_cfg_small_rx.max_entries_sq = sq_entries; + pr_cfg_small_rx.max_entries_rq1 = 1; + pr_cfg_small_rx.max_entries_rq2 = 1; + pr_cfg_small_rx.max_entries_rq3 = 1; + + for (i = 0; i < def_qps; i++) { + ret = ehea_init_port_res(port, &port->port_res[i], &pr_cfg, i); + if (ret) + goto out_clean_pr; + } + for (i = def_qps; i < def_qps + add_tx_qps; i++) { + ret = ehea_init_port_res(port, &port->port_res[i], + &pr_cfg_small_rx, i); + if (ret) + goto out_clean_pr; + } + + return 0; + +out_clean_pr: + while (--i >= 0) + ehea_clean_portres(port, &port->port_res[i]); + +out_kill_eq: + ehea_destroy_eq(port->qp_eq); + return ret; +} + +static int ehea_clean_all_portres(struct ehea_port *port) +{ + int ret = 0; + int i; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) + ret |= ehea_clean_portres(port, &port->port_res[i]); + + ret |= ehea_destroy_eq(port->qp_eq); + + return ret; +} + +static void ehea_remove_adapter_mr(struct ehea_adapter *adapter) +{ + if (adapter->active_ports) + return; + + ehea_rem_mr(&adapter->mr); +} + +static int ehea_add_adapter_mr(struct ehea_adapter *adapter) +{ + if (adapter->active_ports) + return 0; + + return ehea_reg_kernel_mr(adapter, &adapter->mr); +} + +static int ehea_up(struct net_device *dev) +{ + int ret, i; + struct ehea_port *port = netdev_priv(dev); + + if (port->state == EHEA_PORT_UP) + return 0; + + ret = ehea_port_res_setup(port, port->num_def_qps, + port->num_add_tx_qps); + if (ret) { + netdev_err(dev, "port_res_failed\n"); + goto out; + } + + /* Set default QP for this port */ + ret = ehea_configure_port(port); + if (ret) { + netdev_err(dev, "ehea_configure_port failed. ret:%d\n", ret); + goto out_clean_pr; + } + + ret = ehea_reg_interrupts(dev); + if (ret) { + netdev_err(dev, "reg_interrupts failed. ret:%d\n", ret); + goto out_clean_pr; + } + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + ret = ehea_activate_qp(port->adapter, port->port_res[i].qp); + if (ret) { + netdev_err(dev, "activate_qp failed\n"); + goto out_free_irqs; + } + } + + for (i = 0; i < port->num_def_qps; i++) { + ret = ehea_fill_port_res(&port->port_res[i]); + if (ret) { + netdev_err(dev, "out_free_irqs\n"); + goto out_free_irqs; + } + } + + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) { + ret = -EIO; + goto out_free_irqs; + } + + port->state = EHEA_PORT_UP; + + ret = 0; + goto out; + +out_free_irqs: + ehea_free_interrupts(dev); + +out_clean_pr: + ehea_clean_all_portres(port); +out: + if (ret) + netdev_info(dev, "Failed starting. ret=%i\n", ret); + + ehea_update_bcmc_registrations(); + ehea_update_firmware_handles(); + + return ret; +} + +static void port_napi_disable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) + napi_disable(&port->port_res[i].napi); +} + +static void port_napi_enable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) + napi_enable(&port->port_res[i].napi); +} + +static int ehea_open(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + mutex_lock(&port->port_lock); + + netif_info(port, ifup, dev, "enabling port\n"); + + ret = ehea_up(dev); + if (!ret) { + port_napi_enable(port); + netif_start_queue(dev); + } + + mutex_unlock(&port->port_lock); + schedule_delayed_work(&port->stats_work, msecs_to_jiffies(1000)); + + return ret; +} + +static int ehea_down(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + if (port->state == EHEA_PORT_DOWN) + return 0; + + ehea_drop_multicast_list(dev); + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + + ehea_free_interrupts(dev); + + port->state = EHEA_PORT_DOWN; + + ehea_update_bcmc_registrations(); + + ret = ehea_clean_all_portres(port); + if (ret) + netdev_info(dev, "Failed freeing resources. ret=%i\n", ret); + + ehea_update_firmware_handles(); + + return ret; +} + +static int ehea_stop(struct net_device *dev) +{ + int ret; + struct ehea_port *port = netdev_priv(dev); + + netif_info(port, ifdown, dev, "disabling port\n"); + + set_bit(__EHEA_DISABLE_PORT_RESET, &port->flags); + cancel_work_sync(&port->reset_task); + cancel_delayed_work_sync(&port->stats_work); + mutex_lock(&port->port_lock); + netif_stop_queue(dev); + port_napi_disable(port); + ret = ehea_down(dev); + mutex_unlock(&port->port_lock); + clear_bit(__EHEA_DISABLE_PORT_RESET, &port->flags); + return ret; +} + +static void ehea_purge_sq(struct ehea_qp *orig_qp) +{ + struct ehea_qp qp = *orig_qp; + struct ehea_qp_init_attr *init_attr = &qp.init_attr; + struct ehea_swqe *swqe; + int wqe_index; + int i; + + for (i = 0; i < init_attr->act_nr_send_wqes; i++) { + swqe = ehea_get_swqe(&qp, &wqe_index); + swqe->tx_control |= EHEA_SWQE_PURGE; + } +} + +static void ehea_flush_sq(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { + struct ehea_port_res *pr = &port->port_res[i]; + int swqe_max = pr->sq_skba_size - 2 - pr->swqe_ll_count; + int ret; + + ret = wait_event_timeout(port->swqe_avail_wq, + atomic_read(&pr->swqe_avail) >= swqe_max, + msecs_to_jiffies(100)); + + if (!ret) { + pr_err("WARNING: sq not flushed completely\n"); + break; + } + } +} + +int ehea_stop_qps(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + struct hcp_modify_qp_cb0 *cb0; + int ret = -EIO; + int dret; + int i; + u64 hret; + u64 dummy64 = 0; + u16 dummy16 = 0; + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < (port->num_def_qps + port->num_add_tx_qps); i++) { + struct ehea_port_res *pr = &port->port_res[i]; + struct ehea_qp *qp = pr->qp; + + /* Purge send queue */ + ehea_purge_sq(qp); + + /* Disable queue pair */ + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (1)\n"); + goto out; + } + + cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8; + cb0->qp_ctl_reg &= ~H_QP_CR_ENABLED; + + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, + 1), cb0, &dummy64, + &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + pr_err("modify_ehea_qp failed (1)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + pr_err("query_ehea_qp failed (2)\n"); + goto out; + } + + /* deregister shared memory regions */ + dret = ehea_rem_smrs(pr); + if (dret) { + pr_err("unreg shared memory region failed\n"); + goto out; + } + } + + ret = 0; +out: + free_page((unsigned long)cb0); + + return ret; +} + +void ehea_update_rqs(struct ehea_qp *orig_qp, struct ehea_port_res *pr) +{ + struct ehea_qp qp = *orig_qp; + struct ehea_qp_init_attr *init_attr = &qp.init_attr; + struct ehea_rwqe *rwqe; + struct sk_buff **skba_rq2 = pr->rq2_skba.arr; + struct sk_buff **skba_rq3 = pr->rq3_skba.arr; + struct sk_buff *skb; + u32 lkey = pr->recv_mr.lkey; + + + int i; + int index; + + for (i = 0; i < init_attr->act_nr_rwqes_rq2 + 1; i++) { + rwqe = ehea_get_next_rwqe(&qp, 2); + rwqe->sg_list[0].l_key = lkey; + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, rwqe->wr_id); + skb = skba_rq2[index]; + if (skb) + rwqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data); + } + + for (i = 0; i < init_attr->act_nr_rwqes_rq3 + 1; i++) { + rwqe = ehea_get_next_rwqe(&qp, 3); + rwqe->sg_list[0].l_key = lkey; + index = EHEA_BMASK_GET(EHEA_WR_ID_INDEX, rwqe->wr_id); + skb = skba_rq3[index]; + if (skb) + rwqe->sg_list[0].vaddr = ehea_map_vaddr(skb->data); + } +} + +int ehea_restart_qps(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + struct ehea_adapter *adapter = port->adapter; + int ret = 0; + int i; + + struct hcp_modify_qp_cb0 *cb0; + u64 hret; + u64 dummy64 = 0; + u16 dummy16 = 0; + + cb0 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb0) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < (port->num_def_qps + port->num_add_tx_qps); i++) { + struct ehea_port_res *pr = &port->port_res[i]; + struct ehea_qp *qp = pr->qp; + + ret = ehea_gen_smrs(pr); + if (ret) { + netdev_err(dev, "creation of shared memory regions failed\n"); + goto out; + } + + ehea_update_rqs(qp, pr); + + /* Enable queue pair */ + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_qp failed (1)\n"); + goto out; + } + + cb0->qp_ctl_reg = (cb0->qp_ctl_reg & H_QP_CR_RES_STATE) << 8; + cb0->qp_ctl_reg |= H_QP_CR_ENABLED; + + hret = ehea_h_modify_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_QP_CTL_REG, + 1), cb0, &dummy64, + &dummy64, &dummy16, &dummy16); + if (hret != H_SUCCESS) { + netdev_err(dev, "modify_ehea_qp failed (1)\n"); + goto out; + } + + hret = ehea_h_query_ehea_qp(adapter->handle, 0, qp->fw_handle, + EHEA_BMASK_SET(H_QPCB0_ALL, 0xFFFF), + cb0); + if (hret != H_SUCCESS) { + netdev_err(dev, "query_ehea_qp failed (2)\n"); + goto out; + } + + /* refill entire queue */ + ehea_refill_rq1(pr, pr->rq1_skba.index, 0); + ehea_refill_rq2(pr, 0); + ehea_refill_rq3(pr, 0); + } +out: + free_page((unsigned long)cb0); + + return ret; +} + +static void ehea_reset_port(struct work_struct *work) +{ + int ret; + struct ehea_port *port = + container_of(work, struct ehea_port, reset_task); + struct net_device *dev = port->netdev; + + mutex_lock(&dlpar_mem_lock); + port->resets++; + mutex_lock(&port->port_lock); + netif_stop_queue(dev); + + port_napi_disable(port); + + ehea_down(dev); + + ret = ehea_up(dev); + if (ret) + goto out; + + ehea_set_multicast_list(dev); + + netif_info(port, timer, dev, "reset successful\n"); + + port_napi_enable(port); + + netif_wake_queue(dev); +out: + mutex_unlock(&port->port_lock); + mutex_unlock(&dlpar_mem_lock); +} + +static void ehea_rereg_mrs(void) +{ + int ret, i; + struct ehea_adapter *adapter; + + pr_info("LPAR memory changed - re-initializing driver\n"); + + list_for_each_entry(adapter, &adapter_list, list) + if (adapter->active_ports) { + /* Shutdown all ports */ + for (i = 0; i < EHEA_MAX_PORTS; i++) { + struct ehea_port *port = adapter->port[i]; + struct net_device *dev; + + if (!port) + continue; + + dev = port->netdev; + + if (dev->flags & IFF_UP) { + mutex_lock(&port->port_lock); + netif_stop_queue(dev); + ehea_flush_sq(port); + ret = ehea_stop_qps(dev); + if (ret) { + mutex_unlock(&port->port_lock); + goto out; + } + port_napi_disable(port); + mutex_unlock(&port->port_lock); + } + reset_sq_restart_flag(port); + } + + /* Unregister old memory region */ + ret = ehea_rem_mr(&adapter->mr); + if (ret) { + pr_err("unregister MR failed - driver inoperable!\n"); + goto out; + } + } + + clear_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + + list_for_each_entry(adapter, &adapter_list, list) + if (adapter->active_ports) { + /* Register new memory region */ + ret = ehea_reg_kernel_mr(adapter, &adapter->mr); + if (ret) { + pr_err("register MR failed - driver inoperable!\n"); + goto out; + } + + /* Restart all ports */ + for (i = 0; i < EHEA_MAX_PORTS; i++) { + struct ehea_port *port = adapter->port[i]; + + if (port) { + struct net_device *dev = port->netdev; + + if (dev->flags & IFF_UP) { + mutex_lock(&port->port_lock); + ret = ehea_restart_qps(dev); + if (!ret) { + check_sqs(port); + port_napi_enable(port); + netif_wake_queue(dev); + } else { + netdev_err(dev, "Unable to restart QPS\n"); + } + mutex_unlock(&port->port_lock); + } + } + } + } + pr_info("re-initializing driver complete\n"); +out: + return; +} + +static void ehea_tx_watchdog(struct net_device *dev) +{ + struct ehea_port *port = netdev_priv(dev); + + if (netif_carrier_ok(dev) && + !test_bit(__EHEA_STOP_XFER, &ehea_driver_flags)) + ehea_schedule_port_reset(port); +} + +int ehea_sense_adapter_attr(struct ehea_adapter *adapter) +{ + struct hcp_query_ehea *cb; + u64 hret; + int ret; + + cb = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb) { + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_query_ehea(adapter->handle, cb); + + if (hret != H_SUCCESS) { + ret = -EIO; + goto out_herr; + } + + adapter->max_mc_mac = cb->max_mc_mac - 1; + ret = 0; + +out_herr: + free_page((unsigned long)cb); +out: + return ret; +} + +int ehea_get_jumboframe_status(struct ehea_port *port, int *jumbo) +{ + struct hcp_ehea_port_cb4 *cb4; + u64 hret; + int ret = 0; + + *jumbo = 0; + + /* (Try to) enable *jumbo frames */ + cb4 = (void *)get_zeroed_page(GFP_KERNEL); + if (!cb4) { + pr_err("no mem for cb4\n"); + ret = -ENOMEM; + goto out; + } else { + hret = ehea_h_query_ehea_port(port->adapter->handle, + port->logical_port_id, + H_PORT_CB4, + H_PORT_CB4_JUMBO, cb4); + if (hret == H_SUCCESS) { + if (cb4->jumbo_frame) + *jumbo = 1; + else { + cb4->jumbo_frame = 1; + hret = ehea_h_modify_ehea_port(port->adapter-> + handle, + port-> + logical_port_id, + H_PORT_CB4, + H_PORT_CB4_JUMBO, + cb4); + if (hret == H_SUCCESS) + *jumbo = 1; + } + } else + ret = -EINVAL; + + free_page((unsigned long)cb4); + } +out: + return ret; +} + +static ssize_t ehea_show_port_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); + return sprintf(buf, "%d", port->logical_port_id); +} + +static DEVICE_ATTR(log_port_id, S_IRUSR | S_IRGRP | S_IROTH, ehea_show_port_id, + NULL); + +static void __devinit logical_port_release(struct device *dev) +{ + struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev); + of_node_put(port->ofdev.dev.of_node); +} + +static struct device *ehea_register_port(struct ehea_port *port, + struct device_node *dn) +{ + int ret; + + port->ofdev.dev.of_node = of_node_get(dn); + port->ofdev.dev.parent = &port->adapter->ofdev->dev; + port->ofdev.dev.bus = &ibmebus_bus_type; + + dev_set_name(&port->ofdev.dev, "port%d", port_name_cnt++); + port->ofdev.dev.release = logical_port_release; + + ret = of_device_register(&port->ofdev); + if (ret) { + pr_err("failed to register device. ret=%d\n", ret); + goto out; + } + + ret = device_create_file(&port->ofdev.dev, &dev_attr_log_port_id); + if (ret) { + pr_err("failed to register attributes, ret=%d\n", ret); + goto out_unreg_of_dev; + } + + return &port->ofdev.dev; + +out_unreg_of_dev: + of_device_unregister(&port->ofdev); +out: + return NULL; +} + +static void ehea_unregister_port(struct ehea_port *port) +{ + device_remove_file(&port->ofdev.dev, &dev_attr_log_port_id); + of_device_unregister(&port->ofdev); +} + +static const struct net_device_ops ehea_netdev_ops = { + .ndo_open = ehea_open, + .ndo_stop = ehea_stop, + .ndo_start_xmit = ehea_start_xmit, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ehea_netpoll, +#endif + .ndo_get_stats = ehea_get_stats, + .ndo_set_mac_address = ehea_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_rx_mode = ehea_set_multicast_list, + .ndo_change_mtu = ehea_change_mtu, + .ndo_vlan_rx_add_vid = ehea_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ehea_vlan_rx_kill_vid, + .ndo_tx_timeout = ehea_tx_watchdog, +}; + +struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, + u32 logical_port_id, + struct device_node *dn) +{ + int ret; + struct net_device *dev; + struct ehea_port *port; + struct device *port_dev; + int jumbo; + + /* allocate memory for the port structures */ + dev = alloc_etherdev(sizeof(struct ehea_port)); + + if (!dev) { + pr_err("no mem for net_device\n"); + ret = -ENOMEM; + goto out_err; + } + + port = netdev_priv(dev); + + mutex_init(&port->port_lock); + port->state = EHEA_PORT_DOWN; + port->sig_comp_iv = sq_entries / 10; + + port->adapter = adapter; + port->netdev = dev; + port->logical_port_id = logical_port_id; + + port->msg_enable = netif_msg_init(msg_level, EHEA_MSG_DEFAULT); + + port->mc_list = kzalloc(sizeof(struct ehea_mc_list), GFP_KERNEL); + if (!port->mc_list) { + ret = -ENOMEM; + goto out_free_ethdev; + } + + INIT_LIST_HEAD(&port->mc_list->list); + + ret = ehea_sense_port_attr(port); + if (ret) + goto out_free_mc_list; + + port_dev = ehea_register_port(port, dn); + if (!port_dev) + goto out_free_mc_list; + + SET_NETDEV_DEV(dev, port_dev); + + /* initialize net_device structure */ + memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); + + dev->netdev_ops = &ehea_netdev_ops; + ehea_set_ethtool_ops(dev); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO + | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX | NETIF_F_LRO; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO + | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX + | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER + | NETIF_F_LLTX | NETIF_F_RXCSUM; + dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT; + + if (use_lro) + dev->features |= NETIF_F_LRO; + + INIT_WORK(&port->reset_task, ehea_reset_port); + INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats); + + init_waitqueue_head(&port->swqe_avail_wq); + init_waitqueue_head(&port->restart_wq); + + memset(&port->stats, 0, sizeof(struct net_device_stats)); + ret = register_netdev(dev); + if (ret) { + pr_err("register_netdev failed. ret=%d\n", ret); + goto out_unreg_port; + } + + port->lro_max_aggr = lro_max_aggr; + + ret = ehea_get_jumboframe_status(port, &jumbo); + if (ret) + netdev_err(dev, "failed determining jumbo frame status\n"); + + netdev_info(dev, "Jumbo frames are %sabled\n", + jumbo == 1 ? "en" : "dis"); + + adapter->active_ports++; + + return port; + +out_unreg_port: + ehea_unregister_port(port); + +out_free_mc_list: + kfree(port->mc_list); + +out_free_ethdev: + free_netdev(dev); + +out_err: + pr_err("setting up logical port with id=%d failed, ret=%d\n", + logical_port_id, ret); + return NULL; +} + +static void ehea_shutdown_single_port(struct ehea_port *port) +{ + struct ehea_adapter *adapter = port->adapter; + + cancel_work_sync(&port->reset_task); + cancel_delayed_work_sync(&port->stats_work); + unregister_netdev(port->netdev); + ehea_unregister_port(port); + kfree(port->mc_list); + free_netdev(port->netdev); + adapter->active_ports--; +} + +static int ehea_setup_ports(struct ehea_adapter *adapter) +{ + struct device_node *lhea_dn; + struct device_node *eth_dn = NULL; + + const u32 *dn_log_port_id; + int i = 0; + + lhea_dn = adapter->ofdev->dev.of_node; + while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { + + dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", + NULL); + if (!dn_log_port_id) { + pr_err("bad device node: eth_dn name=%s\n", + eth_dn->full_name); + continue; + } + + if (ehea_add_adapter_mr(adapter)) { + pr_err("creating MR failed\n"); + of_node_put(eth_dn); + return -EIO; + } + + adapter->port[i] = ehea_setup_single_port(adapter, + *dn_log_port_id, + eth_dn); + if (adapter->port[i]) + netdev_info(adapter->port[i]->netdev, + "logical port id #%d\n", *dn_log_port_id); + else + ehea_remove_adapter_mr(adapter); + + i++; + } + return 0; +} + +static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter, + u32 logical_port_id) +{ + struct device_node *lhea_dn; + struct device_node *eth_dn = NULL; + const u32 *dn_log_port_id; + + lhea_dn = adapter->ofdev->dev.of_node; + while ((eth_dn = of_get_next_child(lhea_dn, eth_dn))) { + + dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no", + NULL); + if (dn_log_port_id) + if (*dn_log_port_id == logical_port_id) + return eth_dn; + } + + return NULL; +} + +static ssize_t ehea_probe_port(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ehea_adapter *adapter = dev_get_drvdata(dev); + struct ehea_port *port; + struct device_node *eth_dn = NULL; + int i; + + u32 logical_port_id; + + sscanf(buf, "%d", &logical_port_id); + + port = ehea_get_port(adapter, logical_port_id); + + if (port) { + netdev_info(port->netdev, "adding port with logical port id=%d failed: port already configured\n", + logical_port_id); + return -EINVAL; + } + + eth_dn = ehea_get_eth_dn(adapter, logical_port_id); + + if (!eth_dn) { + pr_info("no logical port with id %d found\n", logical_port_id); + return -EINVAL; + } + + if (ehea_add_adapter_mr(adapter)) { + pr_err("creating MR failed\n"); + return -EIO; + } + + port = ehea_setup_single_port(adapter, logical_port_id, eth_dn); + + of_node_put(eth_dn); + + if (port) { + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (!adapter->port[i]) { + adapter->port[i] = port; + break; + } + + netdev_info(port->netdev, "added: (logical port id=%d)\n", + logical_port_id); + } else { + ehea_remove_adapter_mr(adapter); + return -EIO; + } + + return (ssize_t) count; +} + +static ssize_t ehea_remove_port(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ehea_adapter *adapter = dev_get_drvdata(dev); + struct ehea_port *port; + int i; + u32 logical_port_id; + + sscanf(buf, "%d", &logical_port_id); + + port = ehea_get_port(adapter, logical_port_id); + + if (port) { + netdev_info(port->netdev, "removed: (logical port id=%d)\n", + logical_port_id); + + ehea_shutdown_single_port(port); + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i] == port) { + adapter->port[i] = NULL; + break; + } + } else { + pr_err("removing port with logical port id=%d failed. port not configured.\n", + logical_port_id); + return -EINVAL; + } + + ehea_remove_adapter_mr(adapter); + + return (ssize_t) count; +} + +static DEVICE_ATTR(probe_port, S_IWUSR, NULL, ehea_probe_port); +static DEVICE_ATTR(remove_port, S_IWUSR, NULL, ehea_remove_port); + +int ehea_create_device_sysfs(struct platform_device *dev) +{ + int ret = device_create_file(&dev->dev, &dev_attr_probe_port); + if (ret) + goto out; + + ret = device_create_file(&dev->dev, &dev_attr_remove_port); +out: + return ret; +} + +void ehea_remove_device_sysfs(struct platform_device *dev) +{ + device_remove_file(&dev->dev, &dev_attr_probe_port); + device_remove_file(&dev->dev, &dev_attr_remove_port); +} + +static int __devinit ehea_probe_adapter(struct platform_device *dev, + const struct of_device_id *id) +{ + struct ehea_adapter *adapter; + const u64 *adapter_handle; + int ret; + + if (!dev || !dev->dev.of_node) { + pr_err("Invalid ibmebus device probed\n"); + return -EINVAL; + } + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + ret = -ENOMEM; + dev_err(&dev->dev, "no mem for ehea_adapter\n"); + goto out; + } + + list_add(&adapter->list, &adapter_list); + + adapter->ofdev = dev; + + adapter_handle = of_get_property(dev->dev.of_node, "ibm,hea-handle", + NULL); + if (adapter_handle) + adapter->handle = *adapter_handle; + + if (!adapter->handle) { + dev_err(&dev->dev, "failed getting handle for adapter" + " '%s'\n", dev->dev.of_node->full_name); + ret = -ENODEV; + goto out_free_ad; + } + + adapter->pd = EHEA_PD_ID; + + dev_set_drvdata(&dev->dev, adapter); + + + /* initialize adapter and ports */ + /* get adapter properties */ + ret = ehea_sense_adapter_attr(adapter); + if (ret) { + dev_err(&dev->dev, "sense_adapter_attr failed: %d\n", ret); + goto out_free_ad; + } + + adapter->neq = ehea_create_eq(adapter, + EHEA_NEQ, EHEA_MAX_ENTRIES_EQ, 1); + if (!adapter->neq) { + ret = -EIO; + dev_err(&dev->dev, "NEQ creation failed\n"); + goto out_free_ad; + } + + tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet, + (unsigned long)adapter); + + ret = ibmebus_request_irq(adapter->neq->attr.ist1, + ehea_interrupt_neq, IRQF_DISABLED, + "ehea_neq", adapter); + if (ret) { + dev_err(&dev->dev, "requesting NEQ IRQ failed\n"); + goto out_kill_eq; + } + + ret = ehea_create_device_sysfs(dev); + if (ret) + goto out_free_irq; + + ret = ehea_setup_ports(adapter); + if (ret) { + dev_err(&dev->dev, "setup_ports failed\n"); + goto out_rem_dev_sysfs; + } + + ret = 0; + goto out; + +out_rem_dev_sysfs: + ehea_remove_device_sysfs(dev); + +out_free_irq: + ibmebus_free_irq(adapter->neq->attr.ist1, adapter); + +out_kill_eq: + ehea_destroy_eq(adapter->neq); + +out_free_ad: + list_del(&adapter->list); + kfree(adapter); + +out: + ehea_update_firmware_handles(); + + return ret; +} + +static int __devexit ehea_remove(struct platform_device *dev) +{ + struct ehea_adapter *adapter = dev_get_drvdata(&dev->dev); + int i; + + for (i = 0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) { + ehea_shutdown_single_port(adapter->port[i]); + adapter->port[i] = NULL; + } + + ehea_remove_device_sysfs(dev); + + ibmebus_free_irq(adapter->neq->attr.ist1, adapter); + tasklet_kill(&adapter->neq_tasklet); + + ehea_destroy_eq(adapter->neq); + ehea_remove_adapter_mr(adapter); + list_del(&adapter->list); + kfree(adapter); + + ehea_update_firmware_handles(); + + return 0; +} + +void ehea_crash_handler(void) +{ + int i; + + if (ehea_fw_handles.arr) + for (i = 0; i < ehea_fw_handles.num_entries; i++) + ehea_h_free_resource(ehea_fw_handles.arr[i].adh, + ehea_fw_handles.arr[i].fwh, + FORCE_FREE); + + if (ehea_bcmc_regs.arr) + for (i = 0; i < ehea_bcmc_regs.num_entries; i++) + ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, + ehea_bcmc_regs.arr[i].port_id, + ehea_bcmc_regs.arr[i].reg_type, + ehea_bcmc_regs.arr[i].macaddr, + 0, H_DEREG_BCMC); +} + +static int ehea_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret = NOTIFY_BAD; + struct memory_notify *arg = data; + + mutex_lock(&dlpar_mem_lock); + + switch (action) { + case MEM_CANCEL_OFFLINE: + pr_info("memory offlining canceled"); + /* Readd canceled memory block */ + case MEM_ONLINE: + pr_info("memory is going online"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + case MEM_GOING_OFFLINE: + pr_info("memory is going offline"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + default: + break; + } + + ehea_update_firmware_handles(); + ret = NOTIFY_OK; + +out_unlock: + mutex_unlock(&dlpar_mem_lock); + return ret; +} + +static struct notifier_block ehea_mem_nb = { + .notifier_call = ehea_mem_notifier, +}; + +static int ehea_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + pr_info("Reboot: freeing all eHEA resources\n"); + ibmebus_unregister_driver(&ehea_driver); + } + return NOTIFY_DONE; +} + +static struct notifier_block ehea_reboot_nb = { + .notifier_call = ehea_reboot_notifier, +}; + +static int check_module_parm(void) +{ + int ret = 0; + + if ((rq1_entries < EHEA_MIN_ENTRIES_QP) || + (rq1_entries > EHEA_MAX_ENTRIES_RQ1)) { + pr_info("Bad parameter: rq1_entries\n"); + ret = -EINVAL; + } + if ((rq2_entries < EHEA_MIN_ENTRIES_QP) || + (rq2_entries > EHEA_MAX_ENTRIES_RQ2)) { + pr_info("Bad parameter: rq2_entries\n"); + ret = -EINVAL; + } + if ((rq3_entries < EHEA_MIN_ENTRIES_QP) || + (rq3_entries > EHEA_MAX_ENTRIES_RQ3)) { + pr_info("Bad parameter: rq3_entries\n"); + ret = -EINVAL; + } + if ((sq_entries < EHEA_MIN_ENTRIES_QP) || + (sq_entries > EHEA_MAX_ENTRIES_SQ)) { + pr_info("Bad parameter: sq_entries\n"); + ret = -EINVAL; + } + + return ret; +} + +static ssize_t ehea_show_capabilities(struct device_driver *drv, + char *buf) +{ + return sprintf(buf, "%d", EHEA_CAPABILITIES); +} + +static DRIVER_ATTR(capabilities, S_IRUSR | S_IRGRP | S_IROTH, + ehea_show_capabilities, NULL); + +int __init ehea_module_init(void) +{ + int ret; + + pr_info("IBM eHEA ethernet device driver (Release %s)\n", DRV_VERSION); + + memset(&ehea_fw_handles, 0, sizeof(ehea_fw_handles)); + memset(&ehea_bcmc_regs, 0, sizeof(ehea_bcmc_regs)); + + mutex_init(&ehea_fw_handles.lock); + spin_lock_init(&ehea_bcmc_regs.lock); + + ret = check_module_parm(); + if (ret) + goto out; + + ret = ehea_create_busmap(); + if (ret) + goto out; + + ret = register_reboot_notifier(&ehea_reboot_nb); + if (ret) + pr_info("failed registering reboot notifier\n"); + + ret = register_memory_notifier(&ehea_mem_nb); + if (ret) + pr_info("failed registering memory remove notifier\n"); + + ret = crash_shutdown_register(ehea_crash_handler); + if (ret) + pr_info("failed registering crash handler\n"); + + ret = ibmebus_register_driver(&ehea_driver); + if (ret) { + pr_err("failed registering eHEA device driver on ebus\n"); + goto out2; + } + + ret = driver_create_file(&ehea_driver.driver, + &driver_attr_capabilities); + if (ret) { + pr_err("failed to register capabilities attribute, ret=%d\n", + ret); + goto out3; + } + + return ret; + +out3: + ibmebus_unregister_driver(&ehea_driver); +out2: + unregister_memory_notifier(&ehea_mem_nb); + unregister_reboot_notifier(&ehea_reboot_nb); + crash_shutdown_unregister(ehea_crash_handler); +out: + return ret; +} + +static void __exit ehea_module_exit(void) +{ + int ret; + + driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); + ibmebus_unregister_driver(&ehea_driver); + unregister_reboot_notifier(&ehea_reboot_nb); + ret = crash_shutdown_unregister(ehea_crash_handler); + if (ret) + pr_info("failed unregistering crash handler\n"); + unregister_memory_notifier(&ehea_mem_nb); + kfree(ehea_fw_handles.arr); + kfree(ehea_bcmc_regs.arr); + ehea_destroy_busmap(); +} + +module_init(ehea_module_init); +module_exit(ehea_module_exit); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c new file mode 100644 index 000000000000..0506967b9044 --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c @@ -0,0 +1,626 @@ +/* + * linux/drivers/net/ehea/ehea_phyp.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ehea_phyp.h" + + +static inline u16 get_order_of_qentries(u16 queue_entries) +{ + u8 ld = 1; /* logarithmus dualis */ + while (((1U << ld) - 1) < queue_entries) + ld++; + return ld - 1; +} + +/* Defines for H_CALL H_ALLOC_RESOURCE */ +#define H_ALL_RES_TYPE_QP 1 +#define H_ALL_RES_TYPE_CQ 2 +#define H_ALL_RES_TYPE_EQ 3 +#define H_ALL_RES_TYPE_MR 5 +#define H_ALL_RES_TYPE_MW 6 + +static long ehea_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + pr_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx\n", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + return ret; + } + + return H_BUSY; +} + +static long ehea_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + u8 cb_cat; + + for (i = 0; i < 5; i++) { + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + cb_cat = EHEA_BMASK_GET(H_MEHEAPORT_CAT, arg2); + + if ((ret < H_SUCCESS) && !(((ret == H_AUTHORITY) + && (opcode == H_MODIFY_HEA_PORT)) + && (((cb_cat == H_PORT_CB4) && ((arg3 == H_PORT_CB4_JUMBO) + || (arg3 == H_PORT_CB4_SPEED))) || ((cb_cat == H_PORT_CB7) + && (arg3 == H_PORT_CB7_DUCQPN))))) + pr_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx\n", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, + outs[0], outs[1], outs[2], outs[3], outs[4], + outs[5], outs[6], outs[7], outs[8]); + return ret; + } + + return H_BUSY; +} + +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, void *cb_addr) +{ + return ehea_plpar_hcall_norets(H_QUERY_HEA_QP, + adapter_handle, /* R4 */ + qp_category, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0); +} + +/* input param R5 */ +#define H_ALL_RES_QP_EQPO EHEA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_QPP EHEA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_RQR EHEA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_EQEG EHEA_BMASK_IBM(16, 16) +#define H_ALL_RES_QP_LL_QP EHEA_BMASK_IBM(17, 17) +#define H_ALL_RES_QP_DMA128 EHEA_BMASK_IBM(19, 19) +#define H_ALL_RES_QP_HSM EHEA_BMASK_IBM(20, 21) +#define H_ALL_RES_QP_SIGT EHEA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_TENURE EHEA_BMASK_IBM(48, 55) +#define H_ALL_RES_QP_RES_TYP EHEA_BMASK_IBM(56, 63) + +/* input param R9 */ +#define H_ALL_RES_QP_TOKEN EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_PD EHEA_BMASK_IBM(32, 63) + +/* input param R10 */ +#define H_ALL_RES_QP_MAX_SWQE EHEA_BMASK_IBM(4, 7) +#define H_ALL_RES_QP_MAX_R1WQE EHEA_BMASK_IBM(12, 15) +#define H_ALL_RES_QP_MAX_R2WQE EHEA_BMASK_IBM(20, 23) +#define H_ALL_RES_QP_MAX_R3WQE EHEA_BMASK_IBM(28, 31) +/* Max Send Scatter Gather Elements */ +#define H_ALL_RES_QP_MAX_SSGE EHEA_BMASK_IBM(37, 39) +#define H_ALL_RES_QP_MAX_R1SGE EHEA_BMASK_IBM(45, 47) +/* Max Receive SG Elements RQ1 */ +#define H_ALL_RES_QP_MAX_R2SGE EHEA_BMASK_IBM(53, 55) +#define H_ALL_RES_QP_MAX_R3SGE EHEA_BMASK_IBM(61, 63) + +/* input param R11 */ +#define H_ALL_RES_QP_SWQE_IDL EHEA_BMASK_IBM(0, 7) +/* max swqe immediate data length */ +#define H_ALL_RES_QP_PORT_NUM EHEA_BMASK_IBM(48, 63) + +/* input param R12 */ +#define H_ALL_RES_QP_TH_RQ2 EHEA_BMASK_IBM(0, 15) +/* Threshold RQ2 */ +#define H_ALL_RES_QP_TH_RQ3 EHEA_BMASK_IBM(16, 31) +/* Threshold RQ3 */ + +/* output param R6 */ +#define H_ALL_RES_QP_ACT_SWQE EHEA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_ACT_R1WQE EHEA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_R2WQE EHEA_BMASK_IBM(32, 47) +#define H_ALL_RES_QP_ACT_R3WQE EHEA_BMASK_IBM(48, 63) + +/* output param, R7 */ +#define H_ALL_RES_QP_ACT_SSGE EHEA_BMASK_IBM(0, 7) +#define H_ALL_RES_QP_ACT_R1SGE EHEA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_R2SGE EHEA_BMASK_IBM(16, 23) +#define H_ALL_RES_QP_ACT_R3SGE EHEA_BMASK_IBM(24, 31) +#define H_ALL_RES_QP_ACT_SWQE_IDL EHEA_BMASK_IBM(32, 39) + +/* output param R8,R9 */ +#define H_ALL_RES_QP_SIZE_SQ EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SIZE_RQ1 EHEA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SIZE_RQ2 EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SIZE_RQ3 EHEA_BMASK_IBM(32, 63) + +/* output param R11,R12 */ +#define H_ALL_RES_QP_LIOBN_SQ EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_LIOBN_RQ1 EHEA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_LIOBN_RQ2 EHEA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_LIOBN_RQ3 EHEA_BMASK_IBM(32, 63) + +u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, + struct ehea_qp_init_attr *init_attr, const u32 pd, + u64 *qp_handle, struct h_epas *h_epas) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + u64 allocate_controls = + EHEA_BMASK_SET(H_ALL_RES_QP_EQPO, init_attr->low_lat_rq1 ? 1 : 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_QPP, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_RQR, 6) /* rq1 & rq2 & rq3 */ + | EHEA_BMASK_SET(H_ALL_RES_QP_EQEG, 0) /* EQE gen. disabled */ + | EHEA_BMASK_SET(H_ALL_RES_QP_LL_QP, init_attr->low_lat_rq1) + | EHEA_BMASK_SET(H_ALL_RES_QP_DMA128, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_HSM, 0) + | EHEA_BMASK_SET(H_ALL_RES_QP_SIGT, init_attr->signalingtype) + | EHEA_BMASK_SET(H_ALL_RES_QP_RES_TYP, H_ALL_RES_TYPE_QP); + + u64 r9_reg = EHEA_BMASK_SET(H_ALL_RES_QP_PD, pd) + | EHEA_BMASK_SET(H_ALL_RES_QP_TOKEN, init_attr->qp_token); + + u64 max_r10_reg = + EHEA_BMASK_SET(H_ALL_RES_QP_MAX_SWQE, + get_order_of_qentries(init_attr->max_nr_send_wqes)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R1WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq1)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R2WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq2)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R3WQE, + get_order_of_qentries(init_attr->max_nr_rwqes_rq3)) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_SSGE, init_attr->wqe_size_enc_sq) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R1SGE, + init_attr->wqe_size_enc_rq1) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R2SGE, + init_attr->wqe_size_enc_rq2) + | EHEA_BMASK_SET(H_ALL_RES_QP_MAX_R3SGE, + init_attr->wqe_size_enc_rq3); + + u64 r11_in = + EHEA_BMASK_SET(H_ALL_RES_QP_SWQE_IDL, init_attr->swqe_imm_data_len) + | EHEA_BMASK_SET(H_ALL_RES_QP_PORT_NUM, init_attr->port_nr); + u64 threshold = + EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ2, init_attr->rq2_threshold) + | EHEA_BMASK_SET(H_ALL_RES_QP_TH_RQ3, init_attr->rq3_threshold); + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + init_attr->send_cq_handle, /* R6 */ + init_attr->recv_cq_handle, /* R7 */ + init_attr->aff_eq_handle, /* R8 */ + r9_reg, /* R9 */ + max_r10_reg, /* R10 */ + r11_in, /* R11 */ + threshold); /* R12 */ + + *qp_handle = outs[0]; + init_attr->qp_nr = (u32)outs[1]; + + init_attr->act_nr_send_wqes = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_SWQE, outs[2]); + init_attr->act_nr_rwqes_rq1 = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R1WQE, outs[2]); + init_attr->act_nr_rwqes_rq2 = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R2WQE, outs[2]); + init_attr->act_nr_rwqes_rq3 = + (u16)EHEA_BMASK_GET(H_ALL_RES_QP_ACT_R3WQE, outs[2]); + + init_attr->act_wqe_size_enc_sq = init_attr->wqe_size_enc_sq; + init_attr->act_wqe_size_enc_rq1 = init_attr->wqe_size_enc_rq1; + init_attr->act_wqe_size_enc_rq2 = init_attr->wqe_size_enc_rq2; + init_attr->act_wqe_size_enc_rq3 = init_attr->wqe_size_enc_rq3; + + init_attr->nr_sq_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_SQ, outs[4]); + init_attr->nr_rq1_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ1, outs[4]); + init_attr->nr_rq2_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ2, outs[5]); + init_attr->nr_rq3_pages = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_SIZE_RQ3, outs[5]); + + init_attr->liobn_sq = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_SQ, outs[7]); + init_attr->liobn_rq1 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ1, outs[7]); + init_attr->liobn_rq2 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ2, outs[8]); + init_attr->liobn_rq3 = + (u32)EHEA_BMASK_GET(H_ALL_RES_QP_LIOBN_RQ3, outs[8]); + + if (!hret) + hcp_epas_ctor(h_epas, outs[6], outs[6]); + + return hret; +} + +u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, + struct ehea_cq_attr *cq_attr, + u64 *cq_handle, struct h_epas *epas) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + H_ALL_RES_TYPE_CQ, /* R5 */ + cq_attr->eq_handle, /* R6 */ + cq_attr->cq_token, /* R7 */ + cq_attr->max_nr_of_cqes, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *cq_handle = outs[0]; + cq_attr->act_nr_of_cqes = outs[3]; + cq_attr->nr_pages = outs[4]; + + if (!hret) + hcp_epas_ctor(epas, outs[5], outs[6]); + + return hret; +} + +/* Defines for H_CALL H_ALLOC_RESOURCE */ +#define H_ALL_RES_TYPE_QP 1 +#define H_ALL_RES_TYPE_CQ 2 +#define H_ALL_RES_TYPE_EQ 3 +#define H_ALL_RES_TYPE_MR 5 +#define H_ALL_RES_TYPE_MW 6 + +/* input param R5 */ +#define H_ALL_RES_EQ_NEQ EHEA_BMASK_IBM(0, 0) +#define H_ALL_RES_EQ_NON_NEQ_ISN EHEA_BMASK_IBM(6, 7) +#define H_ALL_RES_EQ_INH_EQE_GEN EHEA_BMASK_IBM(16, 16) +#define H_ALL_RES_EQ_RES_TYPE EHEA_BMASK_IBM(56, 63) +/* input param R6 */ +#define H_ALL_RES_EQ_MAX_EQE EHEA_BMASK_IBM(32, 63) + +/* output param R6 */ +#define H_ALL_RES_EQ_LIOBN EHEA_BMASK_IBM(32, 63) + +/* output param R7 */ +#define H_ALL_RES_EQ_ACT_EQE EHEA_BMASK_IBM(32, 63) + +/* output param R8 */ +#define H_ALL_RES_EQ_ACT_PS EHEA_BMASK_IBM(32, 63) + +/* output param R9 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_C EHEA_BMASK_IBM(30, 31) +#define H_ALL_RES_EQ_ACT_EQ_IST_1 EHEA_BMASK_IBM(40, 63) + +/* output param R10 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_2 EHEA_BMASK_IBM(40, 63) + +/* output param R11 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_3 EHEA_BMASK_IBM(40, 63) + +/* output param R12 */ +#define H_ALL_RES_EQ_ACT_EQ_IST_4 EHEA_BMASK_IBM(40, 63) + +u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, + struct ehea_eq_attr *eq_attr, u64 *eq_handle) +{ + u64 hret, allocate_controls; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + /* resource type */ + allocate_controls = + EHEA_BMASK_SET(H_ALL_RES_EQ_RES_TYPE, H_ALL_RES_TYPE_EQ) + | EHEA_BMASK_SET(H_ALL_RES_EQ_NEQ, eq_attr->type ? 1 : 0) + | EHEA_BMASK_SET(H_ALL_RES_EQ_INH_EQE_GEN, !eq_attr->eqe_gen) + | EHEA_BMASK_SET(H_ALL_RES_EQ_NON_NEQ_ISN, 1); + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + allocate_controls, /* R5 */ + eq_attr->max_nr_of_eqes, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R10 */ + + *eq_handle = outs[0]; + eq_attr->act_nr_of_eqes = outs[3]; + eq_attr->nr_pages = outs[4]; + eq_attr->ist1 = outs[5]; + eq_attr->ist2 = outs[6]; + eq_attr->ist3 = outs[7]; + eq_attr->ist4 = outs[8]; + + return hret; +} + +u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat, + const u64 qp_handle, const u64 sel_mask, + void *cb_addr, u64 *inv_attr_id, u64 *proc_mask, + u16 *out_swr, u16 *out_rwr) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_MODIFY_HEA_QP, + outs, + adapter_handle, /* R4 */ + (u64) cat, /* R5 */ + qp_handle, /* R6 */ + sel_mask, /* R7 */ + virt_to_abs(cb_addr), /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + *inv_attr_id = outs[0]; + *out_swr = outs[3]; + *out_rwr = outs[4]; + *proc_mask = outs[5]; + + return hret; +} + +u64 ehea_h_register_rpage(const u64 adapter_handle, const u8 pagesize, + const u8 queue_type, const u64 resource_handle, + const u64 log_pageaddr, u64 count) +{ + u64 reg_control; + + reg_control = EHEA_BMASK_SET(H_REG_RPAGE_PAGE_SIZE, pagesize) + | EHEA_BMASK_SET(H_REG_RPAGE_QT, queue_type); + + return ehea_plpar_hcall_norets(H_REGISTER_HEA_RPAGES, + adapter_handle, /* R4 */ + reg_control, /* R5 */ + resource_handle, /* R6 */ + log_pageaddr, /* R7 */ + count, /* R8 */ + 0, 0); /* R9-R10 */ +} + +u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, + const u64 vaddr_in, const u32 access_ctrl, const u32 pd, + struct ehea_mr *mr) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_REGISTER_SMR, + outs, + adapter_handle , /* R4 */ + orig_mr_handle, /* R5 */ + vaddr_in, /* R6 */ + (((u64)access_ctrl) << 32ULL), /* R7 */ + pd, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ + + mr->handle = outs[0]; + mr->lkey = (u32)outs[2]; + + return hret; +} + +u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle) +{ + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + return ehea_plpar_hcall9(H_DISABLE_AND_GET_HEA, + outs, + adapter_handle, /* R4 */ + H_DISABLE_GET_EHEA_WQE_P, /* R5 */ + qp_handle, /* R6 */ + 0, 0, 0, 0, 0, 0); /* R7-R12 */ +} + +u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle, + u64 force_bit) +{ + return ehea_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle, /* R4 */ + res_handle, /* R5 */ + force_bit, + 0, 0, 0, 0); /* R7-R10 */ +} + +u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, + const u64 length, const u32 access_ctrl, + const u32 pd, u64 *mr_handle, u32 *lkey) +{ + u64 hret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + hret = ehea_plpar_hcall9(H_ALLOC_HEA_RESOURCE, + outs, + adapter_handle, /* R4 */ + 5, /* R5 */ + vaddr, /* R6 */ + length, /* R7 */ + (((u64) access_ctrl) << 32ULL), /* R8 */ + pd, /* R9 */ + 0, 0, 0); /* R10-R12 */ + + *mr_handle = outs[0]; + *lkey = (u32)outs[2]; + return hret; +} + +u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, + const u8 pagesize, const u8 queue_type, + const u64 log_pageaddr, const u64 count) +{ + if ((count > 1) && (log_pageaddr & ~PAGE_MASK)) { + pr_err("not on pageboundary\n"); + return H_PARAMETER; + } + + return ehea_h_register_rpage(adapter_handle, pagesize, + queue_type, mr_handle, + log_pageaddr, count); +} + +u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr) +{ + u64 hret, cb_logaddr; + + cb_logaddr = virt_to_abs(cb_addr); + + hret = ehea_plpar_hcall_norets(H_QUERY_HEA, + adapter_handle, /* R4 */ + cb_logaddr, /* R5 */ + 0, 0, 0, 0, 0); /* R6-R10 */ +#ifdef DEBUG + ehea_dump(cb_addr, sizeof(struct hcp_query_ehea), "hcp_query_ehea"); +#endif + return hret; +} + +u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr) +{ + u64 port_info; + u64 cb_logaddr = virt_to_abs(cb_addr); + u64 arr_index = 0; + + port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) + | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); + + return ehea_plpar_hcall_norets(H_QUERY_HEA_PORT, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0); /* R9-R10 */ +} + +u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr) +{ + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + u64 port_info; + u64 arr_index = 0; + u64 cb_logaddr = virt_to_abs(cb_addr); + + port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) + | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); +#ifdef DEBUG + ehea_dump(cb_addr, sizeof(struct hcp_ehea_port_cb0), "Before HCALL"); +#endif + return ehea_plpar_hcall9(H_MODIFY_HEA_PORT, + outs, + adapter_handle, /* R4 */ + port_info, /* R5 */ + select_mask, /* R6 */ + arr_index, /* R7 */ + cb_logaddr, /* R8 */ + 0, 0, 0, 0); /* R9-R12 */ +} + +u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, + const u8 reg_type, const u64 mc_mac_addr, + const u16 vlan_id, const u32 hcall_id) +{ + u64 r5_port_num, r6_reg_type, r7_mc_mac_addr, r8_vlan_id; + u64 mac_addr = mc_mac_addr >> 16; + + r5_port_num = EHEA_BMASK_SET(H_REGBCMC_PN, port_num); + r6_reg_type = EHEA_BMASK_SET(H_REGBCMC_REGTYPE, reg_type); + r7_mc_mac_addr = EHEA_BMASK_SET(H_REGBCMC_MACADDR, mac_addr); + r8_vlan_id = EHEA_BMASK_SET(H_REGBCMC_VLANID, vlan_id); + + return ehea_plpar_hcall_norets(hcall_id, + adapter_handle, /* R4 */ + r5_port_num, /* R5 */ + r6_reg_type, /* R6 */ + r7_mc_mac_addr, /* R7 */ + r8_vlan_id, /* R8 */ + 0, 0); /* R9-R12 */ +} + +u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, + const u64 event_mask) +{ + return ehea_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle, /* R4 */ + neq_handle, /* R5 */ + event_mask, /* R6 */ + 0, 0, 0, 0); /* R7-R12 */ +} + +u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle, + void *rblock) +{ + return ehea_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle, /* R4 */ + ressource_handle, /* R5 */ + virt_to_abs(rblock), /* R6 */ + 0, 0, 0, 0); /* R7-R12 */ +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.h b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h new file mode 100644 index 000000000000..2f8174c248bc --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.h @@ -0,0 +1,467 @@ +/* + * linux/drivers/net/ehea/ehea_phyp.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EHEA_PHYP_H__ +#define __EHEA_PHYP_H__ + +#include <linux/delay.h> +#include <asm/hvcall.h> +#include "ehea.h" +#include "ehea_hw.h" + +/* Some abbreviations used here: + * + * hcp_* - structures, variables and functions releated to Hypervisor Calls + */ + +static inline u32 get_longbusy_msecs(int long_busy_ret_code) +{ + switch (long_busy_ret_code) { + case H_LONG_BUSY_ORDER_1_MSEC: + return 1; + case H_LONG_BUSY_ORDER_10_MSEC: + return 10; + case H_LONG_BUSY_ORDER_100_MSEC: + return 100; + case H_LONG_BUSY_ORDER_1_SEC: + return 1000; + case H_LONG_BUSY_ORDER_10_SEC: + return 10000; + case H_LONG_BUSY_ORDER_100_SEC: + return 100000; + default: + return 1; + } +} + +/* Number of pages which can be registered at once by H_REGISTER_HEA_RPAGES */ +#define EHEA_MAX_RPAGE 512 + +/* Notification Event Queue (NEQ) Entry bit masks */ +#define NEQE_EVENT_CODE EHEA_BMASK_IBM(2, 7) +#define NEQE_PORTNUM EHEA_BMASK_IBM(32, 47) +#define NEQE_PORT_UP EHEA_BMASK_IBM(16, 16) +#define NEQE_EXTSWITCH_PORT_UP EHEA_BMASK_IBM(17, 17) +#define NEQE_EXTSWITCH_PRIMARY EHEA_BMASK_IBM(18, 18) +#define NEQE_PLID EHEA_BMASK_IBM(16, 47) + +/* Notification Event Codes */ +#define EHEA_EC_PORTSTATE_CHG 0x30 +#define EHEA_EC_ADAPTER_MALFUNC 0x32 +#define EHEA_EC_PORT_MALFUNC 0x33 + +/* Notification Event Log Register (NELR) bit masks */ +#define NELR_PORT_MALFUNC EHEA_BMASK_IBM(61, 61) +#define NELR_ADAPTER_MALFUNC EHEA_BMASK_IBM(62, 62) +#define NELR_PORTSTATE_CHG EHEA_BMASK_IBM(63, 63) + +static inline void hcp_epas_ctor(struct h_epas *epas, u64 paddr_kernel, + u64 paddr_user) +{ + /* To support 64k pages we must round to 64k page boundary */ + epas->kernel.addr = ioremap((paddr_kernel & PAGE_MASK), PAGE_SIZE) + + (paddr_kernel & ~PAGE_MASK); + epas->user.addr = paddr_user; +} + +static inline void hcp_epas_dtor(struct h_epas *epas) +{ + if (epas->kernel.addr) + iounmap((void __iomem *)((u64)epas->kernel.addr & PAGE_MASK)); + + epas->user.addr = 0; + epas->kernel.addr = 0; +} + +struct hcp_modify_qp_cb0 { + u64 qp_ctl_reg; /* 00 */ + u32 max_swqe; /* 02 */ + u32 max_rwqe; /* 03 */ + u32 port_nb; /* 04 */ + u32 reserved0; /* 05 */ + u64 qp_aer; /* 06 */ + u64 qp_tenure; /* 08 */ +}; + +/* Hcall Query/Modify Queue Pair Control Block 0 Selection Mask Bits */ +#define H_QPCB0_ALL EHEA_BMASK_IBM(0, 5) +#define H_QPCB0_QP_CTL_REG EHEA_BMASK_IBM(0, 0) +#define H_QPCB0_MAX_SWQE EHEA_BMASK_IBM(1, 1) +#define H_QPCB0_MAX_RWQE EHEA_BMASK_IBM(2, 2) +#define H_QPCB0_PORT_NB EHEA_BMASK_IBM(3, 3) +#define H_QPCB0_QP_AER EHEA_BMASK_IBM(4, 4) +#define H_QPCB0_QP_TENURE EHEA_BMASK_IBM(5, 5) + +/* Queue Pair Control Register Status Bits */ +#define H_QP_CR_ENABLED 0x8000000000000000ULL /* QP enabled */ + /* QP States: */ +#define H_QP_CR_STATE_RESET 0x0000010000000000ULL /* Reset */ +#define H_QP_CR_STATE_INITIALIZED 0x0000020000000000ULL /* Initialized */ +#define H_QP_CR_STATE_RDY2RCV 0x0000030000000000ULL /* Ready to recv */ +#define H_QP_CR_STATE_RDY2SND 0x0000050000000000ULL /* Ready to send */ +#define H_QP_CR_STATE_ERROR 0x0000800000000000ULL /* Error */ +#define H_QP_CR_RES_STATE 0x0000007F00000000ULL /* Resultant state */ + +struct hcp_modify_qp_cb1 { + u32 qpn; /* 00 */ + u32 qp_asyn_ev_eq_nb; /* 01 */ + u64 sq_cq_handle; /* 02 */ + u64 rq_cq_handle; /* 04 */ + /* sgel = scatter gather element */ + u32 sgel_nb_sq; /* 06 */ + u32 sgel_nb_rq1; /* 07 */ + u32 sgel_nb_rq2; /* 08 */ + u32 sgel_nb_rq3; /* 09 */ +}; + +/* Hcall Query/Modify Queue Pair Control Block 1 Selection Mask Bits */ +#define H_QPCB1_ALL EHEA_BMASK_IBM(0, 7) +#define H_QPCB1_QPN EHEA_BMASK_IBM(0, 0) +#define H_QPCB1_ASYN_EV_EQ_NB EHEA_BMASK_IBM(1, 1) +#define H_QPCB1_SQ_CQ_HANDLE EHEA_BMASK_IBM(2, 2) +#define H_QPCB1_RQ_CQ_HANDLE EHEA_BMASK_IBM(3, 3) +#define H_QPCB1_SGEL_NB_SQ EHEA_BMASK_IBM(4, 4) +#define H_QPCB1_SGEL_NB_RQ1 EHEA_BMASK_IBM(5, 5) +#define H_QPCB1_SGEL_NB_RQ2 EHEA_BMASK_IBM(6, 6) +#define H_QPCB1_SGEL_NB_RQ3 EHEA_BMASK_IBM(7, 7) + +struct hcp_query_ehea { + u32 cur_num_qps; /* 00 */ + u32 cur_num_cqs; /* 01 */ + u32 cur_num_eqs; /* 02 */ + u32 cur_num_mrs; /* 03 */ + u32 auth_level; /* 04 */ + u32 max_num_qps; /* 05 */ + u32 max_num_cqs; /* 06 */ + u32 max_num_eqs; /* 07 */ + u32 max_num_mrs; /* 08 */ + u32 reserved0; /* 09 */ + u32 int_clock_freq; /* 10 */ + u32 max_num_pds; /* 11 */ + u32 max_num_addr_handles; /* 12 */ + u32 max_num_cqes; /* 13 */ + u32 max_num_wqes; /* 14 */ + u32 max_num_sgel_rq1wqe; /* 15 */ + u32 max_num_sgel_rq2wqe; /* 16 */ + u32 max_num_sgel_rq3wqe; /* 17 */ + u32 mr_page_size; /* 18 */ + u32 reserved1; /* 19 */ + u64 max_mr_size; /* 20 */ + u64 reserved2; /* 22 */ + u32 num_ports; /* 24 */ + u32 reserved3; /* 25 */ + u32 reserved4; /* 26 */ + u32 reserved5; /* 27 */ + u64 max_mc_mac; /* 28 */ + u64 ehea_cap; /* 30 */ + u32 max_isn_per_eq; /* 32 */ + u32 max_num_neq; /* 33 */ + u64 max_num_vlan_ids; /* 34 */ + u32 max_num_port_group; /* 36 */ + u32 max_num_phys_port; /* 37 */ + +}; + +/* Hcall Query/Modify Port Control Block defines */ +#define H_PORT_CB0 0 +#define H_PORT_CB1 1 +#define H_PORT_CB2 2 +#define H_PORT_CB3 3 +#define H_PORT_CB4 4 +#define H_PORT_CB5 5 +#define H_PORT_CB6 6 +#define H_PORT_CB7 7 + +struct hcp_ehea_port_cb0 { + u64 port_mac_addr; + u64 port_rc; + u64 reserved0; + u32 port_op_state; + u32 port_speed; + u32 ext_swport_op_state; + u32 neg_tpf_prpf; + u32 num_default_qps; + u32 reserved1; + u64 default_qpn_arr[16]; +}; + +/* Hcall Query/Modify Port Control Block 0 Selection Mask Bits */ +#define H_PORT_CB0_ALL EHEA_BMASK_IBM(0, 7) /* Set all bits */ +#define H_PORT_CB0_MAC EHEA_BMASK_IBM(0, 0) /* MAC address */ +#define H_PORT_CB0_PRC EHEA_BMASK_IBM(1, 1) /* Port Recv Control */ +#define H_PORT_CB0_DEFQPNARRAY EHEA_BMASK_IBM(7, 7) /* Default QPN Array */ + +/* Hcall Query Port: Returned port speed values */ +#define H_SPEED_10M_H 1 /* 10 Mbps, Half Duplex */ +#define H_SPEED_10M_F 2 /* 10 Mbps, Full Duplex */ +#define H_SPEED_100M_H 3 /* 100 Mbps, Half Duplex */ +#define H_SPEED_100M_F 4 /* 100 Mbps, Full Duplex */ +#define H_SPEED_1G_F 6 /* 1 Gbps, Full Duplex */ +#define H_SPEED_10G_F 8 /* 10 Gbps, Full Duplex */ + +/* Port Receive Control Status Bits */ +#define PXLY_RC_VALID EHEA_BMASK_IBM(49, 49) +#define PXLY_RC_VLAN_XTRACT EHEA_BMASK_IBM(50, 50) +#define PXLY_RC_TCP_6_TUPLE EHEA_BMASK_IBM(51, 51) +#define PXLY_RC_UDP_6_TUPLE EHEA_BMASK_IBM(52, 52) +#define PXLY_RC_TCP_3_TUPLE EHEA_BMASK_IBM(53, 53) +#define PXLY_RC_TCP_2_TUPLE EHEA_BMASK_IBM(54, 54) +#define PXLY_RC_LLC_SNAP EHEA_BMASK_IBM(55, 55) +#define PXLY_RC_JUMBO_FRAME EHEA_BMASK_IBM(56, 56) +#define PXLY_RC_FRAG_IP_PKT EHEA_BMASK_IBM(57, 57) +#define PXLY_RC_TCP_UDP_CHKSUM EHEA_BMASK_IBM(58, 58) +#define PXLY_RC_IP_CHKSUM EHEA_BMASK_IBM(59, 59) +#define PXLY_RC_MAC_FILTER EHEA_BMASK_IBM(60, 60) +#define PXLY_RC_UNTAG_FILTER EHEA_BMASK_IBM(61, 61) +#define PXLY_RC_VLAN_TAG_FILTER EHEA_BMASK_IBM(62, 63) + +#define PXLY_RC_VLAN_FILTER 2 +#define PXLY_RC_VLAN_PERM 0 + + +#define H_PORT_CB1_ALL 0x8000000000000000ULL + +struct hcp_ehea_port_cb1 { + u64 vlan_filter[64]; +}; + +#define H_PORT_CB2_ALL 0xFFE0000000000000ULL + +struct hcp_ehea_port_cb2 { + u64 rxo; + u64 rxucp; + u64 rxufd; + u64 rxuerr; + u64 rxftl; + u64 rxmcp; + u64 rxbcp; + u64 txo; + u64 txucp; + u64 txmcp; + u64 txbcp; +}; + +struct hcp_ehea_port_cb3 { + u64 vlan_bc_filter[64]; + u64 vlan_mc_filter[64]; + u64 vlan_un_filter[64]; + u64 port_mac_hash_array[64]; +}; + +#define H_PORT_CB4_ALL 0xF000000000000000ULL +#define H_PORT_CB4_JUMBO 0x1000000000000000ULL +#define H_PORT_CB4_SPEED 0x8000000000000000ULL + +struct hcp_ehea_port_cb4 { + u32 port_speed; + u32 pause_frame; + u32 ens_port_op_state; + u32 jumbo_frame; + u32 ens_port_wrap; +}; + +/* Hcall Query/Modify Port Control Block 5 Selection Mask Bits */ +#define H_PORT_CB5_RCU 0x0001000000000000ULL +#define PXS_RCU EHEA_BMASK_IBM(61, 63) + +struct hcp_ehea_port_cb5 { + u64 prc; /* 00 */ + u64 uaa; /* 01 */ + u64 macvc; /* 02 */ + u64 xpcsc; /* 03 */ + u64 xpcsp; /* 04 */ + u64 pcsid; /* 05 */ + u64 xpcsst; /* 06 */ + u64 pthlb; /* 07 */ + u64 pthrb; /* 08 */ + u64 pqu; /* 09 */ + u64 pqd; /* 10 */ + u64 prt; /* 11 */ + u64 wsth; /* 12 */ + u64 rcb; /* 13 */ + u64 rcm; /* 14 */ + u64 rcu; /* 15 */ + u64 macc; /* 16 */ + u64 pc; /* 17 */ + u64 pst; /* 18 */ + u64 ducqpn; /* 19 */ + u64 mcqpn; /* 20 */ + u64 mma; /* 21 */ + u64 pmc0h; /* 22 */ + u64 pmc0l; /* 23 */ + u64 lbc; /* 24 */ +}; + +#define H_PORT_CB6_ALL 0xFFFFFE7FFFFF8000ULL + +struct hcp_ehea_port_cb6 { + u64 rxo; /* 00 */ + u64 rx64; /* 01 */ + u64 rx65; /* 02 */ + u64 rx128; /* 03 */ + u64 rx256; /* 04 */ + u64 rx512; /* 05 */ + u64 rx1024; /* 06 */ + u64 rxbfcs; /* 07 */ + u64 rxime; /* 08 */ + u64 rxrle; /* 09 */ + u64 rxorle; /* 10 */ + u64 rxftl; /* 11 */ + u64 rxjab; /* 12 */ + u64 rxse; /* 13 */ + u64 rxce; /* 14 */ + u64 rxrf; /* 15 */ + u64 rxfrag; /* 16 */ + u64 rxuoc; /* 17 */ + u64 rxcpf; /* 18 */ + u64 rxsb; /* 19 */ + u64 rxfd; /* 20 */ + u64 rxoerr; /* 21 */ + u64 rxaln; /* 22 */ + u64 ducqpn; /* 23 */ + u64 reserved0; /* 24 */ + u64 rxmcp; /* 25 */ + u64 rxbcp; /* 26 */ + u64 txmcp; /* 27 */ + u64 txbcp; /* 28 */ + u64 txo; /* 29 */ + u64 tx64; /* 30 */ + u64 tx65; /* 31 */ + u64 tx128; /* 32 */ + u64 tx256; /* 33 */ + u64 tx512; /* 34 */ + u64 tx1024; /* 35 */ + u64 txbfcs; /* 36 */ + u64 txcpf; /* 37 */ + u64 txlf; /* 38 */ + u64 txrf; /* 39 */ + u64 txime; /* 40 */ + u64 txsc; /* 41 */ + u64 txmc; /* 42 */ + u64 txsqe; /* 43 */ + u64 txdef; /* 44 */ + u64 txlcol; /* 45 */ + u64 txexcol; /* 46 */ + u64 txcse; /* 47 */ + u64 txbor; /* 48 */ +}; + +#define H_PORT_CB7_DUCQPN 0x8000000000000000ULL + +struct hcp_ehea_port_cb7 { + u64 def_uc_qpn; +}; + +u64 ehea_h_query_ehea_qp(const u64 adapter_handle, + const u8 qp_category, + const u64 qp_handle, const u64 sel_mask, + void *cb_addr); + +u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, + const u8 cat, + const u64 qp_handle, + const u64 sel_mask, + void *cb_addr, + u64 *inv_attr_id, + u64 *proc_mask, u16 *out_swr, u16 *out_rwr); + +u64 ehea_h_alloc_resource_eq(const u64 adapter_handle, + struct ehea_eq_attr *eq_attr, u64 *eq_handle); + +u64 ehea_h_alloc_resource_cq(const u64 adapter_handle, + struct ehea_cq_attr *cq_attr, + u64 *cq_handle, struct h_epas *epas); + +u64 ehea_h_alloc_resource_qp(const u64 adapter_handle, + struct ehea_qp_init_attr *init_attr, + const u32 pd, + u64 *qp_handle, struct h_epas *h_epas); + +#define H_REG_RPAGE_PAGE_SIZE EHEA_BMASK_IBM(48, 55) +#define H_REG_RPAGE_QT EHEA_BMASK_IBM(62, 63) + +u64 ehea_h_register_rpage(const u64 adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 log_pageaddr, u64 count); + +#define H_DISABLE_GET_EHEA_WQE_P 1 +#define H_DISABLE_GET_SQ_WQE_P 2 +#define H_DISABLE_GET_RQC 3 + +u64 ehea_h_disable_and_get_hea(const u64 adapter_handle, const u64 qp_handle); + +#define FORCE_FREE 1 +#define NORMAL_FREE 0 + +u64 ehea_h_free_resource(const u64 adapter_handle, const u64 res_handle, + u64 force_bit); + +u64 ehea_h_alloc_resource_mr(const u64 adapter_handle, const u64 vaddr, + const u64 length, const u32 access_ctrl, + const u32 pd, u64 *mr_handle, u32 *lkey); + +u64 ehea_h_register_rpage_mr(const u64 adapter_handle, const u64 mr_handle, + const u8 pagesize, const u8 queue_type, + const u64 log_pageaddr, const u64 count); + +u64 ehea_h_register_smr(const u64 adapter_handle, const u64 orig_mr_handle, + const u64 vaddr_in, const u32 access_ctrl, const u32 pd, + struct ehea_mr *mr); + +u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr); + +/* output param R5 */ +#define H_MEHEAPORT_CAT EHEA_BMASK_IBM(40, 47) +#define H_MEHEAPORT_PN EHEA_BMASK_IBM(48, 63) + +u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr); + +u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, + const u8 cb_cat, const u64 select_mask, + void *cb_addr); + +#define H_REGBCMC_PN EHEA_BMASK_IBM(48, 63) +#define H_REGBCMC_REGTYPE EHEA_BMASK_IBM(61, 63) +#define H_REGBCMC_MACADDR EHEA_BMASK_IBM(16, 63) +#define H_REGBCMC_VLANID EHEA_BMASK_IBM(52, 63) + +u64 ehea_h_reg_dereg_bcmc(const u64 adapter_handle, const u16 port_num, + const u8 reg_type, const u64 mc_mac_addr, + const u16 vlan_id, const u32 hcall_id); + +u64 ehea_h_reset_events(const u64 adapter_handle, const u64 neq_handle, + const u64 event_mask); + +u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle, + void *rblock); + +#endif /* __EHEA_PHYP_H__ */ diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c new file mode 100644 index 000000000000..95b9f4fa811e --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c @@ -0,0 +1,1031 @@ +/* + * linux/drivers/net/ehea/ehea_qmr.c + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/slab.h> +#include "ehea.h" +#include "ehea_phyp.h" +#include "ehea_qmr.h" + +struct ehea_bmap *ehea_bmap = NULL; + + + +static void *hw_qpageit_get_inc(struct hw_queue *queue) +{ + void *retvalue = hw_qeit_get(queue); + + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + retvalue = NULL; + } else if (((u64) retvalue) & (EHEA_PAGESIZE-1)) { + pr_err("not on pageboundary\n"); + retvalue = NULL; + } + return retvalue; +} + +static int hw_queue_ctor(struct hw_queue *queue, const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size) +{ + int pages_per_kpage = PAGE_SIZE / pagesize; + int i, k; + + if ((pagesize > PAGE_SIZE) || (!pages_per_kpage)) { + pr_err("pagesize conflict! kernel pagesize=%d, ehea pagesize=%d\n", + (int)PAGE_SIZE, (int)pagesize); + return -EINVAL; + } + + queue->queue_length = nr_of_pages * pagesize; + queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); + if (!queue->queue_pages) { + pr_err("no mem for queue_pages\n"); + return -ENOMEM; + } + + /* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hea queue pages + */ + i = 0; + while (i < nr_of_pages) { + u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out_nomem; + for (k = 0; k < pages_per_kpage && i < nr_of_pages; k++) { + (queue->queue_pages)[i] = (struct ehea_page *)kpage; + kpage += pagesize; + i++; + } + } + + queue->current_q_offset = 0; + queue->qe_size = qe_size; + queue->pagesize = pagesize; + queue->toggle_state = 1; + + return 0; +out_nomem: + for (i = 0; i < nr_of_pages; i += pages_per_kpage) { + if (!(queue->queue_pages)[i]) + break; + free_page((unsigned long)(queue->queue_pages)[i]); + } + return -ENOMEM; +} + +static void hw_queue_dtor(struct hw_queue *queue) +{ + int pages_per_kpage = PAGE_SIZE / queue->pagesize; + int i, nr_pages; + + if (!queue || !queue->queue_pages) + return; + + nr_pages = queue->queue_length / queue->pagesize; + + for (i = 0; i < nr_pages; i += pages_per_kpage) + free_page((unsigned long)(queue->queue_pages)[i]); + + kfree(queue->queue_pages); +} + +struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, + int nr_of_cqe, u64 eq_handle, u32 cq_token) +{ + struct ehea_cq *cq; + struct h_epa epa; + u64 *cq_handle_ref, hret, rpage; + u32 act_nr_of_entries, act_pages, counter; + int ret; + void *vpage; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + pr_err("no mem for cq\n"); + goto out_nomem; + } + + cq->attr.max_nr_of_cqes = nr_of_cqe; + cq->attr.cq_token = cq_token; + cq->attr.eq_handle = eq_handle; + + cq->adapter = adapter; + + cq_handle_ref = &cq->fw_handle; + act_nr_of_entries = 0; + act_pages = 0; + + hret = ehea_h_alloc_resource_cq(adapter->handle, &cq->attr, + &cq->fw_handle, &cq->epas); + if (hret != H_SUCCESS) { + pr_err("alloc_resource_cq failed\n"); + goto out_freemem; + } + + ret = hw_queue_ctor(&cq->hw_queue, cq->attr.nr_pages, + EHEA_PAGESIZE, sizeof(struct ehea_cqe)); + if (ret) + goto out_freeres; + + for (counter = 0; counter < cq->attr.nr_pages; counter++) { + vpage = hw_qpageit_get_inc(&cq->hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + goto out_kill_hwq; + } + + rpage = virt_to_abs(vpage); + hret = ehea_h_register_rpage(adapter->handle, + 0, EHEA_CQ_REGISTER_ORIG, + cq->fw_handle, rpage, 1); + if (hret < H_SUCCESS) { + pr_err("register_rpage_cq failed ehea_cq=%p hret=%llx counter=%i act_pages=%i\n", + cq, hret, counter, cq->attr.nr_pages); + goto out_kill_hwq; + } + + if (counter == (cq->attr.nr_pages - 1)) { + vpage = hw_qpageit_get_inc(&cq->hw_queue); + + if ((hret != H_SUCCESS) || (vpage)) { + pr_err("registration of pages not complete hret=%llx\n", + hret); + goto out_kill_hwq; + } + } else { + if (hret != H_PAGE_REGISTERED) { + pr_err("CQ: registration of page failed hret=%llx\n", + hret); + goto out_kill_hwq; + } + } + } + + hw_qeit_reset(&cq->hw_queue); + epa = cq->epas.kernel; + ehea_reset_cq_ep(cq); + ehea_reset_cq_n1(cq); + + return cq; + +out_kill_hwq: + hw_queue_dtor(&cq->hw_queue); + +out_freeres: + ehea_h_free_resource(adapter->handle, cq->fw_handle, FORCE_FREE); + +out_freemem: + kfree(cq); + +out_nomem: + return NULL; +} + +u64 ehea_destroy_cq_res(struct ehea_cq *cq, u64 force) +{ + u64 hret; + u64 adapter_handle = cq->adapter->handle; + + /* deregister all previous registered pages */ + hret = ehea_h_free_resource(adapter_handle, cq->fw_handle, force); + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&cq->hw_queue); + kfree(cq); + + return hret; +} + +int ehea_destroy_cq(struct ehea_cq *cq) +{ + u64 hret, aer, aerr; + if (!cq) + return 0; + + hcp_epas_dtor(&cq->epas); + hret = ehea_destroy_cq_res(cq, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr); + hret = ehea_destroy_cq_res(cq, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + pr_err("destroy CQ failed\n"); + return -EIO; + } + + return 0; +} + +struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter, + const enum ehea_eq_type type, + const u32 max_nr_of_eqes, const u8 eqe_gen) +{ + int ret, i; + u64 hret, rpage; + void *vpage; + struct ehea_eq *eq; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + pr_err("no mem for eq\n"); + return NULL; + } + + eq->adapter = adapter; + eq->attr.type = type; + eq->attr.max_nr_of_eqes = max_nr_of_eqes; + eq->attr.eqe_gen = eqe_gen; + spin_lock_init(&eq->spinlock); + + hret = ehea_h_alloc_resource_eq(adapter->handle, + &eq->attr, &eq->fw_handle); + if (hret != H_SUCCESS) { + pr_err("alloc_resource_eq failed\n"); + goto out_freemem; + } + + ret = hw_queue_ctor(&eq->hw_queue, eq->attr.nr_pages, + EHEA_PAGESIZE, sizeof(struct ehea_eqe)); + if (ret) { + pr_err("can't allocate eq pages\n"); + goto out_freeres; + } + + for (i = 0; i < eq->attr.nr_pages; i++) { + vpage = hw_qpageit_get_inc(&eq->hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + hret = H_RESOURCE; + goto out_kill_hwq; + } + + rpage = virt_to_abs(vpage); + + hret = ehea_h_register_rpage(adapter->handle, 0, + EHEA_EQ_REGISTER_ORIG, + eq->fw_handle, rpage, 1); + + if (i == (eq->attr.nr_pages - 1)) { + /* last page */ + vpage = hw_qpageit_get_inc(&eq->hw_queue); + if ((hret != H_SUCCESS) || (vpage)) + goto out_kill_hwq; + + } else { + if (hret != H_PAGE_REGISTERED) + goto out_kill_hwq; + + } + } + + hw_qeit_reset(&eq->hw_queue); + return eq; + +out_kill_hwq: + hw_queue_dtor(&eq->hw_queue); + +out_freeres: + ehea_h_free_resource(adapter->handle, eq->fw_handle, FORCE_FREE); + +out_freemem: + kfree(eq); + return NULL; +} + +struct ehea_eqe *ehea_poll_eq(struct ehea_eq *eq) +{ + struct ehea_eqe *eqe; + unsigned long flags; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = hw_eqit_eq_get_inc_valid(&eq->hw_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +u64 ehea_destroy_eq_res(struct ehea_eq *eq, u64 force) +{ + u64 hret; + unsigned long flags; + + spin_lock_irqsave(&eq->spinlock, flags); + + hret = ehea_h_free_resource(eq->adapter->handle, eq->fw_handle, force); + spin_unlock_irqrestore(&eq->spinlock, flags); + + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&eq->hw_queue); + kfree(eq); + + return hret; +} + +int ehea_destroy_eq(struct ehea_eq *eq) +{ + u64 hret, aer, aerr; + if (!eq) + return 0; + + hcp_epas_dtor(&eq->epas); + + hret = ehea_destroy_eq_res(eq, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr); + hret = ehea_destroy_eq_res(eq, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + pr_err("destroy EQ failed\n"); + return -EIO; + } + + return 0; +} + +/** + * allocates memory for a queue and registers pages in phyp + */ +int ehea_qp_alloc_register(struct ehea_qp *qp, struct hw_queue *hw_queue, + int nr_pages, int wqe_size, int act_nr_sges, + struct ehea_adapter *adapter, int h_call_q_selector) +{ + u64 hret, rpage; + int ret, cnt; + void *vpage; + + ret = hw_queue_ctor(hw_queue, nr_pages, EHEA_PAGESIZE, wqe_size); + if (ret) + return ret; + + for (cnt = 0; cnt < nr_pages; cnt++) { + vpage = hw_qpageit_get_inc(hw_queue); + if (!vpage) { + pr_err("hw_qpageit_get_inc failed\n"); + goto out_kill_hwq; + } + rpage = virt_to_abs(vpage); + hret = ehea_h_register_rpage(adapter->handle, + 0, h_call_q_selector, + qp->fw_handle, rpage, 1); + if (hret < H_SUCCESS) { + pr_err("register_rpage_qp failed\n"); + goto out_kill_hwq; + } + } + hw_qeit_reset(hw_queue); + return 0; + +out_kill_hwq: + hw_queue_dtor(hw_queue); + return -EIO; +} + +static inline u32 map_wqe_size(u8 wqe_enc_size) +{ + return 128 << wqe_enc_size; +} + +struct ehea_qp *ehea_create_qp(struct ehea_adapter *adapter, + u32 pd, struct ehea_qp_init_attr *init_attr) +{ + int ret; + u64 hret; + struct ehea_qp *qp; + u32 wqe_size_in_bytes_sq, wqe_size_in_bytes_rq1; + u32 wqe_size_in_bytes_rq2, wqe_size_in_bytes_rq3; + + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + pr_err("no mem for qp\n"); + return NULL; + } + + qp->adapter = adapter; + + hret = ehea_h_alloc_resource_qp(adapter->handle, init_attr, pd, + &qp->fw_handle, &qp->epas); + if (hret != H_SUCCESS) { + pr_err("ehea_h_alloc_resource_qp failed\n"); + goto out_freemem; + } + + wqe_size_in_bytes_sq = map_wqe_size(init_attr->act_wqe_size_enc_sq); + wqe_size_in_bytes_rq1 = map_wqe_size(init_attr->act_wqe_size_enc_rq1); + wqe_size_in_bytes_rq2 = map_wqe_size(init_attr->act_wqe_size_enc_rq2); + wqe_size_in_bytes_rq3 = map_wqe_size(init_attr->act_wqe_size_enc_rq3); + + ret = ehea_qp_alloc_register(qp, &qp->hw_squeue, init_attr->nr_sq_pages, + wqe_size_in_bytes_sq, + init_attr->act_wqe_size_enc_sq, adapter, + 0); + if (ret) { + pr_err("can't register for sq ret=%x\n", ret); + goto out_freeres; + } + + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue1, + init_attr->nr_rq1_pages, + wqe_size_in_bytes_rq1, + init_attr->act_wqe_size_enc_rq1, + adapter, 1); + if (ret) { + pr_err("can't register for rq1 ret=%x\n", ret); + goto out_kill_hwsq; + } + + if (init_attr->rq_count > 1) { + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue2, + init_attr->nr_rq2_pages, + wqe_size_in_bytes_rq2, + init_attr->act_wqe_size_enc_rq2, + adapter, 2); + if (ret) { + pr_err("can't register for rq2 ret=%x\n", ret); + goto out_kill_hwr1q; + } + } + + if (init_attr->rq_count > 2) { + ret = ehea_qp_alloc_register(qp, &qp->hw_rqueue3, + init_attr->nr_rq3_pages, + wqe_size_in_bytes_rq3, + init_attr->act_wqe_size_enc_rq3, + adapter, 3); + if (ret) { + pr_err("can't register for rq3 ret=%x\n", ret); + goto out_kill_hwr2q; + } + } + + qp->init_attr = *init_attr; + + return qp; + +out_kill_hwr2q: + hw_queue_dtor(&qp->hw_rqueue2); + +out_kill_hwr1q: + hw_queue_dtor(&qp->hw_rqueue1); + +out_kill_hwsq: + hw_queue_dtor(&qp->hw_squeue); + +out_freeres: + ehea_h_disable_and_get_hea(adapter->handle, qp->fw_handle); + ehea_h_free_resource(adapter->handle, qp->fw_handle, FORCE_FREE); + +out_freemem: + kfree(qp); + return NULL; +} + +u64 ehea_destroy_qp_res(struct ehea_qp *qp, u64 force) +{ + u64 hret; + struct ehea_qp_init_attr *qp_attr = &qp->init_attr; + + + ehea_h_disable_and_get_hea(qp->adapter->handle, qp->fw_handle); + hret = ehea_h_free_resource(qp->adapter->handle, qp->fw_handle, force); + if (hret != H_SUCCESS) + return hret; + + hw_queue_dtor(&qp->hw_squeue); + hw_queue_dtor(&qp->hw_rqueue1); + + if (qp_attr->rq_count > 1) + hw_queue_dtor(&qp->hw_rqueue2); + if (qp_attr->rq_count > 2) + hw_queue_dtor(&qp->hw_rqueue3); + kfree(qp); + + return hret; +} + +int ehea_destroy_qp(struct ehea_qp *qp) +{ + u64 hret, aer, aerr; + if (!qp) + return 0; + + hcp_epas_dtor(&qp->epas); + + hret = ehea_destroy_qp_res(qp, NORMAL_FREE); + if (hret == H_R_STATE) { + ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr); + hret = ehea_destroy_qp_res(qp, FORCE_FREE); + } + + if (hret != H_SUCCESS) { + pr_err("destroy QP failed\n"); + return -EIO; + } + + return 0; +} + +static inline int ehea_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHEA_INDEX_MASK; +} + +static inline int ehea_init_top_bmap(struct ehea_top_bmap *ehea_top_bmap, + int dir) +{ + if (!ehea_top_bmap->dir[dir]) { + ehea_top_bmap->dir[dir] = + kzalloc(sizeof(struct ehea_dir_bmap), GFP_KERNEL); + if (!ehea_top_bmap->dir[dir]) + return -ENOMEM; + } + return 0; +} + +static inline int ehea_init_bmap(struct ehea_bmap *ehea_bmap, int top, int dir) +{ + if (!ehea_bmap->top[top]) { + ehea_bmap->top[top] = + kzalloc(sizeof(struct ehea_top_bmap), GFP_KERNEL); + if (!ehea_bmap->top[top]) + return -ENOMEM; + } + return ehea_init_top_bmap(ehea_bmap->top[top], dir); +} + +static DEFINE_MUTEX(ehea_busmap_mutex); +static unsigned long ehea_mr_len; + +#define EHEA_BUSMAP_ADD_SECT 1 +#define EHEA_BUSMAP_REM_SECT 0 + +static void ehea_rebuild_busmap(void) +{ + u64 vaddr = EHEA_BUSMAP_START; + int top, dir, idx; + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + struct ehea_top_bmap *ehea_top; + int valid_dir_entries = 0; + + if (!ehea_bmap->top[top]) + continue; + ehea_top = ehea_bmap->top[top]; + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + struct ehea_dir_bmap *ehea_dir; + int valid_entries = 0; + + if (!ehea_top->dir[dir]) + continue; + valid_dir_entries++; + ehea_dir = ehea_top->dir[dir]; + for (idx = 0; idx < EHEA_MAP_ENTRIES; idx++) { + if (!ehea_dir->ent[idx]) + continue; + valid_entries++; + ehea_dir->ent[idx] = vaddr; + vaddr += EHEA_SECTSIZE; + } + if (!valid_entries) { + ehea_top->dir[dir] = NULL; + kfree(ehea_dir); + } + } + if (!valid_dir_entries) { + ehea_bmap->top[top] = NULL; + kfree(ehea_top); + } + } +} + +static int ehea_update_busmap(unsigned long pfn, unsigned long nr_pages, int add) +{ + unsigned long i, start_section, end_section; + + if (!nr_pages) + return 0; + + if (!ehea_bmap) { + ehea_bmap = kzalloc(sizeof(struct ehea_bmap), GFP_KERNEL); + if (!ehea_bmap) + return -ENOMEM; + } + + start_section = (pfn * PAGE_SIZE) / EHEA_SECTSIZE; + end_section = start_section + ((nr_pages * PAGE_SIZE) / EHEA_SECTSIZE); + /* Mark entries as valid or invalid only; address is assigned later */ + for (i = start_section; i < end_section; i++) { + u64 flag; + int top = ehea_calc_index(i, EHEA_TOP_INDEX_SHIFT); + int dir = ehea_calc_index(i, EHEA_DIR_INDEX_SHIFT); + int idx = i & EHEA_INDEX_MASK; + + if (add) { + int ret = ehea_init_bmap(ehea_bmap, top, dir); + if (ret) + return ret; + flag = 1; /* valid */ + ehea_mr_len += EHEA_SECTSIZE; + } else { + if (!ehea_bmap->top[top]) + continue; + if (!ehea_bmap->top[top]->dir[dir]) + continue; + flag = 0; /* invalid */ + ehea_mr_len -= EHEA_SECTSIZE; + } + + ehea_bmap->top[top]->dir[dir]->ent[idx] = flag; + } + ehea_rebuild_busmap(); /* Assign contiguous addresses for mr */ + return 0; +} + +int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ret = ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_ADD_SECT); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ret = ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_REM_SECT); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +static int ehea_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHEA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHEA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehea_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHEA_HUGEPAGE_SIZE) + return ehea_update_busmap(initial_pfn, total_nr_pages, + EHEA_BUSMAP_ADD_SECT); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehea_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehea_update_busmap(start_pfn, nr_pages, + EHEA_BUSMAP_ADD_SECT); + if (ret) + return ret; + + /* Skip the hugepage */ + pfn += (EHEA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHEA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehea_update_busmap(start_pfn, nr_pages, EHEA_BUSMAP_ADD_SECT); +} + +int ehea_create_busmap(void) +{ + int ret; + + mutex_lock(&ehea_busmap_mutex); + ehea_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehea_create_busmap_callback); + mutex_unlock(&ehea_busmap_mutex); + return ret; +} + +void ehea_destroy_busmap(void) +{ + int top, dir; + mutex_lock(&ehea_busmap_mutex); + if (!ehea_bmap) + goto out_destroy; + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + if (!ehea_bmap->top[top]) + continue; + + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + if (!ehea_bmap->top[top]->dir[dir]) + continue; + + kfree(ehea_bmap->top[top]->dir[dir]); + } + + kfree(ehea_bmap->top[top]); + } + + kfree(ehea_bmap); + ehea_bmap = NULL; +out_destroy: + mutex_unlock(&ehea_busmap_mutex); +} + +u64 ehea_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long index, offset; + + if (!ehea_bmap) + return EHEA_INVAL_ADDR; + + index = virt_to_abs(caddr) >> SECTION_SIZE_BITS; + top = (index >> EHEA_TOP_INDEX_SHIFT) & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]) + return EHEA_INVAL_ADDR; + + dir = (index >> EHEA_DIR_INDEX_SHIFT) & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]->dir[dir]) + return EHEA_INVAL_ADDR; + + idx = index & EHEA_INDEX_MASK; + if (!ehea_bmap->top[top]->dir[dir]->ent[idx]) + return EHEA_INVAL_ADDR; + + offset = (unsigned long)caddr & (EHEA_SECTSIZE - 1); + return ehea_bmap->top[top]->dir[dir]->ent[idx] | offset; +} + +static inline void *ehea_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHEA_DIR_INDEX_SHIFT; + ret |= top << EHEA_TOP_INDEX_SHIFT; + return abs_to_virt(ret << SECTION_SIZE_BITS); +} + +static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + void *pg; + u64 j, m, hret; + unsigned long k = 0; + u64 pt_abs = virt_to_abs(pt); + + void *sectbase = ehea_calc_sectbase(top, dir, idx); + + for (j = 0; j < (EHEA_PAGES_PER_SECTION / EHEA_MAX_RPAGE); j++) { + + for (m = 0; m < EHEA_MAX_RPAGE; m++) { + pg = sectbase + ((k++) * EHEA_PAGESIZE); + pt[m] = virt_to_abs(pg); + } + hret = ehea_h_register_rpage_mr(adapter->handle, mr->handle, 0, + 0, pt_abs, EHEA_MAX_RPAGE); + + if ((hret != H_SUCCESS) && + (hret != H_PAGE_REGISTERED)) { + ehea_h_free_resource(adapter->handle, mr->handle, + FORCE_FREE); + pr_err("register_rpage_mr failed\n"); + return hret; + } + } + return hret; +} + +static u64 ehea_reg_mr_sections(int top, int dir, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHEA_MAP_ENTRIES; idx++) { + if (!ehea_bmap->top[top]->dir[dir]->ent[idx]) + continue; + + hret = ehea_reg_mr_section(top, dir, idx, pt, adapter, mr); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehea_reg_mr_dir_sections(int top, u64 *pt, + struct ehea_adapter *adapter, + struct ehea_mr *mr) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHEA_MAP_ENTRIES; dir++) { + if (!ehea_bmap->top[top]->dir[dir]) + continue; + + hret = ehea_reg_mr_sections(top, dir, pt, adapter, mr); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +int ehea_reg_kernel_mr(struct ehea_adapter *adapter, struct ehea_mr *mr) +{ + int ret; + u64 *pt; + u64 hret; + u32 acc_ctrl = EHEA_MR_ACC_CTRL; + + unsigned long top; + + pt = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt) { + pr_err("no mem\n"); + ret = -ENOMEM; + goto out; + } + + hret = ehea_h_alloc_resource_mr(adapter->handle, EHEA_BUSMAP_START, + ehea_mr_len, acc_ctrl, adapter->pd, + &mr->handle, &mr->lkey); + + if (hret != H_SUCCESS) { + pr_err("alloc_resource_mr failed\n"); + ret = -EIO; + goto out; + } + + if (!ehea_bmap) { + ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE); + pr_err("no busmap available\n"); + ret = -EIO; + goto out; + } + + for (top = 0; top < EHEA_MAP_ENTRIES; top++) { + if (!ehea_bmap->top[top]) + continue; + + hret = ehea_reg_mr_dir_sections(top, pt, adapter, mr); + if((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + if (hret != H_SUCCESS) { + ehea_h_free_resource(adapter->handle, mr->handle, FORCE_FREE); + pr_err("registering mr failed\n"); + ret = -EIO; + goto out; + } + + mr->vaddr = EHEA_BUSMAP_START; + mr->adapter = adapter; + ret = 0; +out: + free_page((unsigned long)pt); + return ret; +} + +int ehea_rem_mr(struct ehea_mr *mr) +{ + u64 hret; + + if (!mr || !mr->adapter) + return -EINVAL; + + hret = ehea_h_free_resource(mr->adapter->handle, mr->handle, + FORCE_FREE); + if (hret != H_SUCCESS) { + pr_err("destroy MR failed\n"); + return -EIO; + } + + return 0; +} + +int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr, + struct ehea_mr *shared_mr) +{ + u64 hret; + + hret = ehea_h_register_smr(adapter->handle, old_mr->handle, + old_mr->vaddr, EHEA_MR_ACC_CTRL, + adapter->pd, shared_mr); + if (hret != H_SUCCESS) + return -EIO; + + shared_mr->adapter = adapter; + + return 0; +} + +void print_error_data(u64 *data) +{ + int length; + u64 type = EHEA_BMASK_GET(ERROR_DATA_TYPE, data[2]); + u64 resource = data[1]; + + length = EHEA_BMASK_GET(ERROR_DATA_LENGTH, data[0]); + + if (length > EHEA_PAGESIZE) + length = EHEA_PAGESIZE; + + if (type == EHEA_AER_RESTYPE_QP) + pr_err("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, port=%llX\n", + resource, data[6], data[12], data[22]); + else if (type == EHEA_AER_RESTYPE_CQ) + pr_err("CQ (resource=%llX) state: AER=0x%llX\n", + resource, data[6]); + else if (type == EHEA_AER_RESTYPE_EQ) + pr_err("EQ (resource=%llX) state: AER=0x%llX\n", + resource, data[6]); + + ehea_dump(data, length, "error data"); +} + +u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle, + u64 *aer, u64 *aerr) +{ + unsigned long ret; + u64 *rblock; + u64 type = 0; + + rblock = (void *)get_zeroed_page(GFP_KERNEL); + if (!rblock) { + pr_err("Cannot allocate rblock memory\n"); + goto out; + } + + ret = ehea_h_error_data(adapter->handle, res_handle, rblock); + + if (ret == H_SUCCESS) { + type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + *aer = rblock[6]; + *aerr = rblock[12]; + print_error_data(rblock); + } else if (ret == H_R_STATE) { + pr_err("No error data available: %llX\n", res_handle); + } else + pr_err("Error data could not be fetched: %llX\n", res_handle); + + free_page((unsigned long)rblock); +out: + return type; +} diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.h b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h new file mode 100644 index 000000000000..fddff8ec8cfd --- /dev/null +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.h @@ -0,0 +1,404 @@ +/* + * linux/drivers/net/ehea/ehea_qmr.h + * + * eHEA ethernet device driver for IBM eServer System p + * + * (C) Copyright IBM Corp. 2006 + * + * Authors: + * Christoph Raisch <raisch@de.ibm.com> + * Jan-Bernd Themann <themann@de.ibm.com> + * Thomas Klein <tklein@de.ibm.com> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EHEA_QMR_H__ +#define __EHEA_QMR_H__ + +#include <linux/prefetch.h> +#include "ehea.h" +#include "ehea_hw.h" + +/* + * page size of ehea hardware queues + */ + +#define EHEA_PAGESHIFT 12 +#define EHEA_PAGESIZE (1UL << EHEA_PAGESHIFT) +#define EHEA_SECTSIZE (1UL << 24) +#define EHEA_PAGES_PER_SECTION (EHEA_SECTSIZE >> EHEA_PAGESHIFT) +#define EHEA_HUGEPAGESHIFT 34 +#define EHEA_HUGEPAGE_SIZE (1UL << EHEA_HUGEPAGESHIFT) +#define EHEA_HUGEPAGE_PFN_MASK ((EHEA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) + +#if ((1UL << SECTION_SIZE_BITS) < EHEA_SECTSIZE) +#error eHEA module cannot work if kernel sectionsize < ehea sectionsize +#endif + +/* Some abbreviations used here: + * + * WQE - Work Queue Entry + * SWQE - Send Work Queue Entry + * RWQE - Receive Work Queue Entry + * CQE - Completion Queue Entry + * EQE - Event Queue Entry + * MR - Memory Region + */ + +/* Use of WR_ID field for EHEA */ +#define EHEA_WR_ID_COUNT EHEA_BMASK_IBM(0, 19) +#define EHEA_WR_ID_TYPE EHEA_BMASK_IBM(20, 23) +#define EHEA_SWQE2_TYPE 0x1 +#define EHEA_SWQE3_TYPE 0x2 +#define EHEA_RWQE2_TYPE 0x3 +#define EHEA_RWQE3_TYPE 0x4 +#define EHEA_WR_ID_INDEX EHEA_BMASK_IBM(24, 47) +#define EHEA_WR_ID_REFILL EHEA_BMASK_IBM(48, 63) + +struct ehea_vsgentry { + u64 vaddr; + u32 l_key; + u32 len; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define EHEA_MAX_WQE_SG_ENTRIES 252 +#define SWQE2_MAX_IMM (0xD0 - 0x30) +#define SWQE3_MAX_IMM 224 + +/* tx control flags for swqe */ +#define EHEA_SWQE_CRC 0x8000 +#define EHEA_SWQE_IP_CHECKSUM 0x4000 +#define EHEA_SWQE_TCP_CHECKSUM 0x2000 +#define EHEA_SWQE_TSO 0x1000 +#define EHEA_SWQE_SIGNALLED_COMPLETION 0x0800 +#define EHEA_SWQE_VLAN_INSERT 0x0400 +#define EHEA_SWQE_IMM_DATA_PRESENT 0x0200 +#define EHEA_SWQE_DESCRIPTORS_PRESENT 0x0100 +#define EHEA_SWQE_WRAP_CTL_REC 0x0080 +#define EHEA_SWQE_WRAP_CTL_FORCE 0x0040 +#define EHEA_SWQE_BIND 0x0020 +#define EHEA_SWQE_PURGE 0x0010 + +/* sizeof(struct ehea_swqe) less the union */ +#define SWQE_HEADER_SIZE 32 + +struct ehea_swqe { + u64 wr_id; + u16 tx_control; + u16 vlan_tag; + u8 reserved1; + u8 ip_start; + u8 ip_end; + u8 immediate_data_length; + u8 tcp_offset; + u8 reserved2; + u16 tcp_end; + u8 wrap_tag; + u8 descriptors; /* number of valid descriptors in WQE */ + u16 reserved3; + u16 reserved4; + u16 mss; + u32 reserved5; + union { + /* Send WQE Format 1 */ + struct { + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES]; + } no_immediate_data; + + /* Send WQE Format 2 */ + struct { + struct ehea_vsgentry sg_entry; + /* 0x30 */ + u8 immediate_data[SWQE2_MAX_IMM]; + /* 0xd0 */ + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES-1]; + } immdata_desc __packed; + + /* Send WQE Format 3 */ + struct { + u8 immediate_data[SWQE3_MAX_IMM]; + } immdata_nodesc; + } u; +}; + +struct ehea_rwqe { + u64 wr_id; /* work request ID */ + u8 reserved1[5]; + u8 data_segments; + u16 reserved2; + u64 reserved3; + u64 reserved4; + struct ehea_vsgentry sg_list[EHEA_MAX_WQE_SG_ENTRIES]; +}; + +#define EHEA_CQE_VLAN_TAG_XTRACT 0x0400 + +#define EHEA_CQE_TYPE_RQ 0x60 +#define EHEA_CQE_STAT_ERR_MASK 0x700F +#define EHEA_CQE_STAT_FAT_ERR_MASK 0xF +#define EHEA_CQE_BLIND_CKSUM 0x8000 +#define EHEA_CQE_STAT_ERR_TCP 0x4000 +#define EHEA_CQE_STAT_ERR_IP 0x2000 +#define EHEA_CQE_STAT_ERR_CRC 0x1000 + +/* Defines which bad send cqe stati lead to a port reset */ +#define EHEA_CQE_STAT_RESET_MASK 0x0002 + +struct ehea_cqe { + u64 wr_id; /* work request ID from WQE */ + u8 type; + u8 valid; + u16 status; + u16 reserved1; + u16 num_bytes_transfered; + u16 vlan_tag; + u16 inet_checksum_value; + u8 reserved2; + u8 header_length; + u16 reserved3; + u16 page_offset; + u16 wqe_count; + u32 qp_token; + u32 timestamp; + u32 reserved4; + u64 reserved5[3]; +}; + +#define EHEA_EQE_VALID EHEA_BMASK_IBM(0, 0) +#define EHEA_EQE_IS_CQE EHEA_BMASK_IBM(1, 1) +#define EHEA_EQE_IDENTIFIER EHEA_BMASK_IBM(2, 7) +#define EHEA_EQE_QP_CQ_NUMBER EHEA_BMASK_IBM(8, 31) +#define EHEA_EQE_QP_TOKEN EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_CQ_TOKEN EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_KEY EHEA_BMASK_IBM(32, 63) +#define EHEA_EQE_PORT_NUMBER EHEA_BMASK_IBM(56, 63) +#define EHEA_EQE_EQ_NUMBER EHEA_BMASK_IBM(48, 63) +#define EHEA_EQE_SM_ID EHEA_BMASK_IBM(48, 63) +#define EHEA_EQE_SM_MECH_NUMBER EHEA_BMASK_IBM(48, 55) +#define EHEA_EQE_SM_PORT_NUMBER EHEA_BMASK_IBM(56, 63) + +#define EHEA_AER_RESTYPE_QP 0x8 +#define EHEA_AER_RESTYPE_CQ 0x4 +#define EHEA_AER_RESTYPE_EQ 0x3 + +/* Defines which affiliated errors lead to a port reset */ +#define EHEA_AER_RESET_MASK 0xFFFFFFFFFEFFFFFFULL +#define EHEA_AERR_RESET_MASK 0xFFFFFFFFFFFFFFFFULL + +struct ehea_eqe { + u64 entry; +}; + +#define ERROR_DATA_LENGTH EHEA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHEA_BMASK_IBM(0, 7) + +static inline void *hw_qeit_calc(struct hw_queue *queue, u64 q_offset) +{ + struct ehea_page *current_page; + + if (q_offset >= queue->queue_length) + q_offset -= queue->queue_length; + current_page = (queue->queue_pages)[q_offset >> EHEA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHEA_PAGESIZE - 1)]; +} + +static inline void *hw_qeit_get(struct hw_queue *queue) +{ + return hw_qeit_calc(queue, queue->current_q_offset); +} + +static inline void hw_qeit_inc(struct hw_queue *queue) +{ + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } +} + +static inline void *hw_qeit_get_inc(struct hw_queue *queue) +{ + void *retvalue = hw_qeit_get(queue); + hw_qeit_inc(queue); + return retvalue; +} + +static inline void *hw_qeit_get_inc_valid(struct hw_queue *queue) +{ + struct ehea_cqe *retvalue = hw_qeit_get(queue); + u8 valid = retvalue->valid; + void *pref; + + if ((valid >> 7) == (queue->toggle_state & 1)) { + /* this is a good one */ + hw_qeit_inc(queue); + pref = hw_qeit_calc(queue, queue->current_q_offset); + prefetch(pref); + prefetch(pref + 128); + } else + retvalue = NULL; + return retvalue; +} + +static inline void *hw_qeit_get_valid(struct hw_queue *queue) +{ + struct ehea_cqe *retvalue = hw_qeit_get(queue); + void *pref; + u8 valid; + + pref = hw_qeit_calc(queue, queue->current_q_offset); + prefetch(pref); + prefetch(pref + 128); + prefetch(pref + 256); + valid = retvalue->valid; + if (!((valid >> 7) == (queue->toggle_state & 1))) + retvalue = NULL; + return retvalue; +} + +static inline void *hw_qeit_reset(struct hw_queue *queue) +{ + queue->current_q_offset = 0; + return hw_qeit_get(queue); +} + +static inline void *hw_qeit_eq_get_inc(struct hw_queue *queue) +{ + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + void *retvalue; + + retvalue = hw_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + return retvalue; +} + +static inline void *hw_eqit_eq_get_inc_valid(struct hw_queue *queue) +{ + void *retvalue = hw_qeit_get(queue); + u32 qe = *(u8 *)retvalue; + if ((qe >> 7) == (queue->toggle_state & 1)) + hw_qeit_eq_get_inc(queue); + else + retvalue = NULL; + return retvalue; +} + +static inline struct ehea_rwqe *ehea_get_next_rwqe(struct ehea_qp *qp, + int rq_nr) +{ + struct hw_queue *queue; + + if (rq_nr == 1) + queue = &qp->hw_rqueue1; + else if (rq_nr == 2) + queue = &qp->hw_rqueue2; + else + queue = &qp->hw_rqueue3; + + return hw_qeit_get_inc(queue); +} + +static inline struct ehea_swqe *ehea_get_swqe(struct ehea_qp *my_qp, + int *wqe_index) +{ + struct hw_queue *queue = &my_qp->hw_squeue; + struct ehea_swqe *wqe_p; + + *wqe_index = (queue->current_q_offset) >> (7 + EHEA_SG_SQ); + wqe_p = hw_qeit_get_inc(&my_qp->hw_squeue); + + return wqe_p; +} + +static inline void ehea_post_swqe(struct ehea_qp *my_qp, struct ehea_swqe *swqe) +{ + iosync(); + ehea_update_sqa(my_qp, 1); +} + +static inline struct ehea_cqe *ehea_poll_rq1(struct ehea_qp *qp, int *wqe_index) +{ + struct hw_queue *queue = &qp->hw_rqueue1; + + *wqe_index = (queue->current_q_offset) >> (7 + EHEA_SG_RQ1); + return hw_qeit_get_valid(queue); +} + +static inline void ehea_inc_cq(struct ehea_cq *cq) +{ + hw_qeit_inc(&cq->hw_queue); +} + +static inline void ehea_inc_rq1(struct ehea_qp *qp) +{ + hw_qeit_inc(&qp->hw_rqueue1); +} + +static inline struct ehea_cqe *ehea_poll_cq(struct ehea_cq *my_cq) +{ + return hw_qeit_get_valid(&my_cq->hw_queue); +} + +#define EHEA_CQ_REGISTER_ORIG 0 +#define EHEA_EQ_REGISTER_ORIG 0 + +enum ehea_eq_type { + EHEA_EQ = 0, /* event queue */ + EHEA_NEQ /* notification event queue */ +}; + +struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter, + enum ehea_eq_type type, + const u32 length, const u8 eqe_gen); + +int ehea_destroy_eq(struct ehea_eq *eq); + +struct ehea_eqe *ehea_poll_eq(struct ehea_eq *eq); + +struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, int cqe, + u64 eq_handle, u32 cq_token); + +int ehea_destroy_cq(struct ehea_cq *cq); + +struct ehea_qp *ehea_create_qp(struct ehea_adapter *adapter, u32 pd, + struct ehea_qp_init_attr *init_attr); + +int ehea_destroy_qp(struct ehea_qp *qp); + +int ehea_reg_kernel_mr(struct ehea_adapter *adapter, struct ehea_mr *mr); + +int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr, + struct ehea_mr *shared_mr); + +int ehea_rem_mr(struct ehea_mr *mr); + +u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle, + u64 *aer, u64 *aerr); + +int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages); +int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages); +int ehea_create_busmap(void); +void ehea_destroy_busmap(void); +u64 ehea_map_vaddr(void *caddr); + +#endif /* __EHEA_QMR_H__ */ diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig new file mode 100644 index 000000000000..3f44a30e0615 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/Kconfig @@ -0,0 +1,76 @@ +config IBM_EMAC + tristate "IBM EMAC Ethernet support" + depends on PPC_DCR + select CRC32 + help + This driver supports the IBM EMAC family of Ethernet controllers + typically found on 4xx embedded PowerPC chips, but also on the + Axon southbridge for Cell. + +config IBM_EMAC_RXB + int "Number of receive buffers" + depends on IBM_EMAC + default "128" + +config IBM_EMAC_TXB + int "Number of transmit buffers" + depends on IBM_EMAC + default "64" + +config IBM_EMAC_POLL_WEIGHT + int "MAL NAPI polling weight" + depends on IBM_EMAC + default "32" + +config IBM_EMAC_RX_COPY_THRESHOLD + int "RX skb copy threshold (bytes)" + depends on IBM_EMAC + default "256" + +config IBM_EMAC_RX_SKB_HEADROOM + int "Additional RX skb headroom (bytes)" + depends on IBM_EMAC + default "0" + help + Additional receive skb headroom. Note, that driver + will always reserve at least 2 bytes to make IP header + aligned, so usually there is no need to add any additional + headroom. + + If unsure, set to 0. + +config IBM_EMAC_DEBUG + bool "Debugging" + depends on IBM_EMAC + default n + +# The options below has to be select'ed by the respective +# processor types or platforms + +config IBM_EMAC_ZMII + bool + default n + +config IBM_EMAC_RGMII + bool + default n + +config IBM_EMAC_TAH + bool + default n + +config IBM_EMAC_EMAC4 + bool + default n + +config IBM_EMAC_NO_FLOW_CTRL + bool + default n + +config IBM_EMAC_MAL_CLR_ICINTSTAT + bool + default n + +config IBM_EMAC_MAL_COMMON_ERR + bool + default n diff --git a/drivers/net/ethernet/ibm/emac/Makefile b/drivers/net/ethernet/ibm/emac/Makefile new file mode 100644 index 000000000000..eba21835d90d --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the PowerPC 4xx on-chip ethernet driver +# + +obj-$(CONFIG_IBM_EMAC) += ibm_emac.o + +ibm_emac-y := mal.o core.o phy.o +ibm_emac-$(CONFIG_IBM_EMAC_ZMII) += zmii.o +ibm_emac-$(CONFIG_IBM_EMAC_RGMII) += rgmii.o +ibm_emac-$(CONFIG_IBM_EMAC_TAH) += tah.o +ibm_emac-$(CONFIG_IBM_EMAC_DEBUG) += debug.o diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c new file mode 100644 index 000000000000..a573df1fafb6 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -0,0 +1,3074 @@ +/* + * drivers/net/ibm_newemac/core.c + * + * Driver for PowerPC 4xx on-chip ethernet controller. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * (c) 2003 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * Armin Kuster <akuster@mvista.com> + * Johnnie Peters <jpeters@mvista.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/crc32.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/bitops.h> +#include <linux/workqueue.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/slab.h> + +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/uaccess.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> + +#include "core.h" + +/* + * Lack of dma_unmap_???? calls is intentional. + * + * API-correct usage requires additional support state information to be + * maintained for every RX and TX buffer descriptor (BD). Unfortunately, due to + * EMAC design (e.g. TX buffer passed from network stack can be split into + * several BDs, dma_map_single/dma_map_page can be used to map particular BD), + * maintaining such information will add additional overhead. + * Current DMA API implementation for 4xx processors only ensures cache coherency + * and dma_unmap_???? routines are empty and are likely to stay this way. + * I decided to omit dma_unmap_??? calls because I don't want to add additional + * complexity just for the sake of following some abstract API, when it doesn't + * add any real benefit to the driver. I understand that this decision maybe + * controversial, but I really tried to make code API-correct and efficient + * at the same time and didn't come up with code I liked :(. --ebs + */ + +#define DRV_NAME "emac" +#define DRV_VERSION "3.54" +#define DRV_DESC "PPC 4xx OCP EMAC driver" + +MODULE_DESCRIPTION(DRV_DESC); +MODULE_AUTHOR + ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>"); +MODULE_LICENSE("GPL"); + +/* + * PPC64 doesn't (yet) have a cacheable_memcpy + */ +#ifdef CONFIG_PPC64 +#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n)) +#endif + +/* minimum number of free TX descriptors required to wake up TX process */ +#define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) + +/* If packet size is less than this number, we allocate small skb and copy packet + * contents into it instead of just sending original big skb up + */ +#define EMAC_RX_COPY_THRESH CONFIG_IBM_EMAC_RX_COPY_THRESHOLD + +/* Since multiple EMACs share MDIO lines in various ways, we need + * to avoid re-using the same PHY ID in cases where the arch didn't + * setup precise phy_map entries + * + * XXX This is something that needs to be reworked as we can have multiple + * EMAC "sets" (multiple ASICs containing several EMACs) though we can + * probably require in that case to have explicit PHY IDs in the device-tree + */ +static u32 busy_phy_map; +static DEFINE_MUTEX(emac_phy_map_lock); + +/* This is the wait queue used to wait on any event related to probe, that + * is discovery of MALs, other EMACs, ZMII/RGMIIs, etc... + */ +static DECLARE_WAIT_QUEUE_HEAD(emac_probe_wait); + +/* Having stable interface names is a doomed idea. However, it would be nice + * if we didn't have completely random interface names at boot too :-) It's + * just a matter of making everybody's life easier. Since we are doing + * threaded probing, it's a bit harder though. The base idea here is that + * we make up a list of all emacs in the device-tree before we register the + * driver. Every emac will then wait for the previous one in the list to + * initialize before itself. We should also keep that list ordered by + * cell_index. + * That list is only 4 entries long, meaning that additional EMACs don't + * get ordering guarantees unless EMAC_BOOT_LIST_SIZE is increased. + */ + +#define EMAC_BOOT_LIST_SIZE 4 +static struct device_node *emac_boot_list[EMAC_BOOT_LIST_SIZE]; + +/* How long should I wait for dependent devices ? */ +#define EMAC_PROBE_DEP_TIMEOUT (HZ * 5) + +/* I don't want to litter system log with timeout errors + * when we have brain-damaged PHY. + */ +static inline void emac_report_timeout_error(struct emac_instance *dev, + const char *error) +{ + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX | + EMAC_FTR_460EX_PHY_CLK_FIX | + EMAC_FTR_440EP_PHY_CLK_FIX)) + DBG(dev, "%s" NL, error); + else if (net_ratelimit()) + printk(KERN_ERR "%s: %s\n", dev->ofdev->dev.of_node->full_name, + error); +} + +/* EMAC PHY clock workaround: + * 440EP/440GR has more sane SDR0_MFR register implementation than 440GX, + * which allows controlling each EMAC clock + */ +static inline void emac_rx_clk_tx(struct emac_instance *dev) +{ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440EP_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, + 0, SDR0_MFR_ECS >> dev->cell_index); +#endif +} + +static inline void emac_rx_clk_default(struct emac_instance *dev) +{ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440EP_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, + SDR0_MFR_ECS >> dev->cell_index, 0); +#endif +} + +/* PHY polling intervals */ +#define PHY_POLL_LINK_ON HZ +#define PHY_POLL_LINK_OFF (HZ / 5) + +/* Graceful stop timeouts in us. + * We should allow up to 1 frame time (full-duplex, ignoring collisions) + */ +#define STOP_TIMEOUT_10 1230 +#define STOP_TIMEOUT_100 124 +#define STOP_TIMEOUT_1000 13 +#define STOP_TIMEOUT_1000_JUMBO 73 + +static unsigned char default_mcast_addr[] = { + 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 +}; + +/* Please, keep in sync with struct ibm_emac_stats/ibm_emac_error_stats */ +static const char emac_stats_keys[EMAC_ETHTOOL_STATS_COUNT][ETH_GSTRING_LEN] = { + "rx_packets", "rx_bytes", "tx_packets", "tx_bytes", "rx_packets_csum", + "tx_packets_csum", "tx_undo", "rx_dropped_stack", "rx_dropped_oom", + "rx_dropped_error", "rx_dropped_resize", "rx_dropped_mtu", + "rx_stopped", "rx_bd_errors", "rx_bd_overrun", "rx_bd_bad_packet", + "rx_bd_runt_packet", "rx_bd_short_event", "rx_bd_alignment_error", + "rx_bd_bad_fcs", "rx_bd_packet_too_long", "rx_bd_out_of_range", + "rx_bd_in_range", "rx_parity", "rx_fifo_overrun", "rx_overrun", + "rx_bad_packet", "rx_runt_packet", "rx_short_event", + "rx_alignment_error", "rx_bad_fcs", "rx_packet_too_long", + "rx_out_of_range", "rx_in_range", "tx_dropped", "tx_bd_errors", + "tx_bd_bad_fcs", "tx_bd_carrier_loss", "tx_bd_excessive_deferral", + "tx_bd_excessive_collisions", "tx_bd_late_collision", + "tx_bd_multple_collisions", "tx_bd_single_collision", + "tx_bd_underrun", "tx_bd_sqe", "tx_parity", "tx_underrun", "tx_sqe", + "tx_errors" +}; + +static irqreturn_t emac_irq(int irq, void *dev_instance); +static void emac_clean_tx_ring(struct emac_instance *dev); +static void __emac_set_multicast_list(struct emac_instance *dev); + +static inline int emac_phy_supports_gige(int phy_mode) +{ + return phy_mode == PHY_MODE_GMII || + phy_mode == PHY_MODE_RGMII || + phy_mode == PHY_MODE_SGMII || + phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; +} + +static inline int emac_phy_gpcs(int phy_mode) +{ + return phy_mode == PHY_MODE_SGMII || + phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; +} + +static inline void emac_tx_enable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "tx_enable" NL); + + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_TXE)) + out_be32(&p->mr0, r | EMAC_MR0_TXE); +} + +static void emac_tx_disable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "tx_disable" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_TXE) { + int n = dev->stop_timeout; + out_be32(&p->mr0, r & ~EMAC_MR0_TXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_TXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, "TX disable timeout"); + } +} + +static void emac_rx_enable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + if (unlikely(test_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags))) + goto out; + + DBG(dev, "rx_enable" NL); + + r = in_be32(&p->mr0); + if (!(r & EMAC_MR0_RXE)) { + if (unlikely(!(r & EMAC_MR0_RXI))) { + /* Wait if previous async disable is still in progress */ + int n = dev->stop_timeout; + while (!(r = in_be32(&p->mr0) & EMAC_MR0_RXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, + "RX disable timeout"); + } + out_be32(&p->mr0, r | EMAC_MR0_RXE); + } + out: + ; +} + +static void emac_rx_disable(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "rx_disable" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) { + int n = dev->stop_timeout; + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); + while (!(in_be32(&p->mr0) & EMAC_MR0_RXI) && n) { + udelay(1); + --n; + } + if (unlikely(!n)) + emac_report_timeout_error(dev, "RX disable timeout"); + } +} + +static inline void emac_netif_stop(struct emac_instance *dev) +{ + netif_tx_lock_bh(dev->ndev); + netif_addr_lock(dev->ndev); + dev->no_mcast = 1; + netif_addr_unlock(dev->ndev); + netif_tx_unlock_bh(dev->ndev); + dev->ndev->trans_start = jiffies; /* prevent tx timeout */ + mal_poll_disable(dev->mal, &dev->commac); + netif_tx_disable(dev->ndev); +} + +static inline void emac_netif_start(struct emac_instance *dev) +{ + netif_tx_lock_bh(dev->ndev); + netif_addr_lock(dev->ndev); + dev->no_mcast = 0; + if (dev->mcast_pending && netif_running(dev->ndev)) + __emac_set_multicast_list(dev); + netif_addr_unlock(dev->ndev); + netif_tx_unlock_bh(dev->ndev); + + netif_wake_queue(dev->ndev); + + /* NOTE: unconditional netif_wake_queue is only appropriate + * so long as all callers are assured to have free tx slots + * (taken from tg3... though the case where that is wrong is + * not terribly harmful) + */ + mal_poll_enable(dev->mal, &dev->commac); +} + +static inline void emac_rx_disable_async(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r; + + DBG(dev, "rx_disable_async" NL); + + r = in_be32(&p->mr0); + if (r & EMAC_MR0_RXE) + out_be32(&p->mr0, r & ~EMAC_MR0_RXE); +} + +static int emac_reset(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + int n = 20; + + DBG(dev, "reset" NL); + + if (!dev->reset_failed) { + /* 40x erratum suggests stopping RX channel before reset, + * we stop TX as well + */ + emac_rx_disable(dev); + emac_tx_disable(dev); + } + +#ifdef CONFIG_PPC_DCR_NATIVE + /* Enable internal clock source */ + if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_ETH_CFG, + 0, SDR0_ETH_CFG_ECS << dev->cell_index); +#endif + + out_be32(&p->mr0, EMAC_MR0_SRST); + while ((in_be32(&p->mr0) & EMAC_MR0_SRST) && n) + --n; + +#ifdef CONFIG_PPC_DCR_NATIVE + /* Enable external clock source */ + if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_ETH_CFG, + SDR0_ETH_CFG_ECS << dev->cell_index, 0); +#endif + + if (n) { + dev->reset_failed = 0; + return 0; + } else { + emac_report_timeout_error(dev, "reset timeout"); + dev->reset_failed = 1; + return -ETIMEDOUT; + } +} + +static void emac_hash_mc(struct emac_instance *dev) +{ + const int regs = EMAC_XAHT_REGS(dev); + u32 *gaht_base = emac_gaht_base(dev); + u32 gaht_temp[regs]; + struct netdev_hw_addr *ha; + int i; + + DBG(dev, "hash_mc %d" NL, netdev_mc_count(dev->ndev)); + + memset(gaht_temp, 0, sizeof (gaht_temp)); + + netdev_for_each_mc_addr(ha, dev->ndev) { + int slot, reg, mask; + DBG2(dev, "mc %pM" NL, ha->addr); + + slot = EMAC_XAHT_CRC_TO_SLOT(dev, + ether_crc(ETH_ALEN, ha->addr)); + reg = EMAC_XAHT_SLOT_TO_REG(dev, slot); + mask = EMAC_XAHT_SLOT_TO_MASK(dev, slot); + + gaht_temp[reg] |= mask; + } + + for (i = 0; i < regs; i++) + out_be32(gaht_base + i, gaht_temp[i]); +} + +static inline u32 emac_iff2rmr(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + u32 r; + + r = EMAC_RMR_SP | EMAC_RMR_SFCS | EMAC_RMR_IAE | EMAC_RMR_BAE; + + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r |= EMAC4_RMR_BASE; + else + r |= EMAC_RMR_BASE; + + if (ndev->flags & IFF_PROMISC) + r |= EMAC_RMR_PME; + else if (ndev->flags & IFF_ALLMULTI || + (netdev_mc_count(ndev) > EMAC_XAHT_SLOTS(dev))) + r |= EMAC_RMR_PMME; + else if (!netdev_mc_empty(ndev)) + r |= EMAC_RMR_MAE; + + return r; +} + +static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + u32 ret = EMAC_MR1_VLE | EMAC_MR1_IST | EMAC_MR1_TR0_MULT; + + DBG2(dev, "__emac_calc_base_mr1" NL); + + switch(tx_size) { + case 2048: + ret |= EMAC_MR1_TFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", + dev->ndev->name, tx_size); + } + + switch(rx_size) { + case 16384: + ret |= EMAC_MR1_RFS_16K; + break; + case 4096: + ret |= EMAC_MR1_RFS_4K; + break; + default: + printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + dev->ndev->name, rx_size); + } + + return ret; +} + +static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + u32 ret = EMAC_MR1_VLE | EMAC_MR1_IST | EMAC4_MR1_TR | + EMAC4_MR1_OBCI(dev->opb_bus_freq / 1000000); + + DBG2(dev, "__emac4_calc_base_mr1" NL); + + switch(tx_size) { + case 16384: + ret |= EMAC4_MR1_TFS_16K; + break; + case 4096: + ret |= EMAC4_MR1_TFS_4K; + break; + case 2048: + ret |= EMAC4_MR1_TFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", + dev->ndev->name, tx_size); + } + + switch(rx_size) { + case 16384: + ret |= EMAC4_MR1_RFS_16K; + break; + case 4096: + ret |= EMAC4_MR1_RFS_4K; + break; + case 2048: + ret |= EMAC4_MR1_RFS_2K; + break; + default: + printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + dev->ndev->name, rx_size); + } + + return ret; +} + +static u32 emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_size) +{ + return emac_has_feature(dev, EMAC_FTR_EMAC4) ? + __emac4_calc_base_mr1(dev, tx_size, rx_size) : + __emac_calc_base_mr1(dev, tx_size, rx_size); +} + +static inline u32 emac_calc_trtr(struct emac_instance *dev, unsigned int size) +{ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + return ((size >> 6) - 1) << EMAC_TRTR_SHIFT_EMAC4; + else + return ((size >> 6) - 1) << EMAC_TRTR_SHIFT; +} + +static inline u32 emac_calc_rwmr(struct emac_instance *dev, + unsigned int low, unsigned int high) +{ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + return (low << 22) | ( (high & 0x3ff) << 6); + else + return (low << 23) | ( (high & 0x1ff) << 7); +} + +static int emac_configure(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + struct net_device *ndev = dev->ndev; + int tx_size, rx_size, link = netif_carrier_ok(dev->ndev); + u32 r, mr1 = 0; + + DBG(dev, "configure" NL); + + if (!link) { + out_be32(&p->mr1, in_be32(&p->mr1) + | EMAC_MR1_FDE | EMAC_MR1_ILE); + udelay(100); + } else if (emac_reset(dev) < 0) + return -ETIMEDOUT; + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_reset(dev->tah_dev); + + DBG(dev, " link = %d duplex = %d, pause = %d, asym_pause = %d\n", + link, dev->phy.duplex, dev->phy.pause, dev->phy.asym_pause); + + /* Default fifo sizes */ + tx_size = dev->tx_fifo_size; + rx_size = dev->rx_fifo_size; + + /* No link, force loopback */ + if (!link) + mr1 = EMAC_MR1_FDE | EMAC_MR1_ILE; + + /* Check for full duplex */ + else if (dev->phy.duplex == DUPLEX_FULL) + mr1 |= EMAC_MR1_FDE | EMAC_MR1_MWSW_001; + + /* Adjust fifo sizes, mr1 and timeouts based on link speed */ + dev->stop_timeout = STOP_TIMEOUT_10; + switch (dev->phy.speed) { + case SPEED_1000: + if (emac_phy_gpcs(dev->phy.mode)) { + mr1 |= EMAC_MR1_MF_1000GPCS | EMAC_MR1_MF_IPPA( + (dev->phy.gpcs_address != 0xffffffff) ? + dev->phy.gpcs_address : dev->phy.address); + + /* Put some arbitrary OUI, Manuf & Rev IDs so we can + * identify this GPCS PHY later. + */ + out_be32(&p->u1.emac4.ipcr, 0xdeadbeef); + } else + mr1 |= EMAC_MR1_MF_1000; + + /* Extended fifo sizes */ + tx_size = dev->tx_fifo_size_gige; + rx_size = dev->rx_fifo_size_gige; + + if (dev->ndev->mtu > ETH_DATA_LEN) { + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + mr1 |= EMAC4_MR1_JPSM; + else + mr1 |= EMAC_MR1_JPSM; + dev->stop_timeout = STOP_TIMEOUT_1000_JUMBO; + } else + dev->stop_timeout = STOP_TIMEOUT_1000; + break; + case SPEED_100: + mr1 |= EMAC_MR1_MF_100; + dev->stop_timeout = STOP_TIMEOUT_100; + break; + default: /* make gcc happy */ + break; + } + + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_set_speed(dev->rgmii_dev, dev->rgmii_port, + dev->phy.speed); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_set_speed(dev->zmii_dev, dev->zmii_port, dev->phy.speed); + + /* on 40x erratum forces us to NOT use integrated flow control, + * let's hope it works on 44x ;) + */ + if (!emac_has_feature(dev, EMAC_FTR_NO_FLOW_CONTROL_40x) && + dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + mr1 |= EMAC_MR1_EIFC | EMAC_MR1_APP; + else if (dev->phy.asym_pause) + mr1 |= EMAC_MR1_APP; + } + + /* Add base settings & fifo sizes & program MR1 */ + mr1 |= emac_calc_base_mr1(dev, tx_size, rx_size); + out_be32(&p->mr1, mr1); + + /* Set individual MAC address */ + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + + /* VLAN Tag Protocol ID */ + out_be32(&p->vtpid, 0x8100); + + /* Receive mode register */ + r = emac_iff2rmr(ndev); + if (r & EMAC_RMR_MAE) + emac_hash_mc(dev); + out_be32(&p->rmr, r); + + /* FIFOs thresholds */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_TMR1((dev->mal_burst_size / dev->fifo_entry_size) + 1, + tx_size / 2 / dev->fifo_entry_size); + else + r = EMAC_TMR1((dev->mal_burst_size / dev->fifo_entry_size) + 1, + tx_size / 2 / dev->fifo_entry_size); + out_be32(&p->tmr1, r); + out_be32(&p->trtr, emac_calc_trtr(dev, tx_size / 2)); + + /* PAUSE frame is sent when RX FIFO reaches its high-water mark, + there should be still enough space in FIFO to allow the our link + partner time to process this frame and also time to send PAUSE + frame itself. + + Here is the worst case scenario for the RX FIFO "headroom" + (from "The Switch Book") (100Mbps, without preamble, inter-frame gap): + + 1) One maximum-length frame on TX 1522 bytes + 2) One PAUSE frame time 64 bytes + 3) PAUSE frame decode time allowance 64 bytes + 4) One maximum-length frame on RX 1522 bytes + 5) Round-trip propagation delay of the link (100Mb) 15 bytes + ---------- + 3187 bytes + + I chose to set high-water mark to RX_FIFO_SIZE / 4 (1024 bytes) + low-water mark to RX_FIFO_SIZE / 8 (512 bytes) + */ + r = emac_calc_rwmr(dev, rx_size / 8 / dev->fifo_entry_size, + rx_size / 4 / dev->fifo_entry_size); + out_be32(&p->rwmr, r); + + /* Set PAUSE timer to the maximum */ + out_be32(&p->ptr, 0xffff); + + /* IRQ sources */ + r = EMAC_ISR_OVR | EMAC_ISR_BP | EMAC_ISR_SE | + EMAC_ISR_ALE | EMAC_ISR_BFCS | EMAC_ISR_PTLE | EMAC_ISR_ORE | + EMAC_ISR_IRE | EMAC_ISR_TE; + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r |= EMAC4_ISR_TXPE | EMAC4_ISR_RXPE /* | EMAC4_ISR_TXUE | + EMAC4_ISR_RXOE | */; + out_be32(&p->iser, r); + + /* We need to take GPCS PHY out of isolate mode after EMAC reset */ + if (emac_phy_gpcs(dev->phy.mode)) { + if (dev->phy.gpcs_address != 0xffffffff) + emac_mii_reset_gpcs(&dev->phy); + else + emac_mii_reset_phy(&dev->phy); + } + + return 0; +} + +static void emac_reinitialize(struct emac_instance *dev) +{ + DBG(dev, "reinitialize" NL); + + emac_netif_stop(dev); + if (!emac_configure(dev)) { + emac_tx_enable(dev); + emac_rx_enable(dev); + } + emac_netif_start(dev); +} + +static void emac_full_tx_reset(struct emac_instance *dev) +{ + DBG(dev, "full_tx_reset" NL); + + emac_tx_disable(dev); + mal_disable_tx_channel(dev->mal, dev->mal_tx_chan); + emac_clean_tx_ring(dev); + dev->tx_cnt = dev->tx_slot = dev->ack_slot = 0; + + emac_configure(dev); + + mal_enable_tx_channel(dev->mal, dev->mal_tx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); +} + +static void emac_reset_work(struct work_struct *work) +{ + struct emac_instance *dev = container_of(work, struct emac_instance, reset_work); + + DBG(dev, "reset_work" NL); + + mutex_lock(&dev->link_lock); + if (dev->opened) { + emac_netif_stop(dev); + emac_full_tx_reset(dev); + emac_netif_start(dev); + } + mutex_unlock(&dev->link_lock); +} + +static void emac_tx_timeout(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + + DBG(dev, "tx_timeout" NL); + + schedule_work(&dev->reset_work); +} + + +static inline int emac_phy_done(struct emac_instance *dev, u32 stacr) +{ + int done = !!(stacr & EMAC_STACR_OC); + + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + done = !done; + + return done; +}; + +static int __emac_mdio_read(struct emac_instance *dev, u8 id, u8 reg) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r = 0; + int n, err = -ETIMEDOUT; + + mutex_lock(&dev->mdio_lock); + + DBG2(dev, "mdio_read(%02x,%02x)" NL, id, reg); + + /* Enable proper MDIO port */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_get_mdio(dev->zmii_dev, dev->zmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_get_mdio(dev->rgmii_dev, dev->rgmii_port); + + /* Wait for management interface to become idle */ + n = 20; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait idle\n"); + goto bail; + } + } + + /* Issue read command */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_STACR_BASE(dev->opb_bus_freq); + else + r = EMAC_STACR_BASE(dev->opb_bus_freq); + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + r |= EMAC_STACR_OC; + if (emac_has_feature(dev, EMAC_FTR_HAS_NEW_STACR)) + r |= EMACX_STACR_STAC_READ; + else + r |= EMAC_STACR_STAC_READ; + r |= (reg & EMAC_STACR_PRA_MASK) + | ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT); + out_be32(&p->stacr, r); + + /* Wait for read to complete */ + n = 200; + while (!emac_phy_done(dev, (r = in_be32(&p->stacr)))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait complete\n"); + goto bail; + } + } + + if (unlikely(r & EMAC_STACR_PHYE)) { + DBG(dev, "mdio_read(%02x, %02x) failed" NL, id, reg); + err = -EREMOTEIO; + goto bail; + } + + r = ((r >> EMAC_STACR_PHYD_SHIFT) & EMAC_STACR_PHYD_MASK); + + DBG2(dev, "mdio_read -> %04x" NL, r); + err = 0; + bail: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_put_mdio(dev->zmii_dev, dev->zmii_port); + mutex_unlock(&dev->mdio_lock); + + return err == 0 ? r : err; +} + +static void __emac_mdio_write(struct emac_instance *dev, u8 id, u8 reg, + u16 val) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 r = 0; + int n, err = -ETIMEDOUT; + + mutex_lock(&dev->mdio_lock); + + DBG2(dev, "mdio_write(%02x,%02x,%04x)" NL, id, reg, val); + + /* Enable proper MDIO port */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_get_mdio(dev->zmii_dev, dev->zmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_get_mdio(dev->rgmii_dev, dev->rgmii_port); + + /* Wait for management interface to be idle */ + n = 20; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait idle\n"); + goto bail; + } + } + + /* Issue write command */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + r = EMAC4_STACR_BASE(dev->opb_bus_freq); + else + r = EMAC_STACR_BASE(dev->opb_bus_freq); + if (emac_has_feature(dev, EMAC_FTR_STACR_OC_INVERT)) + r |= EMAC_STACR_OC; + if (emac_has_feature(dev, EMAC_FTR_HAS_NEW_STACR)) + r |= EMACX_STACR_STAC_WRITE; + else + r |= EMAC_STACR_STAC_WRITE; + r |= (reg & EMAC_STACR_PRA_MASK) | + ((id & EMAC_STACR_PCDA_MASK) << EMAC_STACR_PCDA_SHIFT) | + (val << EMAC_STACR_PHYD_SHIFT); + out_be32(&p->stacr, r); + + /* Wait for write to complete */ + n = 200; + while (!emac_phy_done(dev, in_be32(&p->stacr))) { + udelay(1); + if (!--n) { + DBG2(dev, " -> timeout wait complete\n"); + goto bail; + } + } + err = 0; + bail: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_put_mdio(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_put_mdio(dev->zmii_dev, dev->zmii_port); + mutex_unlock(&dev->mdio_lock); +} + +static int emac_mdio_read(struct net_device *ndev, int id, int reg) +{ + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = __emac_mdio_read((dev->mdio_instance && + dev->phy.gpcs_address != id) ? + dev->mdio_instance : dev, + (u8) id, (u8) reg); + return res; +} + +static void emac_mdio_write(struct net_device *ndev, int id, int reg, int val) +{ + struct emac_instance *dev = netdev_priv(ndev); + + __emac_mdio_write((dev->mdio_instance && + dev->phy.gpcs_address != id) ? + dev->mdio_instance : dev, + (u8) id, (u8) reg, (u16) val); +} + +/* Tx lock BH */ +static void __emac_set_multicast_list(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + u32 rmr = emac_iff2rmr(dev->ndev); + + DBG(dev, "__multicast %08x" NL, rmr); + + /* I decided to relax register access rules here to avoid + * full EMAC reset. + * + * There is a real problem with EMAC4 core if we use MWSW_001 bit + * in MR1 register and do a full EMAC reset. + * One TX BD status update is delayed and, after EMAC reset, it + * never happens, resulting in TX hung (it'll be recovered by TX + * timeout handler eventually, but this is just gross). + * So we either have to do full TX reset or try to cheat here :) + * + * The only required change is to RX mode register, so I *think* all + * we need is just to stop RX channel. This seems to work on all + * tested SoCs. --ebs + * + * If we need the full reset, we might just trigger the workqueue + * and do it async... a bit nasty but should work --BenH + */ + dev->mcast_pending = 0; + emac_rx_disable(dev); + if (rmr & EMAC_RMR_MAE) + emac_hash_mc(dev); + out_be32(&p->rmr, rmr); + emac_rx_enable(dev); +} + +/* Tx lock BH */ +static void emac_set_multicast_list(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + + DBG(dev, "multicast" NL); + + BUG_ON(!netif_running(dev->ndev)); + + if (dev->no_mcast) { + dev->mcast_pending = 1; + return; + } + __emac_set_multicast_list(dev); +} + +static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) +{ + int rx_sync_size = emac_rx_sync_size(new_mtu); + int rx_skb_size = emac_rx_skb_size(new_mtu); + int i, ret = 0; + + mutex_lock(&dev->link_lock); + emac_netif_stop(dev); + emac_rx_disable(dev); + mal_disable_rx_channel(dev->mal, dev->mal_rx_chan); + + if (dev->rx_sg_skb) { + ++dev->estats.rx_dropped_resize; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } + + /* Make a first pass over RX ring and mark BDs ready, dropping + * non-processed packets on the way. We need this as a separate pass + * to simplify error recovery in the case of allocation failure later. + */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + if (dev->rx_desc[i].ctrl & MAL_RX_CTRL_FIRST) + ++dev->estats.rx_dropped_resize; + + dev->rx_desc[i].data_len = 0; + dev->rx_desc[i].ctrl = MAL_RX_CTRL_EMPTY | + (i == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); + } + + /* Reallocate RX ring only if bigger skb buffers are required */ + if (rx_skb_size <= dev->rx_skb_size) + goto skip; + + /* Second pass, allocate new skbs */ + for (i = 0; i < NUM_RX_BUFF; ++i) { + struct sk_buff *skb = alloc_skb(rx_skb_size, GFP_ATOMIC); + if (!skb) { + ret = -ENOMEM; + goto oom; + } + + BUG_ON(!dev->rx_skb[i]); + dev_kfree_skb(dev->rx_skb[i]); + + skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); + dev->rx_desc[i].data_ptr = + dma_map_single(&dev->ofdev->dev, skb->data - 2, rx_sync_size, + DMA_FROM_DEVICE) + 2; + dev->rx_skb[i] = skb; + } + skip: + /* Check if we need to change "Jumbo" bit in MR1 */ + if ((new_mtu > ETH_DATA_LEN) ^ (dev->ndev->mtu > ETH_DATA_LEN)) { + /* This is to prevent starting RX channel in emac_rx_enable() */ + set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + + dev->ndev->mtu = new_mtu; + emac_full_tx_reset(dev); + } + + mal_set_rcbs(dev->mal, dev->mal_rx_chan, emac_rx_size(new_mtu)); + oom: + /* Restart RX */ + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + dev->rx_slot = 0; + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_rx_enable(dev); + emac_netif_start(dev); + mutex_unlock(&dev->link_lock); + + return ret; +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct emac_instance *dev = netdev_priv(ndev); + int ret = 0; + + if (new_mtu < EMAC_MIN_MTU || new_mtu > dev->max_mtu) + return -EINVAL; + + DBG(dev, "change_mtu(%d)" NL, new_mtu); + + if (netif_running(ndev)) { + /* Check if we really need to reinitialize RX ring */ + if (emac_rx_skb_size(ndev->mtu) != emac_rx_skb_size(new_mtu)) + ret = emac_resize_rx_ring(dev, new_mtu); + } + + if (!ret) { + ndev->mtu = new_mtu; + dev->rx_skb_size = emac_rx_skb_size(new_mtu); + dev->rx_sync_size = emac_rx_sync_size(new_mtu); + } + + return ret; +} + +static void emac_clean_tx_ring(struct emac_instance *dev) +{ + int i; + + for (i = 0; i < NUM_TX_BUFF; ++i) { + if (dev->tx_skb[i]) { + dev_kfree_skb(dev->tx_skb[i]); + dev->tx_skb[i] = NULL; + if (dev->tx_desc[i].ctrl & MAL_TX_CTRL_READY) + ++dev->estats.tx_dropped; + } + dev->tx_desc[i].ctrl = 0; + dev->tx_desc[i].data_ptr = 0; + } +} + +static void emac_clean_rx_ring(struct emac_instance *dev) +{ + int i; + + for (i = 0; i < NUM_RX_BUFF; ++i) + if (dev->rx_skb[i]) { + dev->rx_desc[i].ctrl = 0; + dev_kfree_skb(dev->rx_skb[i]); + dev->rx_skb[i] = NULL; + dev->rx_desc[i].data_ptr = 0; + } + + if (dev->rx_sg_skb) { + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } +} + +static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot, + gfp_t flags) +{ + struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags); + if (unlikely(!skb)) + return -ENOMEM; + + dev->rx_skb[slot] = skb; + dev->rx_desc[slot].data_len = 0; + + skb_reserve(skb, EMAC_RX_SKB_HEADROOM + 2); + dev->rx_desc[slot].data_ptr = + dma_map_single(&dev->ofdev->dev, skb->data - 2, dev->rx_sync_size, + DMA_FROM_DEVICE) + 2; + wmb(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); + + return 0; +} + +static void emac_print_link_status(struct emac_instance *dev) +{ + if (netif_carrier_ok(dev->ndev)) + printk(KERN_INFO "%s: link is up, %d %s%s\n", + dev->ndev->name, dev->phy.speed, + dev->phy.duplex == DUPLEX_FULL ? "FDX" : "HDX", + dev->phy.pause ? ", pause enabled" : + dev->phy.asym_pause ? ", asymmetric pause enabled" : ""); + else + printk(KERN_INFO "%s: link is down\n", dev->ndev->name); +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_open(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int err, i; + + DBG(dev, "open" NL); + + /* Setup error IRQ handler */ + err = request_irq(dev->emac_irq, emac_irq, 0, "EMAC", dev); + if (err) { + printk(KERN_ERR "%s: failed to request IRQ %d\n", + ndev->name, dev->emac_irq); + return err; + } + + /* Allocate RX ring */ + for (i = 0; i < NUM_RX_BUFF; ++i) + if (emac_alloc_rx_skb(dev, i, GFP_KERNEL)) { + printk(KERN_ERR "%s: failed to allocate RX ring\n", + ndev->name); + goto oom; + } + + dev->tx_cnt = dev->tx_slot = dev->ack_slot = dev->rx_slot = 0; + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + dev->rx_sg_skb = NULL; + + mutex_lock(&dev->link_lock); + dev->opened = 1; + + /* Start PHY polling now. + */ + if (dev->phy.address >= 0) { + int link_poll_interval; + if (dev->phy.def->ops->poll_link(&dev->phy)) { + dev->phy.def->ops->read_link(&dev->phy); + emac_rx_clk_default(dev); + netif_carrier_on(dev->ndev); + link_poll_interval = PHY_POLL_LINK_ON; + } else { + emac_rx_clk_tx(dev); + netif_carrier_off(dev->ndev); + link_poll_interval = PHY_POLL_LINK_OFF; + } + dev->link_polling = 1; + wmb(); + schedule_delayed_work(&dev->link_work, link_poll_interval); + emac_print_link_status(dev); + } else + netif_carrier_on(dev->ndev); + + /* Required for Pause packet support in EMAC */ + dev_mc_add_global(ndev, default_mcast_addr); + + emac_configure(dev); + mal_poll_add(dev->mal, &dev->commac); + mal_enable_tx_channel(dev->mal, dev->mal_tx_chan); + mal_set_rcbs(dev->mal, dev->mal_rx_chan, emac_rx_size(ndev->mtu)); + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_tx_enable(dev); + emac_rx_enable(dev); + emac_netif_start(dev); + + mutex_unlock(&dev->link_lock); + + return 0; + oom: + emac_clean_rx_ring(dev); + free_irq(dev->emac_irq, dev); + + return -ENOMEM; +} + +/* BHs disabled */ +#if 0 +static int emac_link_differs(struct emac_instance *dev) +{ + u32 r = in_be32(&dev->emacp->mr1); + + int duplex = r & EMAC_MR1_FDE ? DUPLEX_FULL : DUPLEX_HALF; + int speed, pause, asym_pause; + + if (r & EMAC_MR1_MF_1000) + speed = SPEED_1000; + else if (r & EMAC_MR1_MF_100) + speed = SPEED_100; + else + speed = SPEED_10; + + switch (r & (EMAC_MR1_EIFC | EMAC_MR1_APP)) { + case (EMAC_MR1_EIFC | EMAC_MR1_APP): + pause = 1; + asym_pause = 0; + break; + case EMAC_MR1_APP: + pause = 0; + asym_pause = 1; + break; + default: + pause = asym_pause = 0; + } + return speed != dev->phy.speed || duplex != dev->phy.duplex || + pause != dev->phy.pause || asym_pause != dev->phy.asym_pause; +} +#endif + +static void emac_link_timer(struct work_struct *work) +{ + struct emac_instance *dev = + container_of(to_delayed_work(work), + struct emac_instance, link_work); + int link_poll_interval; + + mutex_lock(&dev->link_lock); + DBG2(dev, "link timer" NL); + + if (!dev->opened) + goto bail; + + if (dev->phy.def->ops->poll_link(&dev->phy)) { + if (!netif_carrier_ok(dev->ndev)) { + emac_rx_clk_default(dev); + /* Get new link parameters */ + dev->phy.def->ops->read_link(&dev->phy); + + netif_carrier_on(dev->ndev); + emac_netif_stop(dev); + emac_full_tx_reset(dev); + emac_netif_start(dev); + emac_print_link_status(dev); + } + link_poll_interval = PHY_POLL_LINK_ON; + } else { + if (netif_carrier_ok(dev->ndev)) { + emac_rx_clk_tx(dev); + netif_carrier_off(dev->ndev); + netif_tx_disable(dev->ndev); + emac_reinitialize(dev); + emac_print_link_status(dev); + } + link_poll_interval = PHY_POLL_LINK_OFF; + } + schedule_delayed_work(&dev->link_work, link_poll_interval); + bail: + mutex_unlock(&dev->link_lock); +} + +static void emac_force_link_update(struct emac_instance *dev) +{ + netif_carrier_off(dev->ndev); + smp_rmb(); + if (dev->link_polling) { + cancel_delayed_work_sync(&dev->link_work); + if (dev->link_polling) + schedule_delayed_work(&dev->link_work, PHY_POLL_LINK_OFF); + } +} + +/* Process ctx, rtnl_lock semaphore */ +static int emac_close(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + + DBG(dev, "close" NL); + + if (dev->phy.address >= 0) { + dev->link_polling = 0; + cancel_delayed_work_sync(&dev->link_work); + } + mutex_lock(&dev->link_lock); + emac_netif_stop(dev); + dev->opened = 0; + mutex_unlock(&dev->link_lock); + + emac_rx_disable(dev); + emac_tx_disable(dev); + mal_disable_rx_channel(dev->mal, dev->mal_rx_chan); + mal_disable_tx_channel(dev->mal, dev->mal_tx_chan); + mal_poll_del(dev->mal, &dev->commac); + + emac_clean_tx_ring(dev); + emac_clean_rx_ring(dev); + + free_irq(dev->emac_irq, dev); + + netif_carrier_off(ndev); + + return 0; +} + +static inline u16 emac_tx_csum(struct emac_instance *dev, + struct sk_buff *skb) +{ + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH) && + (skb->ip_summed == CHECKSUM_PARTIAL)) { + ++dev->stats.tx_packets_csum; + return EMAC_TX_CTRL_TAH_CSUM; + } + return 0; +} + +static inline int emac_xmit_finish(struct emac_instance *dev, int len) +{ + struct emac_regs __iomem *p = dev->emacp; + struct net_device *ndev = dev->ndev; + + /* Send the packet out. If the if makes a significant perf + * difference, then we can store the TMR0 value in "dev" + * instead + */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + out_be32(&p->tmr0, EMAC4_TMR0_XMIT); + else + out_be32(&p->tmr0, EMAC_TMR0_XMIT); + + if (unlikely(++dev->tx_cnt == NUM_TX_BUFF)) { + netif_stop_queue(ndev); + DBG2(dev, "stopped TX queue" NL); + } + + ndev->trans_start = jiffies; + ++dev->stats.tx_packets; + dev->stats.tx_bytes += len; + + return NETDEV_TX_OK; +} + +/* Tx lock BH */ +static int emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + unsigned int len = skb->len; + int slot; + + u16 ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + MAL_TX_CTRL_LAST | emac_tx_csum(dev, skb); + + slot = dev->tx_slot++; + if (dev->tx_slot == NUM_TX_BUFF) { + dev->tx_slot = 0; + ctrl |= MAL_TX_CTRL_WRAP; + } + + DBG2(dev, "xmit(%u) %d" NL, len, slot); + + dev->tx_skb[slot] = skb; + dev->tx_desc[slot].data_ptr = dma_map_single(&dev->ofdev->dev, + skb->data, len, + DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) len; + wmb(); + dev->tx_desc[slot].ctrl = ctrl; + + return emac_xmit_finish(dev, len); +} + +static inline int emac_xmit_split(struct emac_instance *dev, int slot, + u32 pd, int len, int last, u16 base_ctrl) +{ + while (1) { + u16 ctrl = base_ctrl; + int chunk = min(len, MAL_MAX_TX_SIZE); + len -= chunk; + + slot = (slot + 1) % NUM_TX_BUFF; + + if (last && !len) + ctrl |= MAL_TX_CTRL_LAST; + if (slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; + + dev->tx_skb[slot] = NULL; + dev->tx_desc[slot].data_ptr = pd; + dev->tx_desc[slot].data_len = (u16) chunk; + dev->tx_desc[slot].ctrl = ctrl; + ++dev->tx_cnt; + + if (!len) + break; + + pd += chunk; + } + return slot; +} + +/* Tx lock BH disabled (SG version for TAH equipped EMACs) */ +static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int len = skb->len, chunk; + int slot, i; + u16 ctrl; + u32 pd; + + /* This is common "fast" path */ + if (likely(!nr_frags && len <= MAL_MAX_TX_SIZE)) + return emac_start_xmit(skb, ndev); + + len -= skb->data_len; + + /* Note, this is only an *estimation*, we can still run out of empty + * slots because of the additional fragmentation into + * MAL_MAX_TX_SIZE-sized chunks + */ + if (unlikely(dev->tx_cnt + nr_frags + mal_tx_chunks(len) > NUM_TX_BUFF)) + goto stop_queue; + + ctrl = EMAC_TX_CTRL_GFCS | EMAC_TX_CTRL_GP | MAL_TX_CTRL_READY | + emac_tx_csum(dev, skb); + slot = dev->tx_slot; + + /* skb data */ + dev->tx_skb[slot] = NULL; + chunk = min(len, MAL_MAX_TX_SIZE); + dev->tx_desc[slot].data_ptr = pd = + dma_map_single(&dev->ofdev->dev, skb->data, len, DMA_TO_DEVICE); + dev->tx_desc[slot].data_len = (u16) chunk; + len -= chunk; + if (unlikely(len)) + slot = emac_xmit_split(dev, slot, pd + chunk, len, !nr_frags, + ctrl); + /* skb fragments */ + for (i = 0; i < nr_frags; ++i) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + len = frag->size; + + if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF)) + goto undo_frame; + + pd = dma_map_page(&dev->ofdev->dev, frag->page, frag->page_offset, len, + DMA_TO_DEVICE); + + slot = emac_xmit_split(dev, slot, pd, len, i == nr_frags - 1, + ctrl); + } + + DBG2(dev, "xmit_sg(%u) %d - %d" NL, skb->len, dev->tx_slot, slot); + + /* Attach skb to the last slot so we don't release it too early */ + dev->tx_skb[slot] = skb; + + /* Send the packet out */ + if (dev->tx_slot == NUM_TX_BUFF - 1) + ctrl |= MAL_TX_CTRL_WRAP; + wmb(); + dev->tx_desc[dev->tx_slot].ctrl = ctrl; + dev->tx_slot = (slot + 1) % NUM_TX_BUFF; + + return emac_xmit_finish(dev, skb->len); + + undo_frame: + /* Well, too bad. Our previous estimation was overly optimistic. + * Undo everything. + */ + while (slot != dev->tx_slot) { + dev->tx_desc[slot].ctrl = 0; + --dev->tx_cnt; + if (--slot < 0) + slot = NUM_TX_BUFF - 1; + } + ++dev->estats.tx_undo; + + stop_queue: + netif_stop_queue(ndev); + DBG2(dev, "stopped TX queue" NL); + return NETDEV_TX_BUSY; +} + +/* Tx lock BHs */ +static void emac_parse_tx_error(struct emac_instance *dev, u16 ctrl) +{ + struct emac_error_stats *st = &dev->estats; + + DBG(dev, "BD TX error %04x" NL, ctrl); + + ++st->tx_bd_errors; + if (ctrl & EMAC_TX_ST_BFCS) + ++st->tx_bd_bad_fcs; + if (ctrl & EMAC_TX_ST_LCS) + ++st->tx_bd_carrier_loss; + if (ctrl & EMAC_TX_ST_ED) + ++st->tx_bd_excessive_deferral; + if (ctrl & EMAC_TX_ST_EC) + ++st->tx_bd_excessive_collisions; + if (ctrl & EMAC_TX_ST_LC) + ++st->tx_bd_late_collision; + if (ctrl & EMAC_TX_ST_MC) + ++st->tx_bd_multple_collisions; + if (ctrl & EMAC_TX_ST_SC) + ++st->tx_bd_single_collision; + if (ctrl & EMAC_TX_ST_UR) + ++st->tx_bd_underrun; + if (ctrl & EMAC_TX_ST_SQE) + ++st->tx_bd_sqe; +} + +static void emac_poll_tx(void *param) +{ + struct emac_instance *dev = param; + u32 bad_mask; + + DBG2(dev, "poll_tx, %d %d" NL, dev->tx_cnt, dev->ack_slot); + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + bad_mask = EMAC_IS_BAD_TX_TAH; + else + bad_mask = EMAC_IS_BAD_TX; + + netif_tx_lock_bh(dev->ndev); + if (dev->tx_cnt) { + u16 ctrl; + int slot = dev->ack_slot, n = 0; + again: + ctrl = dev->tx_desc[slot].ctrl; + if (!(ctrl & MAL_TX_CTRL_READY)) { + struct sk_buff *skb = dev->tx_skb[slot]; + ++n; + + if (skb) { + dev_kfree_skb(skb); + dev->tx_skb[slot] = NULL; + } + slot = (slot + 1) % NUM_TX_BUFF; + + if (unlikely(ctrl & bad_mask)) + emac_parse_tx_error(dev, ctrl); + + if (--dev->tx_cnt) + goto again; + } + if (n) { + dev->ack_slot = slot; + if (netif_queue_stopped(dev->ndev) && + dev->tx_cnt < EMAC_TX_WAKEUP_THRESH) + netif_wake_queue(dev->ndev); + + DBG2(dev, "tx %d pkts" NL, n); + } + } + netif_tx_unlock_bh(dev->ndev); +} + +static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot, + int len) +{ + struct sk_buff *skb = dev->rx_skb[slot]; + + DBG2(dev, "recycle %d %d" NL, slot, len); + + if (len) + dma_map_single(&dev->ofdev->dev, skb->data - 2, + EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE); + + dev->rx_desc[slot].data_len = 0; + wmb(); + dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | + (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0); +} + +static void emac_parse_rx_error(struct emac_instance *dev, u16 ctrl) +{ + struct emac_error_stats *st = &dev->estats; + + DBG(dev, "BD RX error %04x" NL, ctrl); + + ++st->rx_bd_errors; + if (ctrl & EMAC_RX_ST_OE) + ++st->rx_bd_overrun; + if (ctrl & EMAC_RX_ST_BP) + ++st->rx_bd_bad_packet; + if (ctrl & EMAC_RX_ST_RP) + ++st->rx_bd_runt_packet; + if (ctrl & EMAC_RX_ST_SE) + ++st->rx_bd_short_event; + if (ctrl & EMAC_RX_ST_AE) + ++st->rx_bd_alignment_error; + if (ctrl & EMAC_RX_ST_BFCS) + ++st->rx_bd_bad_fcs; + if (ctrl & EMAC_RX_ST_PTL) + ++st->rx_bd_packet_too_long; + if (ctrl & EMAC_RX_ST_ORE) + ++st->rx_bd_out_of_range; + if (ctrl & EMAC_RX_ST_IRE) + ++st->rx_bd_in_range; +} + +static inline void emac_rx_csum(struct emac_instance *dev, + struct sk_buff *skb, u16 ctrl) +{ +#ifdef CONFIG_IBM_EMAC_TAH + if (!ctrl && dev->tah_dev) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + ++dev->stats.rx_packets_csum; + } +#endif +} + +static inline int emac_rx_sg_append(struct emac_instance *dev, int slot) +{ + if (likely(dev->rx_sg_skb != NULL)) { + int len = dev->rx_desc[slot].data_len; + int tot_len = dev->rx_sg_skb->len + len; + + if (unlikely(tot_len + 2 > dev->rx_skb_size)) { + ++dev->estats.rx_dropped_mtu; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } else { + cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), + dev->rx_skb[slot]->data, len); + skb_put(dev->rx_sg_skb, len); + emac_recycle_rx_skb(dev, slot, len); + return 0; + } + } + emac_recycle_rx_skb(dev, slot, 0); + return -1; +} + +/* NAPI poll context */ +static int emac_poll_rx(void *param, int budget) +{ + struct emac_instance *dev = param; + int slot = dev->rx_slot, received = 0; + + DBG2(dev, "poll_rx(%d)" NL, budget); + + again: + while (budget > 0) { + int len; + struct sk_buff *skb; + u16 ctrl = dev->rx_desc[slot].ctrl; + + if (ctrl & MAL_RX_CTRL_EMPTY) + break; + + skb = dev->rx_skb[slot]; + mb(); + len = dev->rx_desc[slot].data_len; + + if (unlikely(!MAL_IS_SINGLE_RX(ctrl))) + goto sg; + + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + emac_recycle_rx_skb(dev, slot, 0); + len = 0; + goto next; + } + + if (len < ETH_HLEN) { + ++dev->estats.rx_dropped_stack; + emac_recycle_rx_skb(dev, slot, len); + goto next; + } + + if (len && len < EMAC_RX_COPY_THRESH) { + struct sk_buff *copy_skb = + alloc_skb(len + EMAC_RX_SKB_HEADROOM + 2, GFP_ATOMIC); + if (unlikely(!copy_skb)) + goto oom; + + skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); + cacheable_memcpy(copy_skb->data - 2, skb->data - 2, + len + 2); + emac_recycle_rx_skb(dev, slot, len); + skb = copy_skb; + } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) + goto oom; + + skb_put(skb, len); + push_packet: + skb->protocol = eth_type_trans(skb, dev->ndev); + emac_rx_csum(dev, skb, ctrl); + + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + ++dev->estats.rx_dropped_stack; + next: + ++dev->stats.rx_packets; + skip: + dev->stats.rx_bytes += len; + slot = (slot + 1) % NUM_RX_BUFF; + --budget; + ++received; + continue; + sg: + if (ctrl & MAL_RX_CTRL_FIRST) { + BUG_ON(dev->rx_sg_skb); + if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC))) { + DBG(dev, "rx OOM %d" NL, slot); + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + } else { + dev->rx_sg_skb = skb; + skb_put(skb, len); + } + } else if (!emac_rx_sg_append(dev, slot) && + (ctrl & MAL_RX_CTRL_LAST)) { + + skb = dev->rx_sg_skb; + dev->rx_sg_skb = NULL; + + ctrl &= EMAC_BAD_RX_MASK; + if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) { + emac_parse_rx_error(dev, ctrl); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(skb); + len = 0; + } else + goto push_packet; + } + goto skip; + oom: + DBG(dev, "rx OOM %d" NL, slot); + /* Drop the packet and recycle skb */ + ++dev->estats.rx_dropped_oom; + emac_recycle_rx_skb(dev, slot, 0); + goto next; + } + + if (received) { + DBG2(dev, "rx %d BDs" NL, received); + dev->rx_slot = slot; + } + + if (unlikely(budget && test_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags))) { + mb(); + if (!(dev->rx_desc[slot].ctrl & MAL_RX_CTRL_EMPTY)) { + DBG2(dev, "rx restart" NL); + received = 0; + goto again; + } + + if (dev->rx_sg_skb) { + DBG2(dev, "dropping partial rx packet" NL); + ++dev->estats.rx_dropped_error; + dev_kfree_skb(dev->rx_sg_skb); + dev->rx_sg_skb = NULL; + } + + clear_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags); + mal_enable_rx_channel(dev->mal, dev->mal_rx_chan); + emac_rx_enable(dev); + dev->rx_slot = 0; + } + return received; +} + +/* NAPI poll context */ +static int emac_peek_rx(void *param) +{ + struct emac_instance *dev = param; + + return !(dev->rx_desc[dev->rx_slot].ctrl & MAL_RX_CTRL_EMPTY); +} + +/* NAPI poll context */ +static int emac_peek_rx_sg(void *param) +{ + struct emac_instance *dev = param; + + int slot = dev->rx_slot; + while (1) { + u16 ctrl = dev->rx_desc[slot].ctrl; + if (ctrl & MAL_RX_CTRL_EMPTY) + return 0; + else if (ctrl & MAL_RX_CTRL_LAST) + return 1; + + slot = (slot + 1) % NUM_RX_BUFF; + + /* I'm just being paranoid here :) */ + if (unlikely(slot == dev->rx_slot)) + return 0; + } +} + +/* Hard IRQ */ +static void emac_rxde(void *param) +{ + struct emac_instance *dev = param; + + ++dev->estats.rx_stopped; + emac_rx_disable_async(dev); +} + +/* Hard IRQ */ +static irqreturn_t emac_irq(int irq, void *dev_instance) +{ + struct emac_instance *dev = dev_instance; + struct emac_regs __iomem *p = dev->emacp; + struct emac_error_stats *st = &dev->estats; + u32 isr; + + spin_lock(&dev->lock); + + isr = in_be32(&p->isr); + out_be32(&p->isr, isr); + + DBG(dev, "isr = %08x" NL, isr); + + if (isr & EMAC4_ISR_TXPE) + ++st->tx_parity; + if (isr & EMAC4_ISR_RXPE) + ++st->rx_parity; + if (isr & EMAC4_ISR_TXUE) + ++st->tx_underrun; + if (isr & EMAC4_ISR_RXOE) + ++st->rx_fifo_overrun; + if (isr & EMAC_ISR_OVR) + ++st->rx_overrun; + if (isr & EMAC_ISR_BP) + ++st->rx_bad_packet; + if (isr & EMAC_ISR_RP) + ++st->rx_runt_packet; + if (isr & EMAC_ISR_SE) + ++st->rx_short_event; + if (isr & EMAC_ISR_ALE) + ++st->rx_alignment_error; + if (isr & EMAC_ISR_BFCS) + ++st->rx_bad_fcs; + if (isr & EMAC_ISR_PTLE) + ++st->rx_packet_too_long; + if (isr & EMAC_ISR_ORE) + ++st->rx_out_of_range; + if (isr & EMAC_ISR_IRE) + ++st->rx_in_range; + if (isr & EMAC_ISR_SQE) + ++st->tx_sqe; + if (isr & EMAC_ISR_TE) + ++st->tx_errors; + + spin_unlock(&dev->lock); + + return IRQ_HANDLED; +} + +static struct net_device_stats *emac_stats(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct emac_stats *st = &dev->stats; + struct emac_error_stats *est = &dev->estats; + struct net_device_stats *nst = &dev->nstats; + unsigned long flags; + + DBG2(dev, "stats" NL); + + /* Compute "legacy" statistics */ + spin_lock_irqsave(&dev->lock, flags); + nst->rx_packets = (unsigned long)st->rx_packets; + nst->rx_bytes = (unsigned long)st->rx_bytes; + nst->tx_packets = (unsigned long)st->tx_packets; + nst->tx_bytes = (unsigned long)st->tx_bytes; + nst->rx_dropped = (unsigned long)(est->rx_dropped_oom + + est->rx_dropped_error + + est->rx_dropped_resize + + est->rx_dropped_mtu); + nst->tx_dropped = (unsigned long)est->tx_dropped; + + nst->rx_errors = (unsigned long)est->rx_bd_errors; + nst->rx_fifo_errors = (unsigned long)(est->rx_bd_overrun + + est->rx_fifo_overrun + + est->rx_overrun); + nst->rx_frame_errors = (unsigned long)(est->rx_bd_alignment_error + + est->rx_alignment_error); + nst->rx_crc_errors = (unsigned long)(est->rx_bd_bad_fcs + + est->rx_bad_fcs); + nst->rx_length_errors = (unsigned long)(est->rx_bd_runt_packet + + est->rx_bd_short_event + + est->rx_bd_packet_too_long + + est->rx_bd_out_of_range + + est->rx_bd_in_range + + est->rx_runt_packet + + est->rx_short_event + + est->rx_packet_too_long + + est->rx_out_of_range + + est->rx_in_range); + + nst->tx_errors = (unsigned long)(est->tx_bd_errors + est->tx_errors); + nst->tx_fifo_errors = (unsigned long)(est->tx_bd_underrun + + est->tx_underrun); + nst->tx_carrier_errors = (unsigned long)est->tx_bd_carrier_loss; + nst->collisions = (unsigned long)(est->tx_bd_excessive_deferral + + est->tx_bd_excessive_collisions + + est->tx_bd_late_collision + + est->tx_bd_multple_collisions); + spin_unlock_irqrestore(&dev->lock, flags); + return nst; +} + +static struct mal_commac_ops emac_commac_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx, + .rxde = &emac_rxde, +}; + +static struct mal_commac_ops emac_commac_sg_ops = { + .poll_tx = &emac_poll_tx, + .poll_rx = &emac_poll_rx, + .peek_rx = &emac_peek_rx_sg, + .rxde = &emac_rxde, +}; + +/* Ethtool support */ +static int emac_ethtool_get_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + + cmd->supported = dev->phy.features; + cmd->port = PORT_MII; + cmd->phy_address = dev->phy.address; + cmd->transceiver = + dev->phy.address >= 0 ? XCVR_EXTERNAL : XCVR_INTERNAL; + + mutex_lock(&dev->link_lock); + cmd->advertising = dev->phy.advertising; + cmd->autoneg = dev->phy.autoneg; + cmd->speed = dev->phy.speed; + cmd->duplex = dev->phy.duplex; + mutex_unlock(&dev->link_lock); + + return 0; +} + +static int emac_ethtool_set_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + u32 f = dev->phy.features; + + DBG(dev, "set_settings(%d, %d, %d, 0x%08x)" NL, + cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising); + + /* Basic sanity checks */ + if (dev->phy.address < 0) + return -EOPNOTSUPP; + if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + return -EINVAL; + if (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_DISABLE) { + switch (cmd->speed) { + case SPEED_10: + if (cmd->duplex == DUPLEX_HALF && + !(f & SUPPORTED_10baseT_Half)) + return -EINVAL; + if (cmd->duplex == DUPLEX_FULL && + !(f & SUPPORTED_10baseT_Full)) + return -EINVAL; + break; + case SPEED_100: + if (cmd->duplex == DUPLEX_HALF && + !(f & SUPPORTED_100baseT_Half)) + return -EINVAL; + if (cmd->duplex == DUPLEX_FULL && + !(f & SUPPORTED_100baseT_Full)) + return -EINVAL; + break; + case SPEED_1000: + if (cmd->duplex == DUPLEX_HALF && + !(f & SUPPORTED_1000baseT_Half)) + return -EINVAL; + if (cmd->duplex == DUPLEX_FULL && + !(f & SUPPORTED_1000baseT_Full)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + mutex_lock(&dev->link_lock); + dev->phy.def->ops->setup_forced(&dev->phy, cmd->speed, + cmd->duplex); + mutex_unlock(&dev->link_lock); + + } else { + if (!(f & SUPPORTED_Autoneg)) + return -EINVAL; + + mutex_lock(&dev->link_lock); + dev->phy.def->ops->setup_aneg(&dev->phy, + (cmd->advertising & f) | + (dev->phy.advertising & + (ADVERTISED_Pause | + ADVERTISED_Asym_Pause))); + mutex_unlock(&dev->link_lock); + } + emac_force_link_update(dev); + + return 0; +} + +static void emac_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp) +{ + rp->rx_max_pending = rp->rx_pending = NUM_RX_BUFF; + rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF; +} + +static void emac_ethtool_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pp) +{ + struct emac_instance *dev = netdev_priv(ndev); + + mutex_lock(&dev->link_lock); + if ((dev->phy.features & SUPPORTED_Autoneg) && + (dev->phy.advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause))) + pp->autoneg = 1; + + if (dev->phy.duplex == DUPLEX_FULL) { + if (dev->phy.pause) + pp->rx_pause = pp->tx_pause = 1; + else if (dev->phy.asym_pause) + pp->tx_pause = 1; + } + mutex_unlock(&dev->link_lock); +} + +static int emac_get_regs_len(struct emac_instance *dev) +{ + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) + return sizeof(struct emac_ethtool_regs_subhdr) + + EMAC4_ETHTOOL_REGS_SIZE(dev); + else + return sizeof(struct emac_ethtool_regs_subhdr) + + EMAC_ETHTOOL_REGS_SIZE(dev); +} + +static int emac_ethtool_get_regs_len(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int size; + + size = sizeof(struct emac_ethtool_regs_hdr) + + emac_get_regs_len(dev) + mal_get_regs_len(dev->mal); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + size += zmii_get_regs_len(dev->zmii_dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + size += rgmii_get_regs_len(dev->rgmii_dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + size += tah_get_regs_len(dev->tah_dev); + + return size; +} + +static void *emac_dump_regs(struct emac_instance *dev, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; + + hdr->index = dev->cell_index; + if (emac_has_feature(dev, EMAC_FTR_EMAC4)) { + hdr->version = EMAC4_ETHTOOL_REGS_VER; + memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev)); + return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev); + } else { + hdr->version = EMAC_ETHTOOL_REGS_VER; + memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev)); + return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev); + } +} + +static void emac_ethtool_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buf) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct emac_ethtool_regs_hdr *hdr = buf; + + hdr->components = 0; + buf = hdr + 1; + + buf = mal_dump_regs(dev->mal, buf); + buf = emac_dump_regs(dev, buf); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) { + hdr->components |= EMAC_ETHTOOL_REGS_ZMII; + buf = zmii_dump_regs(dev->zmii_dev, buf); + } + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { + hdr->components |= EMAC_ETHTOOL_REGS_RGMII; + buf = rgmii_dump_regs(dev->rgmii_dev, buf); + } + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) { + hdr->components |= EMAC_ETHTOOL_REGS_TAH; + buf = tah_dump_regs(dev->tah_dev, buf); + } +} + +static int emac_ethtool_nway_reset(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + int res = 0; + + DBG(dev, "nway_reset" NL); + + if (dev->phy.address < 0) + return -EOPNOTSUPP; + + mutex_lock(&dev->link_lock); + if (!dev->phy.autoneg) { + res = -EINVAL; + goto out; + } + + dev->phy.def->ops->setup_aneg(&dev->phy, dev->phy.advertising); + out: + mutex_unlock(&dev->link_lock); + emac_force_link_update(dev); + return res; +} + +static int emac_ethtool_get_sset_count(struct net_device *ndev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return EMAC_ETHTOOL_STATS_COUNT; + else + return -EINVAL; +} + +static void emac_ethtool_get_strings(struct net_device *ndev, u32 stringset, + u8 * buf) +{ + if (stringset == ETH_SS_STATS) + memcpy(buf, &emac_stats_keys, sizeof(emac_stats_keys)); +} + +static void emac_ethtool_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *estats, + u64 * tmp_stats) +{ + struct emac_instance *dev = netdev_priv(ndev); + + memcpy(tmp_stats, &dev->stats, sizeof(dev->stats)); + tmp_stats += sizeof(dev->stats) / sizeof(u64); + memcpy(tmp_stats, &dev->estats, sizeof(dev->estats)); +} + +static void emac_ethtool_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct emac_instance *dev = netdev_priv(ndev); + + strcpy(info->driver, "ibm_emac"); + strcpy(info->version, DRV_VERSION); + info->fw_version[0] = '\0'; + sprintf(info->bus_info, "PPC 4xx EMAC-%d %s", + dev->cell_index, dev->ofdev->dev.of_node->full_name); + info->regdump_len = emac_ethtool_get_regs_len(ndev); +} + +static const struct ethtool_ops emac_ethtool_ops = { + .get_settings = emac_ethtool_get_settings, + .set_settings = emac_ethtool_set_settings, + .get_drvinfo = emac_ethtool_get_drvinfo, + + .get_regs_len = emac_ethtool_get_regs_len, + .get_regs = emac_ethtool_get_regs, + + .nway_reset = emac_ethtool_nway_reset, + + .get_ringparam = emac_ethtool_get_ringparam, + .get_pauseparam = emac_ethtool_get_pauseparam, + + .get_strings = emac_ethtool_get_strings, + .get_sset_count = emac_ethtool_get_sset_count, + .get_ethtool_stats = emac_ethtool_get_ethtool_stats, + + .get_link = ethtool_op_get_link, +}; + +static int emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct mii_ioctl_data *data = if_mii(rq); + + DBG(dev, "ioctl %08x" NL, cmd); + + if (dev->phy.address < 0) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = dev->phy.address; + /* Fall through */ + case SIOCGMIIREG: + data->val_out = emac_mdio_read(ndev, dev->phy.address, + data->reg_num); + return 0; + + case SIOCSMIIREG: + emac_mdio_write(ndev, dev->phy.address, data->reg_num, + data->val_in); + return 0; + default: + return -EOPNOTSUPP; + } +} + +struct emac_depentry { + u32 phandle; + struct device_node *node; + struct platform_device *ofdev; + void *drvdata; +}; + +#define EMAC_DEP_MAL_IDX 0 +#define EMAC_DEP_ZMII_IDX 1 +#define EMAC_DEP_RGMII_IDX 2 +#define EMAC_DEP_TAH_IDX 3 +#define EMAC_DEP_MDIO_IDX 4 +#define EMAC_DEP_PREV_IDX 5 +#define EMAC_DEP_COUNT 6 + +static int __devinit emac_check_deps(struct emac_instance *dev, + struct emac_depentry *deps) +{ + int i, there = 0; + struct device_node *np; + + for (i = 0; i < EMAC_DEP_COUNT; i++) { + /* no dependency on that item, allright */ + if (deps[i].phandle == 0) { + there++; + continue; + } + /* special case for blist as the dependency might go away */ + if (i == EMAC_DEP_PREV_IDX) { + np = *(dev->blist - 1); + if (np == NULL) { + deps[i].phandle = 0; + there++; + continue; + } + if (deps[i].node == NULL) + deps[i].node = of_node_get(np); + } + if (deps[i].node == NULL) + deps[i].node = of_find_node_by_phandle(deps[i].phandle); + if (deps[i].node == NULL) + continue; + if (deps[i].ofdev == NULL) + deps[i].ofdev = of_find_device_by_node(deps[i].node); + if (deps[i].ofdev == NULL) + continue; + if (deps[i].drvdata == NULL) + deps[i].drvdata = dev_get_drvdata(&deps[i].ofdev->dev); + if (deps[i].drvdata != NULL) + there++; + } + return there == EMAC_DEP_COUNT; +} + +static void emac_put_deps(struct emac_instance *dev) +{ + if (dev->mal_dev) + of_dev_put(dev->mal_dev); + if (dev->zmii_dev) + of_dev_put(dev->zmii_dev); + if (dev->rgmii_dev) + of_dev_put(dev->rgmii_dev); + if (dev->mdio_dev) + of_dev_put(dev->mdio_dev); + if (dev->tah_dev) + of_dev_put(dev->tah_dev); +} + +static int __devinit emac_of_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + /* We are only intereted in device addition */ + if (action == BUS_NOTIFY_BOUND_DRIVER) + wake_up_all(&emac_probe_wait); + return 0; +} + +static struct notifier_block emac_of_bus_notifier __devinitdata = { + .notifier_call = emac_of_bus_notify +}; + +static int __devinit emac_wait_deps(struct emac_instance *dev) +{ + struct emac_depentry deps[EMAC_DEP_COUNT]; + int i, err; + + memset(&deps, 0, sizeof(deps)); + + deps[EMAC_DEP_MAL_IDX].phandle = dev->mal_ph; + deps[EMAC_DEP_ZMII_IDX].phandle = dev->zmii_ph; + deps[EMAC_DEP_RGMII_IDX].phandle = dev->rgmii_ph; + if (dev->tah_ph) + deps[EMAC_DEP_TAH_IDX].phandle = dev->tah_ph; + if (dev->mdio_ph) + deps[EMAC_DEP_MDIO_IDX].phandle = dev->mdio_ph; + if (dev->blist && dev->blist > emac_boot_list) + deps[EMAC_DEP_PREV_IDX].phandle = 0xffffffffu; + bus_register_notifier(&platform_bus_type, &emac_of_bus_notifier); + wait_event_timeout(emac_probe_wait, + emac_check_deps(dev, deps), + EMAC_PROBE_DEP_TIMEOUT); + bus_unregister_notifier(&platform_bus_type, &emac_of_bus_notifier); + err = emac_check_deps(dev, deps) ? 0 : -ENODEV; + for (i = 0; i < EMAC_DEP_COUNT; i++) { + if (deps[i].node) + of_node_put(deps[i].node); + if (err && deps[i].ofdev) + of_dev_put(deps[i].ofdev); + } + if (err == 0) { + dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; + dev->zmii_dev = deps[EMAC_DEP_ZMII_IDX].ofdev; + dev->rgmii_dev = deps[EMAC_DEP_RGMII_IDX].ofdev; + dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; + dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; + } + if (deps[EMAC_DEP_PREV_IDX].ofdev) + of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev); + return err; +} + +static int __devinit emac_read_uint_prop(struct device_node *np, const char *name, + u32 *val, int fatal) +{ + int len; + const u32 *prop = of_get_property(np, name, &len); + if (prop == NULL || len < sizeof(u32)) { + if (fatal) + printk(KERN_ERR "%s: missing %s property\n", + np->full_name, name); + return -ENODEV; + } + *val = *prop; + return 0; +} + +static int __devinit emac_init_phy(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + struct net_device *ndev = dev->ndev; + u32 phy_map, adv; + int i; + + dev->phy.dev = ndev; + dev->phy.mode = dev->phy_mode; + + /* PHY-less configuration. + * XXX I probably should move these settings to the dev tree + */ + if (dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) { + emac_reset(dev); + + /* PHY-less configuration. + * XXX I probably should move these settings to the dev tree + */ + dev->phy.address = -1; + dev->phy.features = SUPPORTED_MII; + if (emac_phy_supports_gige(dev->phy_mode)) + dev->phy.features |= SUPPORTED_1000baseT_Full; + else + dev->phy.features |= SUPPORTED_100baseT_Full; + dev->phy.pause = 1; + + return 0; + } + + mutex_lock(&emac_phy_map_lock); + phy_map = dev->phy_map | busy_phy_map; + + DBG(dev, "PHY maps %08x %08x" NL, dev->phy_map, busy_phy_map); + + dev->phy.mdio_read = emac_mdio_read; + dev->phy.mdio_write = emac_mdio_write; + + /* Enable internal clock source */ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, 0, SDR0_MFR_ECS); +#endif + /* PHY clock workaround */ + emac_rx_clk_tx(dev); + + /* Enable internal clock source on 440GX*/ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, 0, SDR0_MFR_ECS); +#endif + /* Configure EMAC with defaults so we can at least use MDIO + * This is needed mostly for 440GX + */ + if (emac_phy_gpcs(dev->phy.mode)) { + /* XXX + * Make GPCS PHY address equal to EMAC index. + * We probably should take into account busy_phy_map + * and/or phy_map here. + * + * Note that the busy_phy_map is currently global + * while it should probably be per-ASIC... + */ + dev->phy.gpcs_address = dev->gpcs_address; + if (dev->phy.gpcs_address == 0xffffffff) + dev->phy.address = dev->cell_index; + } + + emac_configure(dev); + + if (dev->phy_address != 0xffffffff) + phy_map = ~(1 << dev->phy_address); + + for (i = 0; i < 0x20; phy_map >>= 1, ++i) + if (!(phy_map & 1)) { + int r; + busy_phy_map |= 1 << i; + + /* Quick check if there is a PHY at the address */ + r = emac_mdio_read(dev->ndev, i, MII_BMCR); + if (r == 0xffff || r < 0) + continue; + if (!emac_mii_phy_probe(&dev->phy, i)) + break; + } + + /* Enable external clock source */ +#ifdef CONFIG_PPC_DCR_NATIVE + if (emac_has_feature(dev, EMAC_FTR_440GX_PHY_CLK_FIX)) + dcri_clrset(SDR0, SDR0_MFR, SDR0_MFR_ECS, 0); +#endif + mutex_unlock(&emac_phy_map_lock); + if (i == 0x20) { + printk(KERN_WARNING "%s: can't find PHY!\n", np->full_name); + return -ENXIO; + } + + /* Init PHY */ + if (dev->phy.def->ops->init) + dev->phy.def->ops->init(&dev->phy); + + /* Disable any PHY features not supported by the platform */ + dev->phy.def->features &= ~dev->phy_feat_exc; + + /* Setup initial link parameters */ + if (dev->phy.features & SUPPORTED_Autoneg) { + adv = dev->phy.features; + if (!emac_has_feature(dev, EMAC_FTR_NO_FLOW_CONTROL_40x)) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + /* Restart autonegotiation */ + dev->phy.def->ops->setup_aneg(&dev->phy, adv); + } else { + u32 f = dev->phy.def->features; + int speed = SPEED_10, fd = DUPLEX_HALF; + + /* Select highest supported speed/duplex */ + if (f & SUPPORTED_1000baseT_Full) { + speed = SPEED_1000; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_1000baseT_Half) + speed = SPEED_1000; + else if (f & SUPPORTED_100baseT_Full) { + speed = SPEED_100; + fd = DUPLEX_FULL; + } else if (f & SUPPORTED_100baseT_Half) + speed = SPEED_100; + else if (f & SUPPORTED_10baseT_Full) + fd = DUPLEX_FULL; + + /* Force link parameters */ + dev->phy.def->ops->setup_forced(&dev->phy, speed, fd); + } + return 0; +} + +static int __devinit emac_init_config(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + const void *p; + + /* Read config from device-tree */ + if (emac_read_uint_prop(np, "mal-device", &dev->mal_ph, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "mal-tx-channel", &dev->mal_tx_chan, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "mal-rx-channel", &dev->mal_rx_chan, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "cell-index", &dev->cell_index, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "max-frame-size", &dev->max_mtu, 0)) + dev->max_mtu = 1500; + if (emac_read_uint_prop(np, "rx-fifo-size", &dev->rx_fifo_size, 0)) + dev->rx_fifo_size = 2048; + if (emac_read_uint_prop(np, "tx-fifo-size", &dev->tx_fifo_size, 0)) + dev->tx_fifo_size = 2048; + if (emac_read_uint_prop(np, "rx-fifo-size-gige", &dev->rx_fifo_size_gige, 0)) + dev->rx_fifo_size_gige = dev->rx_fifo_size; + if (emac_read_uint_prop(np, "tx-fifo-size-gige", &dev->tx_fifo_size_gige, 0)) + dev->tx_fifo_size_gige = dev->tx_fifo_size; + if (emac_read_uint_prop(np, "phy-address", &dev->phy_address, 0)) + dev->phy_address = 0xffffffff; + if (emac_read_uint_prop(np, "phy-map", &dev->phy_map, 0)) + dev->phy_map = 0xffffffff; + if (emac_read_uint_prop(np, "gpcs-address", &dev->gpcs_address, 0)) + dev->gpcs_address = 0xffffffff; + if (emac_read_uint_prop(np->parent, "clock-frequency", &dev->opb_bus_freq, 1)) + return -ENXIO; + if (emac_read_uint_prop(np, "tah-device", &dev->tah_ph, 0)) + dev->tah_ph = 0; + if (emac_read_uint_prop(np, "tah-channel", &dev->tah_port, 0)) + dev->tah_port = 0; + if (emac_read_uint_prop(np, "mdio-device", &dev->mdio_ph, 0)) + dev->mdio_ph = 0; + if (emac_read_uint_prop(np, "zmii-device", &dev->zmii_ph, 0)) + dev->zmii_ph = 0; + if (emac_read_uint_prop(np, "zmii-channel", &dev->zmii_port, 0)) + dev->zmii_port = 0xffffffff; + if (emac_read_uint_prop(np, "rgmii-device", &dev->rgmii_ph, 0)) + dev->rgmii_ph = 0; + if (emac_read_uint_prop(np, "rgmii-channel", &dev->rgmii_port, 0)) + dev->rgmii_port = 0xffffffff; + if (emac_read_uint_prop(np, "fifo-entry-size", &dev->fifo_entry_size, 0)) + dev->fifo_entry_size = 16; + if (emac_read_uint_prop(np, "mal-burst-size", &dev->mal_burst_size, 0)) + dev->mal_burst_size = 256; + + /* PHY mode needs some decoding */ + dev->phy_mode = of_get_phy_mode(np); + if (dev->phy_mode < 0) + dev->phy_mode = PHY_MODE_NA; + + /* Check EMAC version */ + if (of_device_is_compatible(np, "ibm,emac4sync")) { + dev->features |= (EMAC_FTR_EMAC4 | EMAC_FTR_EMAC4SYNC); + if (of_device_is_compatible(np, "ibm,emac-460ex") || + of_device_is_compatible(np, "ibm,emac-460gt")) + dev->features |= EMAC_FTR_460EX_PHY_CLK_FIX; + if (of_device_is_compatible(np, "ibm,emac-405ex") || + of_device_is_compatible(np, "ibm,emac-405exr")) + dev->features |= EMAC_FTR_440EP_PHY_CLK_FIX; + } else if (of_device_is_compatible(np, "ibm,emac4")) { + dev->features |= EMAC_FTR_EMAC4; + if (of_device_is_compatible(np, "ibm,emac-440gx")) + dev->features |= EMAC_FTR_440GX_PHY_CLK_FIX; + } else { + if (of_device_is_compatible(np, "ibm,emac-440ep") || + of_device_is_compatible(np, "ibm,emac-440gr")) + dev->features |= EMAC_FTR_440EP_PHY_CLK_FIX; + if (of_device_is_compatible(np, "ibm,emac-405ez")) { +#ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL + dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x; +#else + printk(KERN_ERR "%s: Flow control not disabled!\n", + np->full_name); + return -ENXIO; +#endif + } + + } + + /* Fixup some feature bits based on the device tree */ + if (of_get_property(np, "has-inverted-stacr-oc", NULL)) + dev->features |= EMAC_FTR_STACR_OC_INVERT; + if (of_get_property(np, "has-new-stacr-staopc", NULL)) + dev->features |= EMAC_FTR_HAS_NEW_STACR; + + /* CAB lacks the appropriate properties */ + if (of_device_is_compatible(np, "ibm,emac-axon")) + dev->features |= EMAC_FTR_HAS_NEW_STACR | + EMAC_FTR_STACR_OC_INVERT; + + /* Enable TAH/ZMII/RGMII features as found */ + if (dev->tah_ph != 0) { +#ifdef CONFIG_IBM_EMAC_TAH + dev->features |= EMAC_FTR_HAS_TAH; +#else + printk(KERN_ERR "%s: TAH support not enabled !\n", + np->full_name); + return -ENXIO; +#endif + } + + if (dev->zmii_ph != 0) { +#ifdef CONFIG_IBM_EMAC_ZMII + dev->features |= EMAC_FTR_HAS_ZMII; +#else + printk(KERN_ERR "%s: ZMII support not enabled !\n", + np->full_name); + return -ENXIO; +#endif + } + + if (dev->rgmii_ph != 0) { +#ifdef CONFIG_IBM_EMAC_RGMII + dev->features |= EMAC_FTR_HAS_RGMII; +#else + printk(KERN_ERR "%s: RGMII support not enabled !\n", + np->full_name); + return -ENXIO; +#endif + } + + /* Read MAC-address */ + p = of_get_property(np, "local-mac-address", NULL); + if (p == NULL) { + printk(KERN_ERR "%s: Can't find local-mac-address property\n", + np->full_name); + return -ENXIO; + } + memcpy(dev->ndev->dev_addr, p, 6); + + /* IAHT and GAHT filter parameterization */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) { + dev->xaht_slots_shift = EMAC4SYNC_XAHT_SLOTS_SHIFT; + dev->xaht_width_shift = EMAC4SYNC_XAHT_WIDTH_SHIFT; + } else { + dev->xaht_slots_shift = EMAC4_XAHT_SLOTS_SHIFT; + dev->xaht_width_shift = EMAC4_XAHT_WIDTH_SHIFT; + } + + DBG(dev, "features : 0x%08x / 0x%08x\n", dev->features, EMAC_FTRS_POSSIBLE); + DBG(dev, "tx_fifo_size : %d (%d gige)\n", dev->tx_fifo_size, dev->tx_fifo_size_gige); + DBG(dev, "rx_fifo_size : %d (%d gige)\n", dev->rx_fifo_size, dev->rx_fifo_size_gige); + DBG(dev, "max_mtu : %d\n", dev->max_mtu); + DBG(dev, "OPB freq : %d\n", dev->opb_bus_freq); + + return 0; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_get_stats = emac_stats, + .ndo_set_rx_mode = emac_set_multicast_list, + .ndo_do_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, + .ndo_start_xmit = emac_start_xmit, + .ndo_change_mtu = eth_change_mtu, +}; + +static const struct net_device_ops emac_gige_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_get_stats = emac_stats, + .ndo_set_rx_mode = emac_set_multicast_list, + .ndo_do_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, + .ndo_start_xmit = emac_start_xmit_sg, + .ndo_change_mtu = emac_change_mtu, +}; + +static int __devinit emac_probe(struct platform_device *ofdev) +{ + struct net_device *ndev; + struct emac_instance *dev; + struct device_node *np = ofdev->dev.of_node; + struct device_node **blist = NULL; + int err, i; + + /* Skip unused/unwired EMACS. We leave the check for an unused + * property here for now, but new flat device trees should set a + * status property to "disabled" instead. + */ + if (of_get_property(np, "unused", NULL) || !of_device_is_available(np)) + return -ENODEV; + + /* Find ourselves in the bootlist if we are there */ + for (i = 0; i < EMAC_BOOT_LIST_SIZE; i++) + if (emac_boot_list[i] == np) + blist = &emac_boot_list[i]; + + /* Allocate our net_device structure */ + err = -ENOMEM; + ndev = alloc_etherdev(sizeof(struct emac_instance)); + if (!ndev) { + printk(KERN_ERR "%s: could not allocate ethernet device!\n", + np->full_name); + goto err_gone; + } + dev = netdev_priv(ndev); + dev->ndev = ndev; + dev->ofdev = ofdev; + dev->blist = blist; + SET_NETDEV_DEV(ndev, &ofdev->dev); + + /* Initialize some embedded data structures */ + mutex_init(&dev->mdio_lock); + mutex_init(&dev->link_lock); + spin_lock_init(&dev->lock); + INIT_WORK(&dev->reset_work, emac_reset_work); + + /* Init various config data based on device-tree */ + err = emac_init_config(dev); + if (err != 0) + goto err_free; + + /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */ + dev->emac_irq = irq_of_parse_and_map(np, 0); + dev->wol_irq = irq_of_parse_and_map(np, 1); + if (dev->emac_irq == NO_IRQ) { + printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); + goto err_free; + } + ndev->irq = dev->emac_irq; + + /* Map EMAC regs */ + if (of_address_to_resource(np, 0, &dev->rsrc_regs)) { + printk(KERN_ERR "%s: Can't get registers address\n", + np->full_name); + goto err_irq_unmap; + } + // TODO : request_mem_region + dev->emacp = ioremap(dev->rsrc_regs.start, + resource_size(&dev->rsrc_regs)); + if (dev->emacp == NULL) { + printk(KERN_ERR "%s: Can't map device registers!\n", + np->full_name); + err = -ENOMEM; + goto err_irq_unmap; + } + + /* Wait for dependent devices */ + err = emac_wait_deps(dev); + if (err) { + printk(KERN_ERR + "%s: Timeout waiting for dependent devices\n", + np->full_name); + /* display more info about what's missing ? */ + goto err_reg_unmap; + } + dev->mal = dev_get_drvdata(&dev->mal_dev->dev); + if (dev->mdio_dev != NULL) + dev->mdio_instance = dev_get_drvdata(&dev->mdio_dev->dev); + + /* Register with MAL */ + dev->commac.ops = &emac_commac_ops; + dev->commac.dev = dev; + dev->commac.tx_chan_mask = MAL_CHAN_MASK(dev->mal_tx_chan); + dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan); + err = mal_register_commac(dev->mal, &dev->commac); + if (err) { + printk(KERN_ERR "%s: failed to register with mal %s!\n", + np->full_name, dev->mal_dev->dev.of_node->full_name); + goto err_rel_deps; + } + dev->rx_skb_size = emac_rx_skb_size(ndev->mtu); + dev->rx_sync_size = emac_rx_sync_size(ndev->mtu); + + /* Get pointers to BD rings */ + dev->tx_desc = + dev->mal->bd_virt + mal_tx_bd_offset(dev->mal, dev->mal_tx_chan); + dev->rx_desc = + dev->mal->bd_virt + mal_rx_bd_offset(dev->mal, dev->mal_rx_chan); + + DBG(dev, "tx_desc %p" NL, dev->tx_desc); + DBG(dev, "rx_desc %p" NL, dev->rx_desc); + + /* Clean rings */ + memset(dev->tx_desc, 0, NUM_TX_BUFF * sizeof(struct mal_descriptor)); + memset(dev->rx_desc, 0, NUM_RX_BUFF * sizeof(struct mal_descriptor)); + memset(dev->tx_skb, 0, NUM_TX_BUFF * sizeof(struct sk_buff *)); + memset(dev->rx_skb, 0, NUM_RX_BUFF * sizeof(struct sk_buff *)); + + /* Attach to ZMII, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII) && + (err = zmii_attach(dev->zmii_dev, dev->zmii_port, &dev->phy_mode)) != 0) + goto err_unreg_commac; + + /* Attach to RGMII, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII) && + (err = rgmii_attach(dev->rgmii_dev, dev->rgmii_port, dev->phy_mode)) != 0) + goto err_detach_zmii; + + /* Attach to TAH, if needed */ + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH) && + (err = tah_attach(dev->tah_dev, dev->tah_port)) != 0) + goto err_detach_rgmii; + + /* Set some link defaults before we can find out real parameters */ + dev->phy.speed = SPEED_100; + dev->phy.duplex = DUPLEX_FULL; + dev->phy.autoneg = AUTONEG_DISABLE; + dev->phy.pause = dev->phy.asym_pause = 0; + dev->stop_timeout = STOP_TIMEOUT_100; + INIT_DELAYED_WORK(&dev->link_work, emac_link_timer); + + /* Find PHY if any */ + err = emac_init_phy(dev); + if (err != 0) + goto err_detach_tah; + + if (dev->tah_dev) { + ndev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG; + ndev->features |= ndev->hw_features | NETIF_F_RXCSUM; + } + ndev->watchdog_timeo = 5 * HZ; + if (emac_phy_supports_gige(dev->phy_mode)) { + ndev->netdev_ops = &emac_gige_netdev_ops; + dev->commac.ops = &emac_commac_sg_ops; + } else + ndev->netdev_ops = &emac_netdev_ops; + SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops); + + netif_carrier_off(ndev); + + err = register_netdev(ndev); + if (err) { + printk(KERN_ERR "%s: failed to register net device (%d)!\n", + np->full_name, err); + goto err_detach_tah; + } + + /* Set our drvdata last as we don't want them visible until we are + * fully initialized + */ + wmb(); + dev_set_drvdata(&ofdev->dev, dev); + + /* There's a new kid in town ! Let's tell everybody */ + wake_up_all(&emac_probe_wait); + + + printk(KERN_INFO "%s: EMAC-%d %s, MAC %pM\n", + ndev->name, dev->cell_index, np->full_name, ndev->dev_addr); + + if (dev->phy_mode == PHY_MODE_SGMII) + printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name); + + if (dev->phy.address >= 0) + printk("%s: found %s PHY (0x%02x)\n", ndev->name, + dev->phy.def->name, dev->phy.address); + + emac_dbg_register(dev); + + /* Life is good */ + return 0; + + /* I have a bad feeling about this ... */ + + err_detach_tah: + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_detach(dev->tah_dev, dev->tah_port); + err_detach_rgmii: + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_detach(dev->rgmii_dev, dev->rgmii_port); + err_detach_zmii: + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_detach(dev->zmii_dev, dev->zmii_port); + err_unreg_commac: + mal_unregister_commac(dev->mal, &dev->commac); + err_rel_deps: + emac_put_deps(dev); + err_reg_unmap: + iounmap(dev->emacp); + err_irq_unmap: + if (dev->wol_irq != NO_IRQ) + irq_dispose_mapping(dev->wol_irq); + if (dev->emac_irq != NO_IRQ) + irq_dispose_mapping(dev->emac_irq); + err_free: + free_netdev(ndev); + err_gone: + /* if we were on the bootlist, remove us as we won't show up and + * wake up all waiters to notify them in case they were waiting + * on us + */ + if (blist) { + *blist = NULL; + wake_up_all(&emac_probe_wait); + } + return err; +} + +static int __devexit emac_remove(struct platform_device *ofdev) +{ + struct emac_instance *dev = dev_get_drvdata(&ofdev->dev); + + DBG(dev, "remove" NL); + + dev_set_drvdata(&ofdev->dev, NULL); + + unregister_netdev(dev->ndev); + + cancel_work_sync(&dev->reset_work); + + if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) + tah_detach(dev->tah_dev, dev->tah_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) + rgmii_detach(dev->rgmii_dev, dev->rgmii_port); + if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) + zmii_detach(dev->zmii_dev, dev->zmii_port); + + mal_unregister_commac(dev->mal, &dev->commac); + emac_put_deps(dev); + + emac_dbg_unregister(dev); + iounmap(dev->emacp); + + if (dev->wol_irq != NO_IRQ) + irq_dispose_mapping(dev->wol_irq); + if (dev->emac_irq != NO_IRQ) + irq_dispose_mapping(dev->emac_irq); + + free_netdev(dev->ndev); + + return 0; +} + +/* XXX Features in here should be replaced by properties... */ +static struct of_device_id emac_match[] = +{ + { + .type = "network", + .compatible = "ibm,emac", + }, + { + .type = "network", + .compatible = "ibm,emac4", + }, + { + .type = "network", + .compatible = "ibm,emac4sync", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, emac_match); + +static struct platform_driver emac_driver = { + .driver = { + .name = "emac", + .owner = THIS_MODULE, + .of_match_table = emac_match, + }, + .probe = emac_probe, + .remove = emac_remove, +}; + +static void __init emac_make_bootlist(void) +{ + struct device_node *np = NULL; + int j, max, i = 0, k; + int cell_indices[EMAC_BOOT_LIST_SIZE]; + + /* Collect EMACs */ + while((np = of_find_all_nodes(np)) != NULL) { + const u32 *idx; + + if (of_match_node(emac_match, np) == NULL) + continue; + if (of_get_property(np, "unused", NULL)) + continue; + idx = of_get_property(np, "cell-index", NULL); + if (idx == NULL) + continue; + cell_indices[i] = *idx; + emac_boot_list[i++] = of_node_get(np); + if (i >= EMAC_BOOT_LIST_SIZE) { + of_node_put(np); + break; + } + } + max = i; + + /* Bubble sort them (doh, what a creative algorithm :-) */ + for (i = 0; max > 1 && (i < (max - 1)); i++) + for (j = i; j < max; j++) { + if (cell_indices[i] > cell_indices[j]) { + np = emac_boot_list[i]; + emac_boot_list[i] = emac_boot_list[j]; + emac_boot_list[j] = np; + k = cell_indices[i]; + cell_indices[i] = cell_indices[j]; + cell_indices[j] = k; + } + } +} + +static int __init emac_init(void) +{ + int rc; + + printk(KERN_INFO DRV_DESC ", version " DRV_VERSION "\n"); + + /* Init debug stuff */ + emac_init_debug(); + + /* Build EMAC boot list */ + emac_make_bootlist(); + + /* Init submodules */ + rc = mal_init(); + if (rc) + goto err; + rc = zmii_init(); + if (rc) + goto err_mal; + rc = rgmii_init(); + if (rc) + goto err_zmii; + rc = tah_init(); + if (rc) + goto err_rgmii; + rc = platform_driver_register(&emac_driver); + if (rc) + goto err_tah; + + return 0; + + err_tah: + tah_exit(); + err_rgmii: + rgmii_exit(); + err_zmii: + zmii_exit(); + err_mal: + mal_exit(); + err: + return rc; +} + +static void __exit emac_exit(void) +{ + int i; + + platform_driver_unregister(&emac_driver); + + tah_exit(); + rgmii_exit(); + zmii_exit(); + mal_exit(); + emac_fini_debug(); + + /* Destroy EMAC boot list */ + for (i = 0; i < EMAC_BOOT_LIST_SIZE; i++) + if (emac_boot_list[i]) + of_node_put(emac_boot_list[i]); +} + +module_init(emac_init); +module_exit(emac_exit); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h new file mode 100644 index 000000000000..fa3ec57935fa --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -0,0 +1,462 @@ +/* + * drivers/net/ibm_newemac/core.h + * + * Driver for PowerPC 4xx on-chip ethernet controller. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Johnnie Peters <jpeters@mvista.com> + * Copyright 2000, 2001 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_NEWEMAC_CORE_H +#define __IBM_NEWEMAC_CORE_H + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/of_platform.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/dcr.h> + +#include "emac.h" +#include "phy.h" +#include "zmii.h" +#include "rgmii.h" +#include "mal.h" +#include "tah.h" +#include "debug.h" + +#define NUM_TX_BUFF CONFIG_IBM_EMAC_TXB +#define NUM_RX_BUFF CONFIG_IBM_EMAC_RXB + +/* Simple sanity check */ +#if NUM_TX_BUFF > 256 || NUM_RX_BUFF > 256 +#error Invalid number of buffer descriptors (greater than 256) +#endif + +#define EMAC_MIN_MTU 46 + +/* Maximum L2 header length (VLAN tagged, no FCS) */ +#define EMAC_MTU_OVERHEAD (6 * 2 + 2 + 4) + +/* RX BD size for the given MTU */ +static inline int emac_rx_size(int mtu) +{ + if (mtu > ETH_DATA_LEN) + return MAL_MAX_RX_SIZE; + else + return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD); +} + +#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment()) + +#define EMAC_RX_SKB_HEADROOM \ + EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM) + +/* Size of RX skb for the given MTU */ +static inline int emac_rx_skb_size(int mtu) +{ + int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu)); + return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM; +} + +/* RX DMA sync size */ +static inline int emac_rx_sync_size(int mtu) +{ + return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2); +} + +/* Driver statistcs is split into two parts to make it more cache friendly: + * - normal statistics (packet count, etc) + * - error statistics + * + * When statistics is requested by ethtool, these parts are concatenated, + * normal one goes first. + * + * Please, keep these structures in sync with emac_stats_keys. + */ + +/* Normal TX/RX Statistics */ +struct emac_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_packets_csum; + u64 tx_packets_csum; +}; + +/* Error statistics */ +struct emac_error_stats { + u64 tx_undo; + + /* Software RX Errors */ + u64 rx_dropped_stack; + u64 rx_dropped_oom; + u64 rx_dropped_error; + u64 rx_dropped_resize; + u64 rx_dropped_mtu; + u64 rx_stopped; + /* BD reported RX errors */ + u64 rx_bd_errors; + u64 rx_bd_overrun; + u64 rx_bd_bad_packet; + u64 rx_bd_runt_packet; + u64 rx_bd_short_event; + u64 rx_bd_alignment_error; + u64 rx_bd_bad_fcs; + u64 rx_bd_packet_too_long; + u64 rx_bd_out_of_range; + u64 rx_bd_in_range; + /* EMAC IRQ reported RX errors */ + u64 rx_parity; + u64 rx_fifo_overrun; + u64 rx_overrun; + u64 rx_bad_packet; + u64 rx_runt_packet; + u64 rx_short_event; + u64 rx_alignment_error; + u64 rx_bad_fcs; + u64 rx_packet_too_long; + u64 rx_out_of_range; + u64 rx_in_range; + + /* Software TX Errors */ + u64 tx_dropped; + /* BD reported TX errors */ + u64 tx_bd_errors; + u64 tx_bd_bad_fcs; + u64 tx_bd_carrier_loss; + u64 tx_bd_excessive_deferral; + u64 tx_bd_excessive_collisions; + u64 tx_bd_late_collision; + u64 tx_bd_multple_collisions; + u64 tx_bd_single_collision; + u64 tx_bd_underrun; + u64 tx_bd_sqe; + /* EMAC IRQ reported TX errors */ + u64 tx_parity; + u64 tx_underrun; + u64 tx_sqe; + u64 tx_errors; +}; + +#define EMAC_ETHTOOL_STATS_COUNT ((sizeof(struct emac_stats) + \ + sizeof(struct emac_error_stats)) \ + / sizeof(u64)) + +struct emac_instance { + struct net_device *ndev; + struct resource rsrc_regs; + struct emac_regs __iomem *emacp; + struct platform_device *ofdev; + struct device_node **blist; /* bootlist entry */ + + /* MAL linkage */ + u32 mal_ph; + struct platform_device *mal_dev; + u32 mal_rx_chan; + u32 mal_tx_chan; + struct mal_instance *mal; + struct mal_commac commac; + + /* PHY infos */ + u32 phy_mode; + u32 phy_map; + u32 phy_address; + u32 phy_feat_exc; + struct mii_phy phy; + struct mutex link_lock; + struct delayed_work link_work; + int link_polling; + + /* GPCS PHY infos */ + u32 gpcs_address; + + /* Shared MDIO if any */ + u32 mdio_ph; + struct platform_device *mdio_dev; + struct emac_instance *mdio_instance; + struct mutex mdio_lock; + + /* ZMII infos if any */ + u32 zmii_ph; + u32 zmii_port; + struct platform_device *zmii_dev; + + /* RGMII infos if any */ + u32 rgmii_ph; + u32 rgmii_port; + struct platform_device *rgmii_dev; + + /* TAH infos if any */ + u32 tah_ph; + u32 tah_port; + struct platform_device *tah_dev; + + /* IRQs */ + int wol_irq; + int emac_irq; + + /* OPB bus frequency in Mhz */ + u32 opb_bus_freq; + + /* Cell index within an ASIC (for clk mgmnt) */ + u32 cell_index; + + /* Max supported MTU */ + u32 max_mtu; + + /* Feature bits (from probe table) */ + unsigned int features; + + /* Tx and Rx fifo sizes & other infos in bytes */ + u32 tx_fifo_size; + u32 tx_fifo_size_gige; + u32 rx_fifo_size; + u32 rx_fifo_size_gige; + u32 fifo_entry_size; + u32 mal_burst_size; /* move to MAL ? */ + + /* IAHT and GAHT filter parameterization */ + u32 xaht_slots_shift; + u32 xaht_width_shift; + + /* Descriptor management + */ + struct mal_descriptor *tx_desc; + int tx_cnt; + int tx_slot; + int ack_slot; + + struct mal_descriptor *rx_desc; + int rx_slot; + struct sk_buff *rx_sg_skb; /* 1 */ + int rx_skb_size; + int rx_sync_size; + + struct sk_buff *tx_skb[NUM_TX_BUFF]; + struct sk_buff *rx_skb[NUM_RX_BUFF]; + + /* Stats + */ + struct emac_error_stats estats; + struct net_device_stats nstats; + struct emac_stats stats; + + /* Misc + */ + int reset_failed; + int stop_timeout; /* in us */ + int no_mcast; + int mcast_pending; + int opened; + struct work_struct reset_work; + spinlock_t lock; +}; + +/* + * Features of various EMAC implementations + */ + +/* + * No flow control on 40x according to the original driver + */ +#define EMAC_FTR_NO_FLOW_CONTROL_40x 0x00000001 +/* + * Cell is an EMAC4 + */ +#define EMAC_FTR_EMAC4 0x00000002 +/* + * For the 440SPe, AMCC inexplicably changed the polarity of + * the "operation complete" bit in the MII control register. + */ +#define EMAC_FTR_STACR_OC_INVERT 0x00000004 +/* + * Set if we have a TAH. + */ +#define EMAC_FTR_HAS_TAH 0x00000008 +/* + * Set if we have a ZMII. + */ +#define EMAC_FTR_HAS_ZMII 0x00000010 +/* + * Set if we have a RGMII. + */ +#define EMAC_FTR_HAS_RGMII 0x00000020 +/* + * Set if we have new type STACR with STAOPC + */ +#define EMAC_FTR_HAS_NEW_STACR 0x00000040 +/* + * Set if we need phy clock workaround for 440gx + */ +#define EMAC_FTR_440GX_PHY_CLK_FIX 0x00000080 +/* + * Set if we need phy clock workaround for 440ep or 440gr + */ +#define EMAC_FTR_440EP_PHY_CLK_FIX 0x00000100 +/* + * The 405EX and 460EX contain the EMAC4SYNC core + */ +#define EMAC_FTR_EMAC4SYNC 0x00000200 +/* + * Set if we need phy clock workaround for 460ex or 460gt + */ +#define EMAC_FTR_460EX_PHY_CLK_FIX 0x00000400 + + +/* Right now, we don't quite handle the always/possible masks on the + * most optimal way as we don't have a way to say something like + * always EMAC4. Patches welcome. + */ +enum { + EMAC_FTRS_ALWAYS = 0, + + EMAC_FTRS_POSSIBLE = +#ifdef CONFIG_IBM_EMAC_EMAC4 + EMAC_FTR_EMAC4 | EMAC_FTR_EMAC4SYNC | + EMAC_FTR_HAS_NEW_STACR | + EMAC_FTR_STACR_OC_INVERT | EMAC_FTR_440GX_PHY_CLK_FIX | +#endif +#ifdef CONFIG_IBM_EMAC_TAH + EMAC_FTR_HAS_TAH | +#endif +#ifdef CONFIG_IBM_EMAC_ZMII + EMAC_FTR_HAS_ZMII | +#endif +#ifdef CONFIG_IBM_EMAC_RGMII + EMAC_FTR_HAS_RGMII | +#endif +#ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL + EMAC_FTR_NO_FLOW_CONTROL_40x | +#endif + EMAC_FTR_460EX_PHY_CLK_FIX | + EMAC_FTR_440EP_PHY_CLK_FIX, +}; + +static inline int emac_has_feature(struct emac_instance *dev, + unsigned long feature) +{ + return (EMAC_FTRS_ALWAYS & feature) || + (EMAC_FTRS_POSSIBLE & dev->features & feature); +} + +/* + * Various instances of the EMAC core have varying 1) number of + * address match slots, 2) width of the registers for handling address + * match slots, 3) number of registers for handling address match + * slots and 4) base offset for those registers. + * + * These macros and inlines handle these differences based on + * parameters supplied by the device structure which are, in turn, + * initialized based on the "compatible" entry in the device tree. + */ + +#define EMAC4_XAHT_SLOTS_SHIFT 6 +#define EMAC4_XAHT_WIDTH_SHIFT 4 + +#define EMAC4SYNC_XAHT_SLOTS_SHIFT 8 +#define EMAC4SYNC_XAHT_WIDTH_SHIFT 5 + +#define EMAC_XAHT_SLOTS(dev) (1 << (dev)->xaht_slots_shift) +#define EMAC_XAHT_WIDTH(dev) (1 << (dev)->xaht_width_shift) +#define EMAC_XAHT_REGS(dev) (1 << ((dev)->xaht_slots_shift - \ + (dev)->xaht_width_shift)) + +#define EMAC_XAHT_CRC_TO_SLOT(dev, crc) \ + ((EMAC_XAHT_SLOTS(dev) - 1) - \ + ((crc) >> ((sizeof (u32) * BITS_PER_BYTE) - \ + (dev)->xaht_slots_shift))) + +#define EMAC_XAHT_SLOT_TO_REG(dev, slot) \ + ((slot) >> (dev)->xaht_width_shift) + +#define EMAC_XAHT_SLOT_TO_MASK(dev, slot) \ + ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ + ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) + +static inline u32 *emac_xaht_base(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + int offset; + + /* The first IAHT entry always is the base of the block of + * IAHT and GAHT registers. + */ + if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) + offset = offsetof(struct emac_regs, u1.emac4sync.iaht1); + else + offset = offsetof(struct emac_regs, u0.emac4.iaht1); + + return (u32 *)((ptrdiff_t)p + offset); +} + +static inline u32 *emac_gaht_base(struct emac_instance *dev) +{ + /* GAHT registers always come after an identical number of + * IAHT registers. + */ + return emac_xaht_base(dev) + EMAC_XAHT_REGS(dev); +} + +static inline u32 *emac_iaht_base(struct emac_instance *dev) +{ + /* IAHT registers always come before an identical number of + * GAHT registers. + */ + return emac_xaht_base(dev); +} + +/* Ethtool get_regs complex data. + * We want to get not just EMAC registers, but also MAL, ZMII, RGMII, TAH + * when available. + * + * Returned BLOB consists of the ibm_emac_ethtool_regs_hdr, + * MAL registers, EMAC registers and optional ZMII, RGMII, TAH registers. + * Each register component is preceded with emac_ethtool_regs_subhdr. + * Order of the optional headers follows their relative bit posititions + * in emac_ethtool_regs_hdr.components + */ +#define EMAC_ETHTOOL_REGS_ZMII 0x00000001 +#define EMAC_ETHTOOL_REGS_RGMII 0x00000002 +#define EMAC_ETHTOOL_REGS_TAH 0x00000004 + +struct emac_ethtool_regs_hdr { + u32 components; +}; + +struct emac_ethtool_regs_subhdr { + u32 version; + u32 index; +}; + +#define EMAC_ETHTOOL_REGS_VER 0 +#define EMAC_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ + (dev)->rsrc_regs.start + 1) +#define EMAC4_ETHTOOL_REGS_VER 1 +#define EMAC4_ETHTOOL_REGS_SIZE(dev) ((dev)->rsrc_regs.end - \ + (dev)->rsrc_regs.start + 1) + +#endif /* __IBM_NEWEMAC_CORE_H */ diff --git a/drivers/net/ethernet/ibm/emac/debug.c b/drivers/net/ethernet/ibm/emac/debug.c new file mode 100644 index 000000000000..8c6c1e2a8750 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/debug.c @@ -0,0 +1,270 @@ +/* + * drivers/net/ibm_newemac/debug.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/sysrq.h> +#include <asm/io.h> + +#include "core.h" + +static DEFINE_SPINLOCK(emac_dbg_lock); + +static void emac_desc_dump(struct emac_instance *p) +{ + int i; + printk("** EMAC %s TX BDs **\n" + " tx_cnt = %d tx_slot = %d ack_slot = %d\n", + p->ofdev->dev.of_node->full_name, + p->tx_cnt, p->tx_slot, p->ack_slot); + for (i = 0; i < NUM_TX_BUFF / 2; ++i) + printk + ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", + i, p->tx_desc[i].data_ptr, p->tx_skb[i] ? 'V' : ' ', + p->tx_desc[i].ctrl, p->tx_desc[i].data_len, + NUM_TX_BUFF / 2 + i, + p->tx_desc[NUM_TX_BUFF / 2 + i].data_ptr, + p->tx_skb[NUM_TX_BUFF / 2 + i] ? 'V' : ' ', + p->tx_desc[NUM_TX_BUFF / 2 + i].ctrl, + p->tx_desc[NUM_TX_BUFF / 2 + i].data_len); + + printk("** EMAC %s RX BDs **\n" + " rx_slot = %d flags = 0x%lx rx_skb_size = %d rx_sync_size = %d\n" + " rx_sg_skb = 0x%p\n", + p->ofdev->dev.of_node->full_name, + p->rx_slot, p->commac.flags, p->rx_skb_size, + p->rx_sync_size, p->rx_sg_skb); + for (i = 0; i < NUM_RX_BUFF / 2; ++i) + printk + ("bd[%2d] 0x%08x %c 0x%04x %4u - bd[%2d] 0x%08x %c 0x%04x %4u\n", + i, p->rx_desc[i].data_ptr, p->rx_skb[i] ? 'V' : ' ', + p->rx_desc[i].ctrl, p->rx_desc[i].data_len, + NUM_RX_BUFF / 2 + i, + p->rx_desc[NUM_RX_BUFF / 2 + i].data_ptr, + p->rx_skb[NUM_RX_BUFF / 2 + i] ? 'V' : ' ', + p->rx_desc[NUM_RX_BUFF / 2 + i].ctrl, + p->rx_desc[NUM_RX_BUFF / 2 + i].data_len); +} + +static void emac_mac_dump(struct emac_instance *dev) +{ + struct emac_regs __iomem *p = dev->emacp; + const int xaht_regs = EMAC_XAHT_REGS(dev); + u32 *gaht_base = emac_gaht_base(dev); + u32 *iaht_base = emac_iaht_base(dev); + int emac4sync = emac_has_feature(dev, EMAC_FTR_EMAC4SYNC); + int n; + + printk("** EMAC %s registers **\n" + "MR0 = 0x%08x MR1 = 0x%08x TMR0 = 0x%08x TMR1 = 0x%08x\n" + "RMR = 0x%08x ISR = 0x%08x ISER = 0x%08x\n" + "IAR = %04x%08x VTPID = 0x%04x VTCI = 0x%04x\n", + dev->ofdev->dev.of_node->full_name, + in_be32(&p->mr0), in_be32(&p->mr1), + in_be32(&p->tmr0), in_be32(&p->tmr1), + in_be32(&p->rmr), in_be32(&p->isr), in_be32(&p->iser), + in_be32(&p->iahr), in_be32(&p->ialr), in_be32(&p->vtpid), + in_be32(&p->vtci) + ); + + if (emac4sync) + printk("MAR = %04x%08x MMAR = %04x%08x\n", + in_be32(&p->u0.emac4sync.mahr), + in_be32(&p->u0.emac4sync.malr), + in_be32(&p->u0.emac4sync.mmahr), + in_be32(&p->u0.emac4sync.mmalr) + ); + + for (n = 0; n < xaht_regs; n++) + printk("IAHT%02d = 0x%08x\n", n + 1, in_be32(iaht_base + n)); + + for (n = 0; n < xaht_regs; n++) + printk("GAHT%02d = 0x%08x\n", n + 1, in_be32(gaht_base + n)); + + printk("LSA = %04x%08x IPGVR = 0x%04x\n" + "STACR = 0x%08x TRTR = 0x%08x RWMR = 0x%08x\n" + "OCTX = 0x%08x OCRX = 0x%08x\n", + in_be32(&p->lsah), in_be32(&p->lsal), in_be32(&p->ipgvr), + in_be32(&p->stacr), in_be32(&p->trtr), in_be32(&p->rwmr), + in_be32(&p->octx), in_be32(&p->ocrx) + ); + + if (!emac4sync) { + printk("IPCR = 0x%08x\n", + in_be32(&p->u1.emac4.ipcr) + ); + } else { + printk("REVID = 0x%08x TPC = 0x%08x\n", + in_be32(&p->u1.emac4sync.revid), + in_be32(&p->u1.emac4sync.tpc) + ); + } + + emac_desc_dump(dev); +} + +static void emac_mal_dump(struct mal_instance *mal) +{ + int i; + + printk("** MAL %s Registers **\n" + "CFG = 0x%08x ESR = 0x%08x IER = 0x%08x\n" + "TX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n" + "RX|CASR = 0x%08x CARR = 0x%08x EOBISR = 0x%08x DEIR = 0x%08x\n", + mal->ofdev->dev.of_node->full_name, + get_mal_dcrn(mal, MAL_CFG), get_mal_dcrn(mal, MAL_ESR), + get_mal_dcrn(mal, MAL_IER), + get_mal_dcrn(mal, MAL_TXCASR), get_mal_dcrn(mal, MAL_TXCARR), + get_mal_dcrn(mal, MAL_TXEOBISR), get_mal_dcrn(mal, MAL_TXDEIR), + get_mal_dcrn(mal, MAL_RXCASR), get_mal_dcrn(mal, MAL_RXCARR), + get_mal_dcrn(mal, MAL_RXEOBISR), get_mal_dcrn(mal, MAL_RXDEIR) + ); + + printk("TX|"); + for (i = 0; i < mal->num_tx_chans; ++i) { + if (i && !(i % 4)) + printk("\n "); + printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_TXCTPR(i))); + } + printk("\nRX|"); + for (i = 0; i < mal->num_rx_chans; ++i) { + if (i && !(i % 4)) + printk("\n "); + printk("CTP%d = 0x%08x ", i, get_mal_dcrn(mal, MAL_RXCTPR(i))); + } + printk("\n "); + for (i = 0; i < mal->num_rx_chans; ++i) { + u32 r = get_mal_dcrn(mal, MAL_RCBS(i)); + if (i && !(i % 3)) + printk("\n "); + printk("RCBS%d = 0x%08x (%d) ", i, r, r * 16); + } + printk("\n"); +} + +static struct emac_instance *__emacs[4]; +static struct mal_instance *__mals[1]; + +void emac_dbg_register(struct emac_instance *dev) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&emac_dbg_lock, flags); + for (i = 0; i < ARRAY_SIZE(__emacs); i++) + if (__emacs[i] == NULL) { + __emacs[i] = dev; + break; + } + spin_unlock_irqrestore(&emac_dbg_lock, flags); +} + +void emac_dbg_unregister(struct emac_instance *dev) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&emac_dbg_lock, flags); + for (i = 0; i < ARRAY_SIZE(__emacs); i++) + if (__emacs[i] == dev) { + __emacs[i] = NULL; + break; + } + spin_unlock_irqrestore(&emac_dbg_lock, flags); +} + +void mal_dbg_register(struct mal_instance *mal) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&emac_dbg_lock, flags); + for (i = 0; i < ARRAY_SIZE(__mals); i++) + if (__mals[i] == NULL) { + __mals[i] = mal; + break; + } + spin_unlock_irqrestore(&emac_dbg_lock, flags); +} + +void mal_dbg_unregister(struct mal_instance *mal) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&emac_dbg_lock, flags); + for (i = 0; i < ARRAY_SIZE(__mals); i++) + if (__mals[i] == mal) { + __mals[i] = NULL; + break; + } + spin_unlock_irqrestore(&emac_dbg_lock, flags); +} + +void emac_dbg_dump_all(void) +{ + unsigned int i; + unsigned long flags; + + spin_lock_irqsave(&emac_dbg_lock, flags); + + for (i = 0; i < ARRAY_SIZE(__mals); ++i) + if (__mals[i]) + emac_mal_dump(__mals[i]); + + for (i = 0; i < ARRAY_SIZE(__emacs); ++i) + if (__emacs[i]) + emac_mac_dump(__emacs[i]); + + spin_unlock_irqrestore(&emac_dbg_lock, flags); +} + +#if defined(CONFIG_MAGIC_SYSRQ) +static void emac_sysrq_handler(int key) +{ + emac_dbg_dump_all(); +} + +static struct sysrq_key_op emac_sysrq_op = { + .handler = emac_sysrq_handler, + .help_msg = "emaC", + .action_msg = "Show EMAC(s) status", +}; + +int __init emac_init_debug(void) +{ + return register_sysrq_key('c', &emac_sysrq_op); +} + +void __exit emac_fini_debug(void) +{ + unregister_sysrq_key('c', &emac_sysrq_op); +} + +#else +int __init emac_init_debug(void) +{ + return 0; +} +void __exit emac_fini_debug(void) +{ +} +#endif /* CONFIG_MAGIC_SYSRQ */ diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h new file mode 100644 index 000000000000..90477fe69d0c --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/debug.h @@ -0,0 +1,83 @@ +/* + * drivers/net/ibm_newemac/debug.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, debug print routines. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_NEWEMAC_DEBUG_H +#define __IBM_NEWEMAC_DEBUG_H + +#include <linux/init.h> + +#include "core.h" + +#if defined(CONFIG_IBM_EMAC_DEBUG) + +struct emac_instance; +struct mal_instance; + +extern void emac_dbg_register(struct emac_instance *dev); +extern void emac_dbg_unregister(struct emac_instance *dev); +extern void mal_dbg_register(struct mal_instance *mal); +extern void mal_dbg_unregister(struct mal_instance *mal); +extern int emac_init_debug(void) __init; +extern void emac_fini_debug(void) __exit; +extern void emac_dbg_dump_all(void); + +# define DBG_LEVEL 1 + +#else + +# define emac_dbg_register(x) do { } while(0) +# define emac_dbg_unregister(x) do { } while(0) +# define mal_dbg_register(x) do { } while(0) +# define mal_dbg_unregister(x) do { } while(0) +# define emac_init_debug() do { } while(0) +# define emac_fini_debug() do { } while(0) +# define emac_dbg_dump_all() do { } while(0) + +# define DBG_LEVEL 0 + +#endif + +#define EMAC_DBG(d, name, fmt, arg...) \ + printk(KERN_DEBUG #name "%s: " fmt, d->ofdev->dev.of_node->full_name, ## arg) + +#if DBG_LEVEL > 0 +# define DBG(d,f,x...) EMAC_DBG(d, emac, f, ##x) +# define MAL_DBG(d,f,x...) EMAC_DBG(d, mal, f, ##x) +# define ZMII_DBG(d,f,x...) EMAC_DBG(d, zmii, f, ##x) +# define RGMII_DBG(d,f,x...) EMAC_DBG(d, rgmii, f, ##x) +# define NL "\n" +#else +# define DBG(f,x...) ((void)0) +# define MAL_DBG(d,f,x...) ((void)0) +# define ZMII_DBG(d,f,x...) ((void)0) +# define RGMII_DBG(d,f,x...) ((void)0) +#endif +#if DBG_LEVEL > 1 +# define DBG2(d,f,x...) DBG(d,f, ##x) +# define MAL_DBG2(d,f,x...) MAL_DBG(d,f, ##x) +# define ZMII_DBG2(d,f,x...) ZMII_DBG(d,f, ##x) +# define RGMII_DBG2(d,f,x...) RGMII_DBG(d,f, ##x) +#else +# define DBG2(f,x...) ((void)0) +# define MAL_DBG2(d,f,x...) ((void)0) +# define ZMII_DBG2(d,f,x...) ((void)0) +# define RGMII_DBG2(d,f,x...) ((void)0) +#endif + +#endif /* __IBM_NEWEMAC_DEBUG_H */ diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h new file mode 100644 index 000000000000..1568278d759a --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -0,0 +1,312 @@ +/* + * drivers/net/ibm_newemac/emac.h + * + * Register definitions for PowerPC 4xx on-chip ethernet contoller + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * Armin Kuster <akuster@mvista.com> + * Copyright 2002-2004 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_NEWEMAC_H +#define __IBM_NEWEMAC_H + +#include <linux/types.h> +#include <linux/phy.h> + +/* EMAC registers Write Access rules */ +struct emac_regs { + /* Common registers across all EMAC implementations. */ + u32 mr0; /* Special */ + u32 mr1; /* Reset */ + u32 tmr0; /* Special */ + u32 tmr1; /* Special */ + u32 rmr; /* Reset */ + u32 isr; /* Always */ + u32 iser; /* Reset */ + u32 iahr; /* Reset, R, T */ + u32 ialr; /* Reset, R, T */ + u32 vtpid; /* Reset, R, T */ + u32 vtci; /* Reset, R, T */ + u32 ptr; /* Reset, T */ + union { + /* Registers unique to EMAC4 implementations */ + struct { + u32 iaht1; /* Reset, R */ + u32 iaht2; /* Reset, R */ + u32 iaht3; /* Reset, R */ + u32 iaht4; /* Reset, R */ + u32 gaht1; /* Reset, R */ + u32 gaht2; /* Reset, R */ + u32 gaht3; /* Reset, R */ + u32 gaht4; /* Reset, R */ + } emac4; + /* Registers unique to EMAC4SYNC implementations */ + struct { + u32 mahr; /* Reset, R, T */ + u32 malr; /* Reset, R, T */ + u32 mmahr; /* Reset, R, T */ + u32 mmalr; /* Reset, R, T */ + u32 rsvd0[4]; + } emac4sync; + } u0; + /* Common registers across all EMAC implementations. */ + u32 lsah; + u32 lsal; + u32 ipgvr; /* Reset, T */ + u32 stacr; /* Special */ + u32 trtr; /* Special */ + u32 rwmr; /* Reset */ + u32 octx; + u32 ocrx; + union { + /* Registers unique to EMAC4 implementations */ + struct { + u32 ipcr; + } emac4; + /* Registers unique to EMAC4SYNC implementations */ + struct { + u32 rsvd1; + u32 revid; + u32 rsvd2[2]; + u32 iaht1; /* Reset, R */ + u32 iaht2; /* Reset, R */ + u32 iaht3; /* Reset, R */ + u32 iaht4; /* Reset, R */ + u32 iaht5; /* Reset, R */ + u32 iaht6; /* Reset, R */ + u32 iaht7; /* Reset, R */ + u32 iaht8; /* Reset, R */ + u32 gaht1; /* Reset, R */ + u32 gaht2; /* Reset, R */ + u32 gaht3; /* Reset, R */ + u32 gaht4; /* Reset, R */ + u32 gaht5; /* Reset, R */ + u32 gaht6; /* Reset, R */ + u32 gaht7; /* Reset, R */ + u32 gaht8; /* Reset, R */ + u32 tpc; /* Reset, T */ + } emac4sync; + } u1; +}; + +/* + * PHY mode settings (EMAC <-> ZMII/RGMII bridge <-> PHY) + */ +#define PHY_MODE_NA PHY_INTERFACE_MODE_NA +#define PHY_MODE_MII PHY_INTERFACE_MODE_MII +#define PHY_MODE_RMII PHY_INTERFACE_MODE_RMII +#define PHY_MODE_SMII PHY_INTERFACE_MODE_SMII +#define PHY_MODE_RGMII PHY_INTERFACE_MODE_RGMII +#define PHY_MODE_TBI PHY_INTERFACE_MODE_TBI +#define PHY_MODE_GMII PHY_INTERFACE_MODE_GMII +#define PHY_MODE_RTBI PHY_INTERFACE_MODE_RTBI +#define PHY_MODE_SGMII PHY_INTERFACE_MODE_SGMII + +/* EMACx_MR0 */ +#define EMAC_MR0_RXI 0x80000000 +#define EMAC_MR0_TXI 0x40000000 +#define EMAC_MR0_SRST 0x20000000 +#define EMAC_MR0_TXE 0x10000000 +#define EMAC_MR0_RXE 0x08000000 +#define EMAC_MR0_WKE 0x04000000 + +/* EMACx_MR1 */ +#define EMAC_MR1_FDE 0x80000000 +#define EMAC_MR1_ILE 0x40000000 +#define EMAC_MR1_VLE 0x20000000 +#define EMAC_MR1_EIFC 0x10000000 +#define EMAC_MR1_APP 0x08000000 +#define EMAC_MR1_IST 0x01000000 + +#define EMAC_MR1_MF_MASK 0x00c00000 +#define EMAC_MR1_MF_10 0x00000000 +#define EMAC_MR1_MF_100 0x00400000 +#define EMAC_MR1_MF_1000 0x00800000 +#define EMAC_MR1_MF_1000GPCS 0x00c00000 +#define EMAC_MR1_MF_IPPA(id) (((id) & 0x1f) << 6) + +#define EMAC_MR1_RFS_4K 0x00300000 +#define EMAC_MR1_RFS_16K 0x00000000 +#define EMAC_MR1_TFS_2K 0x00080000 +#define EMAC_MR1_TR0_MULT 0x00008000 +#define EMAC_MR1_JPSM 0x00000000 +#define EMAC_MR1_MWSW_001 0x00000000 +#define EMAC_MR1_BASE(opb) (EMAC_MR1_TFS_2K | EMAC_MR1_TR0_MULT) + + +#define EMAC4_MR1_RFS_2K 0x00100000 +#define EMAC4_MR1_RFS_4K 0x00180000 +#define EMAC4_MR1_RFS_16K 0x00280000 +#define EMAC4_MR1_TFS_2K 0x00020000 +#define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_16K 0x00050000 +#define EMAC4_MR1_TR 0x00008000 +#define EMAC4_MR1_MWSW_001 0x00001000 +#define EMAC4_MR1_JPSM 0x00000800 +#define EMAC4_MR1_OBCI_MASK 0x00000038 +#define EMAC4_MR1_OBCI_50 0x00000000 +#define EMAC4_MR1_OBCI_66 0x00000008 +#define EMAC4_MR1_OBCI_83 0x00000010 +#define EMAC4_MR1_OBCI_100 0x00000018 +#define EMAC4_MR1_OBCI_100P 0x00000020 +#define EMAC4_MR1_OBCI(freq) ((freq) <= 50 ? EMAC4_MR1_OBCI_50 : \ + (freq) <= 66 ? EMAC4_MR1_OBCI_66 : \ + (freq) <= 83 ? EMAC4_MR1_OBCI_83 : \ + (freq) <= 100 ? EMAC4_MR1_OBCI_100 : \ + EMAC4_MR1_OBCI_100P) + +/* EMACx_TMR0 */ +#define EMAC_TMR0_GNP 0x80000000 +#define EMAC_TMR0_DEFAULT 0x00000000 +#define EMAC4_TMR0_TFAE_2_32 0x00000001 +#define EMAC4_TMR0_TFAE_4_64 0x00000002 +#define EMAC4_TMR0_TFAE_8_128 0x00000003 +#define EMAC4_TMR0_TFAE_16_256 0x00000004 +#define EMAC4_TMR0_TFAE_32_512 0x00000005 +#define EMAC4_TMR0_TFAE_64_1024 0x00000006 +#define EMAC4_TMR0_TFAE_128_2048 0x00000007 +#define EMAC4_TMR0_DEFAULT EMAC4_TMR0_TFAE_2_32 +#define EMAC_TMR0_XMIT (EMAC_TMR0_GNP | EMAC_TMR0_DEFAULT) +#define EMAC4_TMR0_XMIT (EMAC_TMR0_GNP | EMAC4_TMR0_DEFAULT) + +/* EMACx_TMR1 */ + +#define EMAC_TMR1(l,h) (((l) << 27) | (((h) & 0xff) << 16)) +#define EMAC4_TMR1(l,h) (((l) << 27) | (((h) & 0x3ff) << 14)) + +/* EMACx_RMR */ +#define EMAC_RMR_SP 0x80000000 +#define EMAC_RMR_SFCS 0x40000000 +#define EMAC_RMR_RRP 0x20000000 +#define EMAC_RMR_RFP 0x10000000 +#define EMAC_RMR_ROP 0x08000000 +#define EMAC_RMR_RPIR 0x04000000 +#define EMAC_RMR_PPP 0x02000000 +#define EMAC_RMR_PME 0x01000000 +#define EMAC_RMR_PMME 0x00800000 +#define EMAC_RMR_IAE 0x00400000 +#define EMAC_RMR_MIAE 0x00200000 +#define EMAC_RMR_BAE 0x00100000 +#define EMAC_RMR_MAE 0x00080000 +#define EMAC_RMR_BASE 0x00000000 +#define EMAC4_RMR_RFAF_2_32 0x00000001 +#define EMAC4_RMR_RFAF_4_64 0x00000002 +#define EMAC4_RMR_RFAF_8_128 0x00000003 +#define EMAC4_RMR_RFAF_16_256 0x00000004 +#define EMAC4_RMR_RFAF_32_512 0x00000005 +#define EMAC4_RMR_RFAF_64_1024 0x00000006 +#define EMAC4_RMR_RFAF_128_2048 0x00000007 +#define EMAC4_RMR_BASE EMAC4_RMR_RFAF_128_2048 + +/* EMACx_ISR & EMACx_ISER */ +#define EMAC4_ISR_TXPE 0x20000000 +#define EMAC4_ISR_RXPE 0x10000000 +#define EMAC4_ISR_TXUE 0x08000000 +#define EMAC4_ISR_RXOE 0x04000000 +#define EMAC_ISR_OVR 0x02000000 +#define EMAC_ISR_PP 0x01000000 +#define EMAC_ISR_BP 0x00800000 +#define EMAC_ISR_RP 0x00400000 +#define EMAC_ISR_SE 0x00200000 +#define EMAC_ISR_ALE 0x00100000 +#define EMAC_ISR_BFCS 0x00080000 +#define EMAC_ISR_PTLE 0x00040000 +#define EMAC_ISR_ORE 0x00020000 +#define EMAC_ISR_IRE 0x00010000 +#define EMAC_ISR_SQE 0x00000080 +#define EMAC_ISR_TE 0x00000040 +#define EMAC_ISR_MOS 0x00000002 +#define EMAC_ISR_MOF 0x00000001 + +/* EMACx_STACR */ +#define EMAC_STACR_PHYD_MASK 0xffff +#define EMAC_STACR_PHYD_SHIFT 16 +#define EMAC_STACR_OC 0x00008000 +#define EMAC_STACR_PHYE 0x00004000 +#define EMAC_STACR_STAC_MASK 0x00003000 +#define EMAC_STACR_STAC_READ 0x00001000 +#define EMAC_STACR_STAC_WRITE 0x00002000 +#define EMAC_STACR_OPBC_MASK 0x00000C00 +#define EMAC_STACR_OPBC_50 0x00000000 +#define EMAC_STACR_OPBC_66 0x00000400 +#define EMAC_STACR_OPBC_83 0x00000800 +#define EMAC_STACR_OPBC_100 0x00000C00 +#define EMAC_STACR_OPBC(freq) ((freq) <= 50 ? EMAC_STACR_OPBC_50 : \ + (freq) <= 66 ? EMAC_STACR_OPBC_66 : \ + (freq) <= 83 ? EMAC_STACR_OPBC_83 : EMAC_STACR_OPBC_100) +#define EMAC_STACR_BASE(opb) EMAC_STACR_OPBC(opb) +#define EMAC4_STACR_BASE(opb) 0x00000000 +#define EMAC_STACR_PCDA_MASK 0x1f +#define EMAC_STACR_PCDA_SHIFT 5 +#define EMAC_STACR_PRA_MASK 0x1f +#define EMACX_STACR_STAC_MASK 0x00003800 +#define EMACX_STACR_STAC_READ 0x00001000 +#define EMACX_STACR_STAC_WRITE 0x00000800 +#define EMACX_STACR_STAC_IND_ADDR 0x00002000 +#define EMACX_STACR_STAC_IND_READ 0x00003800 +#define EMACX_STACR_STAC_IND_READINC 0x00003000 +#define EMACX_STACR_STAC_IND_WRITE 0x00002800 + + +/* EMACx_TRTR */ +#define EMAC_TRTR_SHIFT_EMAC4 24 +#define EMAC_TRTR_SHIFT 27 + +/* EMAC specific TX descriptor control fields (write access) */ +#define EMAC_TX_CTRL_GFCS 0x0200 +#define EMAC_TX_CTRL_GP 0x0100 +#define EMAC_TX_CTRL_ISA 0x0080 +#define EMAC_TX_CTRL_RSA 0x0040 +#define EMAC_TX_CTRL_IVT 0x0020 +#define EMAC_TX_CTRL_RVT 0x0010 +#define EMAC_TX_CTRL_TAH_CSUM 0x000e + +/* EMAC specific TX descriptor status fields (read access) */ +#define EMAC_TX_ST_BFCS 0x0200 +#define EMAC_TX_ST_LCS 0x0080 +#define EMAC_TX_ST_ED 0x0040 +#define EMAC_TX_ST_EC 0x0020 +#define EMAC_TX_ST_LC 0x0010 +#define EMAC_TX_ST_MC 0x0008 +#define EMAC_TX_ST_SC 0x0004 +#define EMAC_TX_ST_UR 0x0002 +#define EMAC_TX_ST_SQE 0x0001 +#define EMAC_IS_BAD_TX (EMAC_TX_ST_LCS | EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC | \ + EMAC_TX_ST_MC | EMAC_TX_ST_UR) +#define EMAC_IS_BAD_TX_TAH (EMAC_TX_ST_LCS | EMAC_TX_ST_ED | \ + EMAC_TX_ST_EC | EMAC_TX_ST_LC) + +/* EMAC specific RX descriptor status fields (read access) */ +#define EMAC_RX_ST_OE 0x0200 +#define EMAC_RX_ST_PP 0x0100 +#define EMAC_RX_ST_BP 0x0080 +#define EMAC_RX_ST_RP 0x0040 +#define EMAC_RX_ST_SE 0x0020 +#define EMAC_RX_ST_AE 0x0010 +#define EMAC_RX_ST_BFCS 0x0008 +#define EMAC_RX_ST_PTL 0x0004 +#define EMAC_RX_ST_ORE 0x0002 +#define EMAC_RX_ST_IRE 0x0001 +#define EMAC_RX_TAH_BAD_CSUM 0x0003 +#define EMAC_BAD_RX_MASK (EMAC_RX_ST_OE | EMAC_RX_ST_BP | \ + EMAC_RX_ST_RP | EMAC_RX_ST_SE | \ + EMAC_RX_ST_AE | EMAC_RX_ST_BFCS | \ + EMAC_RX_ST_PTL | EMAC_RX_ST_ORE | \ + EMAC_RX_ST_IRE ) +#endif /* __IBM_NEWEMAC_H */ diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c new file mode 100644 index 000000000000..f3c50b97ec61 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -0,0 +1,809 @@ +/* + * drivers/net/ibm_newemac/mal.c + * + * Memory Access Layer (MAL) support + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Benjamin Herrenschmidt <benh@kernel.crashing.org>, + * David Gibson <hermes@gibson.dropbear.id.au>, + * + * Armin Kuster <akuster@mvista.com> + * Copyright 2002 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/delay.h> +#include <linux/slab.h> + +#include "core.h" +#include <asm/dcr-regs.h> + +static int mal_count; + +int __devinit mal_register_commac(struct mal_instance *mal, + struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "reg(%08x, %08x)" NL, + commac->tx_chan_mask, commac->rx_chan_mask); + + /* Don't let multiple commacs claim the same channel(s) */ + if ((mal->tx_chan_mask & commac->tx_chan_mask) || + (mal->rx_chan_mask & commac->rx_chan_mask)) { + spin_unlock_irqrestore(&mal->lock, flags); + printk(KERN_WARNING "mal%d: COMMAC channels conflict!\n", + mal->index); + return -EBUSY; + } + + if (list_empty(&mal->list)) + napi_enable(&mal->napi); + mal->tx_chan_mask |= commac->tx_chan_mask; + mal->rx_chan_mask |= commac->rx_chan_mask; + list_add(&commac->list, &mal->list); + + spin_unlock_irqrestore(&mal->lock, flags); + + return 0; +} + +void mal_unregister_commac(struct mal_instance *mal, + struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "unreg(%08x, %08x)" NL, + commac->tx_chan_mask, commac->rx_chan_mask); + + mal->tx_chan_mask &= ~commac->tx_chan_mask; + mal->rx_chan_mask &= ~commac->rx_chan_mask; + list_del_init(&commac->list); + if (list_empty(&mal->list)) + napi_disable(&mal->napi); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +int mal_set_rcbs(struct mal_instance *mal, int channel, unsigned long size) +{ + BUG_ON(channel < 0 || channel >= mal->num_rx_chans || + size > MAL_MAX_RX_SIZE); + + MAL_DBG(mal, "set_rbcs(%d, %lu)" NL, channel, size); + + if (size & 0xf) { + printk(KERN_WARNING + "mal%d: incorrect RX size %lu for the channel %d\n", + mal->index, size, channel); + return -EINVAL; + } + + set_mal_dcrn(mal, MAL_RCBS(channel), size >> 4); + return 0; +} + +int mal_tx_bd_offset(struct mal_instance *mal, int channel) +{ + BUG_ON(channel < 0 || channel >= mal->num_tx_chans); + + return channel * NUM_TX_BUFF; +} + +int mal_rx_bd_offset(struct mal_instance *mal, int channel) +{ + BUG_ON(channel < 0 || channel >= mal->num_rx_chans); + return mal->num_tx_chans * NUM_TX_BUFF + channel * NUM_RX_BUFF; +} + +void mal_enable_tx_channel(struct mal_instance *mal, int channel) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "enable_tx(%d)" NL, channel); + + set_mal_dcrn(mal, MAL_TXCASR, + get_mal_dcrn(mal, MAL_TXCASR) | MAL_CHAN_MASK(channel)); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_disable_tx_channel(struct mal_instance *mal, int channel) +{ + set_mal_dcrn(mal, MAL_TXCARR, MAL_CHAN_MASK(channel)); + + MAL_DBG(mal, "disable_tx(%d)" NL, channel); +} + +void mal_enable_rx_channel(struct mal_instance *mal, int channel) +{ + unsigned long flags; + + /* + * On some 4xx PPC's (e.g. 460EX/GT), the rx channel is a multiple + * of 8, but enabling in MAL_RXCASR needs the divided by 8 value + * for the bitmask + */ + if (!(channel % 8)) + channel >>= 3; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "enable_rx(%d)" NL, channel); + + set_mal_dcrn(mal, MAL_RXCASR, + get_mal_dcrn(mal, MAL_RXCASR) | MAL_CHAN_MASK(channel)); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_disable_rx_channel(struct mal_instance *mal, int channel) +{ + /* + * On some 4xx PPC's (e.g. 460EX/GT), the rx channel is a multiple + * of 8, but enabling in MAL_RXCASR needs the divided by 8 value + * for the bitmask + */ + if (!(channel % 8)) + channel >>= 3; + + set_mal_dcrn(mal, MAL_RXCARR, MAL_CHAN_MASK(channel)); + + MAL_DBG(mal, "disable_rx(%d)" NL, channel); +} + +void mal_poll_add(struct mal_instance *mal, struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "poll_add(%p)" NL, commac); + + /* starts disabled */ + set_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags); + + list_add_tail(&commac->poll_list, &mal->poll_list); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +void mal_poll_del(struct mal_instance *mal, struct mal_commac *commac) +{ + unsigned long flags; + + spin_lock_irqsave(&mal->lock, flags); + + MAL_DBG(mal, "poll_del(%p)" NL, commac); + + list_del(&commac->poll_list); + + spin_unlock_irqrestore(&mal->lock, flags); +} + +/* synchronized by mal_poll() */ +static inline void mal_enable_eob_irq(struct mal_instance *mal) +{ + MAL_DBG2(mal, "enable_irq" NL); + + // XXX might want to cache MAL_CFG as the DCR read can be slooooow + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) | MAL_CFG_EOPIE); +} + +/* synchronized by NAPI state */ +static inline void mal_disable_eob_irq(struct mal_instance *mal) +{ + // XXX might want to cache MAL_CFG as the DCR read can be slooooow + set_mal_dcrn(mal, MAL_CFG, get_mal_dcrn(mal, MAL_CFG) & ~MAL_CFG_EOPIE); + + MAL_DBG2(mal, "disable_irq" NL); +} + +static irqreturn_t mal_serr(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 esr = get_mal_dcrn(mal, MAL_ESR); + + /* Clear the error status register */ + set_mal_dcrn(mal, MAL_ESR, esr); + + MAL_DBG(mal, "SERR %08x" NL, esr); + + if (esr & MAL_ESR_EVB) { + if (esr & MAL_ESR_DE) { + /* We ignore Descriptor error, + * TXDE or RXDE interrupt will be generated anyway. + */ + return IRQ_HANDLED; + } + + if (esr & MAL_ESR_PEIN) { + /* PLB error, it's probably buggy hardware or + * incorrect physical address in BD (i.e. bug) + */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, " + "PLB (ESR = 0x%08x)\n", + mal->index, esr); + return IRQ_HANDLED; + } + + /* OPB error, it's probably buggy hardware or incorrect + * EBC setup + */ + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: system error, OPB (ESR = 0x%08x)\n", + mal->index, esr); + } + return IRQ_HANDLED; +} + +static inline void mal_schedule_poll(struct mal_instance *mal) +{ + if (likely(napi_schedule_prep(&mal->napi))) { + MAL_DBG2(mal, "schedule_poll" NL); + mal_disable_eob_irq(mal); + __napi_schedule(&mal->napi); + } else + MAL_DBG2(mal, "already in poll" NL); +} + +static irqreturn_t mal_txeob(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 r = get_mal_dcrn(mal, MAL_TXEOBISR); + + MAL_DBG2(mal, "txeob %08x" NL, r); + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_TXEOBISR, r); + +#ifdef CONFIG_PPC_DCR_NATIVE + if (mal_has_feature(mal, MAL_FTR_CLEAR_ICINTSTAT)) + mtdcri(SDR0, DCRN_SDR_ICINTSTAT, + (mfdcri(SDR0, DCRN_SDR_ICINTSTAT) | ICINTSTAT_ICTX)); +#endif + + return IRQ_HANDLED; +} + +static irqreturn_t mal_rxeob(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 r = get_mal_dcrn(mal, MAL_RXEOBISR); + + MAL_DBG2(mal, "rxeob %08x" NL, r); + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXEOBISR, r); + +#ifdef CONFIG_PPC_DCR_NATIVE + if (mal_has_feature(mal, MAL_FTR_CLEAR_ICINTSTAT)) + mtdcri(SDR0, DCRN_SDR_ICINTSTAT, + (mfdcri(SDR0, DCRN_SDR_ICINTSTAT) | ICINTSTAT_ICRX)); +#endif + + return IRQ_HANDLED; +} + +static irqreturn_t mal_txde(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + + u32 deir = get_mal_dcrn(mal, MAL_TXDEIR); + set_mal_dcrn(mal, MAL_TXDEIR, deir); + + MAL_DBG(mal, "txde %08x" NL, deir); + + if (net_ratelimit()) + printk(KERN_ERR + "mal%d: TX descriptor error (TXDEIR = 0x%08x)\n", + mal->index, deir); + + return IRQ_HANDLED; +} + +static irqreturn_t mal_rxde(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + struct list_head *l; + + u32 deir = get_mal_dcrn(mal, MAL_RXDEIR); + + MAL_DBG(mal, "rxde %08x" NL, deir); + + list_for_each(l, &mal->list) { + struct mal_commac *mc = list_entry(l, struct mal_commac, list); + if (deir & mc->rx_chan_mask) { + set_bit(MAL_COMMAC_RX_STOPPED, &mc->flags); + mc->ops->rxde(mc->dev); + } + } + + mal_schedule_poll(mal); + set_mal_dcrn(mal, MAL_RXDEIR, deir); + + return IRQ_HANDLED; +} + +static irqreturn_t mal_int(int irq, void *dev_instance) +{ + struct mal_instance *mal = dev_instance; + u32 esr = get_mal_dcrn(mal, MAL_ESR); + + if (esr & MAL_ESR_EVB) { + /* descriptor error */ + if (esr & MAL_ESR_DE) { + if (esr & MAL_ESR_CIDT) + return mal_rxde(irq, dev_instance); + else + return mal_txde(irq, dev_instance); + } else { /* SERR */ + return mal_serr(irq, dev_instance); + } + } + return IRQ_HANDLED; +} + +void mal_poll_disable(struct mal_instance *mal, struct mal_commac *commac) +{ + /* Spinlock-type semantics: only one caller disable poll at a time */ + while (test_and_set_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags)) + msleep(1); + + /* Synchronize with the MAL NAPI poller */ + napi_synchronize(&mal->napi); +} + +void mal_poll_enable(struct mal_instance *mal, struct mal_commac *commac) +{ + smp_wmb(); + clear_bit(MAL_COMMAC_POLL_DISABLED, &commac->flags); + + /* Feels better to trigger a poll here to catch up with events that + * may have happened on this channel while disabled. It will most + * probably be delayed until the next interrupt but that's mostly a + * non-issue in the context where this is called. + */ + napi_schedule(&mal->napi); +} + +static int mal_poll(struct napi_struct *napi, int budget) +{ + struct mal_instance *mal = container_of(napi, struct mal_instance, napi); + struct list_head *l; + int received = 0; + unsigned long flags; + + MAL_DBG2(mal, "poll(%d)" NL, budget); + again: + /* Process TX skbs */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + mc->ops->poll_tx(mc->dev); + } + + /* Process RX skbs. + * + * We _might_ need something more smart here to enforce polling + * fairness. + */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + int n; + if (unlikely(test_bit(MAL_COMMAC_POLL_DISABLED, &mc->flags))) + continue; + n = mc->ops->poll_rx(mc->dev, budget); + if (n) { + received += n; + budget -= n; + if (budget <= 0) + goto more_work; // XXX What if this is the last one ? + } + } + + /* We need to disable IRQs to protect from RXDE IRQ here */ + spin_lock_irqsave(&mal->lock, flags); + __napi_complete(napi); + mal_enable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); + + /* Check for "rotting" packet(s) */ + list_for_each(l, &mal->poll_list) { + struct mal_commac *mc = + list_entry(l, struct mal_commac, poll_list); + if (unlikely(test_bit(MAL_COMMAC_POLL_DISABLED, &mc->flags))) + continue; + if (unlikely(mc->ops->peek_rx(mc->dev) || + test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { + MAL_DBG2(mal, "rotting packet" NL); + if (napi_reschedule(napi)) + mal_disable_eob_irq(mal); + else + MAL_DBG2(mal, "already in poll list" NL); + + if (budget > 0) + goto again; + else + goto more_work; + } + mc->ops->poll_tx(mc->dev); + } + + more_work: + MAL_DBG2(mal, "poll() %d <- %d" NL, budget, received); + return received; +} + +static void mal_reset(struct mal_instance *mal) +{ + int n = 10; + + MAL_DBG(mal, "reset" NL); + + set_mal_dcrn(mal, MAL_CFG, MAL_CFG_SR); + + /* Wait for reset to complete (1 system clock) */ + while ((get_mal_dcrn(mal, MAL_CFG) & MAL_CFG_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "mal%d: reset timeout\n", mal->index); +} + +int mal_get_regs_len(struct mal_instance *mal) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct mal_regs); +} + +void *mal_dump_regs(struct mal_instance *mal, void *buf) +{ + struct emac_ethtool_regs_subhdr *hdr = buf; + struct mal_regs *regs = (struct mal_regs *)(hdr + 1); + int i; + + hdr->version = mal->version; + hdr->index = mal->index; + + regs->tx_count = mal->num_tx_chans; + regs->rx_count = mal->num_rx_chans; + + regs->cfg = get_mal_dcrn(mal, MAL_CFG); + regs->esr = get_mal_dcrn(mal, MAL_ESR); + regs->ier = get_mal_dcrn(mal, MAL_IER); + regs->tx_casr = get_mal_dcrn(mal, MAL_TXCASR); + regs->tx_carr = get_mal_dcrn(mal, MAL_TXCARR); + regs->tx_eobisr = get_mal_dcrn(mal, MAL_TXEOBISR); + regs->tx_deir = get_mal_dcrn(mal, MAL_TXDEIR); + regs->rx_casr = get_mal_dcrn(mal, MAL_RXCASR); + regs->rx_carr = get_mal_dcrn(mal, MAL_RXCARR); + regs->rx_eobisr = get_mal_dcrn(mal, MAL_RXEOBISR); + regs->rx_deir = get_mal_dcrn(mal, MAL_RXDEIR); + + for (i = 0; i < regs->tx_count; ++i) + regs->tx_ctpr[i] = get_mal_dcrn(mal, MAL_TXCTPR(i)); + + for (i = 0; i < regs->rx_count; ++i) { + regs->rx_ctpr[i] = get_mal_dcrn(mal, MAL_RXCTPR(i)); + regs->rcbs[i] = get_mal_dcrn(mal, MAL_RCBS(i)); + } + return regs + 1; +} + +static int __devinit mal_probe(struct platform_device *ofdev) +{ + struct mal_instance *mal; + int err = 0, i, bd_size; + int index = mal_count++; + unsigned int dcr_base; + const u32 *prop; + u32 cfg; + unsigned long irqflags; + irq_handler_t hdlr_serr, hdlr_txde, hdlr_rxde; + + mal = kzalloc(sizeof(struct mal_instance), GFP_KERNEL); + if (!mal) { + printk(KERN_ERR + "mal%d: out of memory allocating MAL structure!\n", + index); + return -ENOMEM; + } + mal->index = index; + mal->ofdev = ofdev; + mal->version = of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal2") ? 2 : 1; + + MAL_DBG(mal, "probe" NL); + + prop = of_get_property(ofdev->dev.of_node, "num-tx-chans", NULL); + if (prop == NULL) { + printk(KERN_ERR + "mal%d: can't find MAL num-tx-chans property!\n", + index); + err = -ENODEV; + goto fail; + } + mal->num_tx_chans = prop[0]; + + prop = of_get_property(ofdev->dev.of_node, "num-rx-chans", NULL); + if (prop == NULL) { + printk(KERN_ERR + "mal%d: can't find MAL num-rx-chans property!\n", + index); + err = -ENODEV; + goto fail; + } + mal->num_rx_chans = prop[0]; + + dcr_base = dcr_resource_start(ofdev->dev.of_node, 0); + if (dcr_base == 0) { + printk(KERN_ERR + "mal%d: can't find DCR resource!\n", index); + err = -ENODEV; + goto fail; + } + mal->dcr_host = dcr_map(ofdev->dev.of_node, dcr_base, 0x100); + if (!DCR_MAP_OK(mal->dcr_host)) { + printk(KERN_ERR + "mal%d: failed to map DCRs !\n", index); + err = -ENODEV; + goto fail; + } + + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-405ez")) { +#if defined(CONFIG_IBM_EMAC_MAL_CLR_ICINTSTAT) && \ + defined(CONFIG_IBM_EMAC_MAL_COMMON_ERR) + mal->features |= (MAL_FTR_CLEAR_ICINTSTAT | + MAL_FTR_COMMON_ERR_INT); +#else + printk(KERN_ERR "%s: Support for 405EZ not enabled!\n", + ofdev->dev.of_node->full_name); + err = -ENODEV; + goto fail; +#endif + } + + mal->txeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); + mal->rxeob_irq = irq_of_parse_and_map(ofdev->dev.of_node, 1); + mal->serr_irq = irq_of_parse_and_map(ofdev->dev.of_node, 2); + + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + mal->txde_irq = mal->rxde_irq = mal->serr_irq; + } else { + mal->txde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 3); + mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); + } + + if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ || + mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ || + mal->rxde_irq == NO_IRQ) { + printk(KERN_ERR + "mal%d: failed to map interrupts !\n", index); + err = -ENODEV; + goto fail_unmap; + } + + INIT_LIST_HEAD(&mal->poll_list); + INIT_LIST_HEAD(&mal->list); + spin_lock_init(&mal->lock); + + init_dummy_netdev(&mal->dummy_dev); + + netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, + CONFIG_IBM_EMAC_POLL_WEIGHT); + + /* Load power-on reset defaults */ + mal_reset(mal); + + /* Set the MAL configuration register */ + cfg = (mal->version == 2) ? MAL2_CFG_DEFAULT : MAL1_CFG_DEFAULT; + cfg |= MAL_CFG_PLBB | MAL_CFG_OPBBL | MAL_CFG_LEA; + + /* Current Axon is not happy with priority being non-0, it can + * deadlock, fix it up here + */ + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,mcmal-axon")) + cfg &= ~(MAL2_CFG_RPP_10 | MAL2_CFG_WPP_10); + + /* Apply configuration */ + set_mal_dcrn(mal, MAL_CFG, cfg); + + /* Allocate space for BD rings */ + BUG_ON(mal->num_tx_chans <= 0 || mal->num_tx_chans > 32); + BUG_ON(mal->num_rx_chans <= 0 || mal->num_rx_chans > 32); + + bd_size = sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans); + mal->bd_virt = + dma_alloc_coherent(&ofdev->dev, bd_size, &mal->bd_dma, + GFP_KERNEL); + if (mal->bd_virt == NULL) { + printk(KERN_ERR + "mal%d: out of memory allocating RX/TX descriptors!\n", + index); + err = -ENOMEM; + goto fail_unmap; + } + memset(mal->bd_virt, 0, bd_size); + + for (i = 0; i < mal->num_tx_chans; ++i) + set_mal_dcrn(mal, MAL_TXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + mal_tx_bd_offset(mal, i)); + + for (i = 0; i < mal->num_rx_chans; ++i) + set_mal_dcrn(mal, MAL_RXCTPR(i), mal->bd_dma + + sizeof(struct mal_descriptor) * + mal_rx_bd_offset(mal, i)); + + if (mal_has_feature(mal, MAL_FTR_COMMON_ERR_INT)) { + irqflags = IRQF_SHARED; + hdlr_serr = hdlr_txde = hdlr_rxde = mal_int; + } else { + irqflags = 0; + hdlr_serr = mal_serr; + hdlr_txde = mal_txde; + hdlr_rxde = mal_rxde; + } + + err = request_irq(mal->serr_irq, hdlr_serr, irqflags, "MAL SERR", mal); + if (err) + goto fail2; + err = request_irq(mal->txde_irq, hdlr_txde, irqflags, "MAL TX DE", mal); + if (err) + goto fail3; + err = request_irq(mal->txeob_irq, mal_txeob, 0, "MAL TX EOB", mal); + if (err) + goto fail4; + err = request_irq(mal->rxde_irq, hdlr_rxde, irqflags, "MAL RX DE", mal); + if (err) + goto fail5; + err = request_irq(mal->rxeob_irq, mal_rxeob, 0, "MAL RX EOB", mal); + if (err) + goto fail6; + + /* Enable all MAL SERR interrupt sources */ + if (mal->version == 2) + set_mal_dcrn(mal, MAL_IER, MAL2_IER_EVENTS); + else + set_mal_dcrn(mal, MAL_IER, MAL1_IER_EVENTS); + + /* Enable EOB interrupt */ + mal_enable_eob_irq(mal); + + printk(KERN_INFO + "MAL v%d %s, %d TX channels, %d RX channels\n", + mal->version, ofdev->dev.of_node->full_name, + mal->num_tx_chans, mal->num_rx_chans); + + /* Advertise this instance to the rest of the world */ + wmb(); + dev_set_drvdata(&ofdev->dev, mal); + + mal_dbg_register(mal); + + return 0; + + fail6: + free_irq(mal->rxde_irq, mal); + fail5: + free_irq(mal->txeob_irq, mal); + fail4: + free_irq(mal->txde_irq, mal); + fail3: + free_irq(mal->serr_irq, mal); + fail2: + dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); + fail_unmap: + dcr_unmap(mal->dcr_host, 0x100); + fail: + kfree(mal); + + return err; +} + +static int __devexit mal_remove(struct platform_device *ofdev) +{ + struct mal_instance *mal = dev_get_drvdata(&ofdev->dev); + + MAL_DBG(mal, "remove" NL); + + /* Synchronize with scheduled polling */ + napi_disable(&mal->napi); + + if (!list_empty(&mal->list)) { + /* This is *very* bad */ + printk(KERN_EMERG + "mal%d: commac list is not empty on remove!\n", + mal->index); + WARN_ON(1); + } + + dev_set_drvdata(&ofdev->dev, NULL); + + free_irq(mal->serr_irq, mal); + free_irq(mal->txde_irq, mal); + free_irq(mal->txeob_irq, mal); + free_irq(mal->rxde_irq, mal); + free_irq(mal->rxeob_irq, mal); + + mal_reset(mal); + + mal_dbg_unregister(mal); + + dma_free_coherent(&ofdev->dev, + sizeof(struct mal_descriptor) * + (NUM_TX_BUFF * mal->num_tx_chans + + NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt, + mal->bd_dma); + kfree(mal); + + return 0; +} + +static struct of_device_id mal_platform_match[] = +{ + { + .compatible = "ibm,mcmal", + }, + { + .compatible = "ibm,mcmal2", + }, + /* Backward compat */ + { + .type = "mcmal-dma", + .compatible = "ibm,mcmal", + }, + { + .type = "mcmal-dma", + .compatible = "ibm,mcmal2", + }, + {}, +}; + +static struct platform_driver mal_of_driver = { + .driver = { + .name = "mcmal", + .owner = THIS_MODULE, + .of_match_table = mal_platform_match, + }, + .probe = mal_probe, + .remove = mal_remove, +}; + +int __init mal_init(void) +{ + return platform_driver_register(&mal_of_driver); +} + +void mal_exit(void) +{ + platform_driver_unregister(&mal_of_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/mal.h b/drivers/net/ethernet/ibm/emac/mal.h new file mode 100644 index 000000000000..d06f985bda32 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/mal.h @@ -0,0 +1,316 @@ +/* + * drivers/net/ibm_newemac/mal.h + * + * Memory Access Layer (MAL) support + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Copyright 2002 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_NEWEMAC_MAL_H +#define __IBM_NEWEMAC_MAL_H + +/* + * There are some variations on the MAL, we express them in this driver as + * MAL Version 1 and 2 though that doesn't match any IBM terminology. + * + * We call MAL 1 the version in 405GP, 405GPR, 405EP, 440EP, 440GR and + * NP405H. + * + * We call MAL 2 the version in 440GP, 440GX, 440SP, 440SPE and Axon + * + * The driver expects a "version" property in the emac node containing + * a number 1 or 2. New device-trees for EMAC capable platforms are thus + * required to include that when porting to arch/powerpc. + */ + +/* MALx DCR registers */ +#define MAL_CFG 0x00 +#define MAL_CFG_SR 0x80000000 +#define MAL_CFG_PLBB 0x00004000 +#define MAL_CFG_OPBBL 0x00000080 +#define MAL_CFG_EOPIE 0x00000004 +#define MAL_CFG_LEA 0x00000002 +#define MAL_CFG_SD 0x00000001 + +/* MAL V1 CFG bits */ +#define MAL1_CFG_PLBP_MASK 0x00c00000 +#define MAL1_CFG_PLBP_10 0x00800000 +#define MAL1_CFG_GA 0x00200000 +#define MAL1_CFG_OA 0x00100000 +#define MAL1_CFG_PLBLE 0x00080000 +#define MAL1_CFG_PLBT_MASK 0x00078000 +#define MAL1_CFG_DEFAULT (MAL1_CFG_PLBP_10 | MAL1_CFG_PLBT_MASK) + +/* MAL V2 CFG bits */ +#define MAL2_CFG_RPP_MASK 0x00c00000 +#define MAL2_CFG_RPP_10 0x00800000 +#define MAL2_CFG_RMBS_MASK 0x00300000 +#define MAL2_CFG_WPP_MASK 0x000c0000 +#define MAL2_CFG_WPP_10 0x00080000 +#define MAL2_CFG_WMBS_MASK 0x00030000 +#define MAL2_CFG_PLBLE 0x00008000 +#define MAL2_CFG_DEFAULT (MAL2_CFG_RMBS_MASK | MAL2_CFG_WMBS_MASK | \ + MAL2_CFG_RPP_10 | MAL2_CFG_WPP_10) + +#define MAL_ESR 0x01 +#define MAL_ESR_EVB 0x80000000 +#define MAL_ESR_CIDT 0x40000000 +#define MAL_ESR_CID_MASK 0x3e000000 +#define MAL_ESR_CID_SHIFT 25 +#define MAL_ESR_DE 0x00100000 +#define MAL_ESR_OTE 0x00040000 +#define MAL_ESR_OSE 0x00020000 +#define MAL_ESR_PEIN 0x00010000 +#define MAL_ESR_DEI 0x00000010 +#define MAL_ESR_OTEI 0x00000004 +#define MAL_ESR_OSEI 0x00000002 +#define MAL_ESR_PBEI 0x00000001 + +/* MAL V1 ESR bits */ +#define MAL1_ESR_ONE 0x00080000 +#define MAL1_ESR_ONEI 0x00000008 + +/* MAL V2 ESR bits */ +#define MAL2_ESR_PTE 0x00800000 +#define MAL2_ESR_PRE 0x00400000 +#define MAL2_ESR_PWE 0x00200000 +#define MAL2_ESR_PTEI 0x00000080 +#define MAL2_ESR_PREI 0x00000040 +#define MAL2_ESR_PWEI 0x00000020 + + +#define MAL_IER 0x02 +#define MAL_IER_DE 0x00000010 +#define MAL_IER_OTE 0x00000004 +#define MAL_IER_OE 0x00000002 +#define MAL_IER_PE 0x00000001 +/* MAL V1 IER bits */ +#define MAL1_IER_NWE 0x00000008 +#define MAL1_IER_SOC_EVENTS MAL1_IER_NWE +#define MAL1_IER_EVENTS (MAL1_IER_SOC_EVENTS | MAL_IER_DE | \ + MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) + +/* MAL V2 IER bits */ +#define MAL2_IER_PT 0x00000080 +#define MAL2_IER_PRE 0x00000040 +#define MAL2_IER_PWE 0x00000020 +#define MAL2_IER_SOC_EVENTS (MAL2_IER_PT | MAL2_IER_PRE | MAL2_IER_PWE) +#define MAL2_IER_EVENTS (MAL2_IER_SOC_EVENTS | MAL_IER_DE | \ + MAL_IER_OTE | MAL_IER_OE | MAL_IER_PE) + + +#define MAL_TXCASR 0x04 +#define MAL_TXCARR 0x05 +#define MAL_TXEOBISR 0x06 +#define MAL_TXDEIR 0x07 +#define MAL_RXCASR 0x10 +#define MAL_RXCARR 0x11 +#define MAL_RXEOBISR 0x12 +#define MAL_RXDEIR 0x13 +#define MAL_TXCTPR(n) ((n) + 0x20) +#define MAL_RXCTPR(n) ((n) + 0x40) +#define MAL_RCBS(n) ((n) + 0x60) + +/* In reality MAL can handle TX buffers up to 4095 bytes long, + * but this isn't a good round number :) --ebs + */ +#define MAL_MAX_TX_SIZE 4080 +#define MAL_MAX_RX_SIZE 4080 + +static inline int mal_rx_size(int len) +{ + len = (len + 0xf) & ~0xf; + return len > MAL_MAX_RX_SIZE ? MAL_MAX_RX_SIZE : len; +} + +static inline int mal_tx_chunks(int len) +{ + return (len + MAL_MAX_TX_SIZE - 1) / MAL_MAX_TX_SIZE; +} + +#define MAL_CHAN_MASK(n) (0x80000000 >> (n)) + +/* MAL Buffer Descriptor structure */ +struct mal_descriptor { + u16 ctrl; /* MAL / Commac status control bits */ + u16 data_len; /* Max length is 4K-1 (12 bits) */ + u32 data_ptr; /* pointer to actual data buffer */ +}; + +/* the following defines are for the MadMAL status and control registers. */ +/* MADMAL transmit and receive status/control bits */ +#define MAL_RX_CTRL_EMPTY 0x8000 +#define MAL_RX_CTRL_WRAP 0x4000 +#define MAL_RX_CTRL_CM 0x2000 +#define MAL_RX_CTRL_LAST 0x1000 +#define MAL_RX_CTRL_FIRST 0x0800 +#define MAL_RX_CTRL_INTR 0x0400 +#define MAL_RX_CTRL_SINGLE (MAL_RX_CTRL_LAST | MAL_RX_CTRL_FIRST) +#define MAL_IS_SINGLE_RX(ctrl) (((ctrl) & MAL_RX_CTRL_SINGLE) == MAL_RX_CTRL_SINGLE) + +#define MAL_TX_CTRL_READY 0x8000 +#define MAL_TX_CTRL_WRAP 0x4000 +#define MAL_TX_CTRL_CM 0x2000 +#define MAL_TX_CTRL_LAST 0x1000 +#define MAL_TX_CTRL_INTR 0x0400 + +struct mal_commac_ops { + void (*poll_tx) (void *dev); + int (*poll_rx) (void *dev, int budget); + int (*peek_rx) (void *dev); + void (*rxde) (void *dev); +}; + +struct mal_commac { + struct mal_commac_ops *ops; + void *dev; + struct list_head poll_list; + long flags; +#define MAL_COMMAC_RX_STOPPED 0 +#define MAL_COMMAC_POLL_DISABLED 1 + u32 tx_chan_mask; + u32 rx_chan_mask; + struct list_head list; +}; + +struct mal_instance { + int version; + dcr_host_t dcr_host; + + int num_tx_chans; /* Number of TX channels */ + int num_rx_chans; /* Number of RX channels */ + int txeob_irq; /* TX End Of Buffer IRQ */ + int rxeob_irq; /* RX End Of Buffer IRQ */ + int txde_irq; /* TX Descriptor Error IRQ */ + int rxde_irq; /* RX Descriptor Error IRQ */ + int serr_irq; /* MAL System Error IRQ */ + + struct list_head poll_list; + struct napi_struct napi; + + struct list_head list; + u32 tx_chan_mask; + u32 rx_chan_mask; + + dma_addr_t bd_dma; + struct mal_descriptor *bd_virt; + + struct platform_device *ofdev; + int index; + spinlock_t lock; + + struct net_device dummy_dev; + + unsigned int features; +}; + +static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg) +{ + return dcr_read(mal->dcr_host, reg); +} + +static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val) +{ + dcr_write(mal->dcr_host, reg, val); +} + +/* Features of various MAL implementations */ + +/* Set if you have interrupt coalescing and you have to clear the SDR + * register for TXEOB and RXEOB interrupts to work + */ +#define MAL_FTR_CLEAR_ICINTSTAT 0x00000001 + +/* Set if your MAL has SERR, TXDE, and RXDE OR'd into a single UIC + * interrupt + */ +#define MAL_FTR_COMMON_ERR_INT 0x00000002 + +enum { + MAL_FTRS_ALWAYS = 0, + + MAL_FTRS_POSSIBLE = +#ifdef CONFIG_IBM_EMAC_MAL_CLR_ICINTSTAT + MAL_FTR_CLEAR_ICINTSTAT | +#endif +#ifdef CONFIG_IBM_EMAC_MAL_COMMON_ERR + MAL_FTR_COMMON_ERR_INT | +#endif + 0, +}; + +static inline int mal_has_feature(struct mal_instance *dev, + unsigned long feature) +{ + return (MAL_FTRS_ALWAYS & feature) || + (MAL_FTRS_POSSIBLE & dev->features & feature); +} + +/* Register MAL devices */ +int mal_init(void); +void mal_exit(void); + +int mal_register_commac(struct mal_instance *mal, + struct mal_commac *commac); +void mal_unregister_commac(struct mal_instance *mal, + struct mal_commac *commac); +int mal_set_rcbs(struct mal_instance *mal, int channel, unsigned long size); + +/* Returns BD ring offset for a particular channel + (in 'struct mal_descriptor' elements) +*/ +int mal_tx_bd_offset(struct mal_instance *mal, int channel); +int mal_rx_bd_offset(struct mal_instance *mal, int channel); + +void mal_enable_tx_channel(struct mal_instance *mal, int channel); +void mal_disable_tx_channel(struct mal_instance *mal, int channel); +void mal_enable_rx_channel(struct mal_instance *mal, int channel); +void mal_disable_rx_channel(struct mal_instance *mal, int channel); + +void mal_poll_disable(struct mal_instance *mal, struct mal_commac *commac); +void mal_poll_enable(struct mal_instance *mal, struct mal_commac *commac); + +/* Add/remove EMAC to/from MAL polling list */ +void mal_poll_add(struct mal_instance *mal, struct mal_commac *commac); +void mal_poll_del(struct mal_instance *mal, struct mal_commac *commac); + +/* Ethtool MAL registers */ +struct mal_regs { + u32 tx_count; + u32 rx_count; + + u32 cfg; + u32 esr; + u32 ier; + u32 tx_casr; + u32 tx_carr; + u32 tx_eobisr; + u32 tx_deir; + u32 rx_casr; + u32 rx_carr; + u32 rx_eobisr; + u32 rx_deir; + u32 tx_ctpr[32]; + u32 rx_ctpr[32]; + u32 rcbs[32]; +}; + +int mal_get_regs_len(struct mal_instance *mal); +void *mal_dump_regs(struct mal_instance *mal, void *buf); + +#endif /* __IBM_NEWEMAC_MAL_H */ diff --git a/drivers/net/ethernet/ibm/emac/phy.c b/drivers/net/ethernet/ibm/emac/phy.c new file mode 100644 index 000000000000..ab4e5969fe65 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/phy.c @@ -0,0 +1,541 @@ +/* + * drivers/net/ibm_newemac/phy.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support. + * Borrowed from sungem_phy.c, though I only kept the generic MII + * driver for now. + * + * This file should be shared with other drivers or eventually + * merged as the "low level" part of miilib + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * (c) 2003, Benjamin Herrenscmidt (benh@kernel.crashing.org) + * (c) 2004-2005, Eugene Surovegin <ebs@ebshome.net> + * + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/ethtool.h> +#include <linux/delay.h> + +#include "emac.h" +#include "phy.h" + +#define phy_read _phy_read +#define phy_write _phy_write + +static inline int _phy_read(struct mii_phy *phy, int reg) +{ + return phy->mdio_read(phy->dev, phy->address, reg); +} + +static inline void _phy_write(struct mii_phy *phy, int reg, int val) +{ + phy->mdio_write(phy->dev, phy->address, reg, val); +} + +static inline int gpcs_phy_read(struct mii_phy *phy, int reg) +{ + return phy->mdio_read(phy->dev, phy->gpcs_address, reg); +} + +static inline void gpcs_phy_write(struct mii_phy *phy, int reg, int val) +{ + phy->mdio_write(phy->dev, phy->gpcs_address, reg, val); +} + +int emac_mii_reset_phy(struct mii_phy *phy) +{ + int val; + int limit = 10000; + + val = phy_read(phy, MII_BMCR); + val &= ~(BMCR_ISOLATE | BMCR_ANENABLE); + val |= BMCR_RESET; + phy_write(phy, MII_BMCR, val); + + udelay(300); + + while (--limit) { + val = phy_read(phy, MII_BMCR); + if (val >= 0 && (val & BMCR_RESET) == 0) + break; + udelay(10); + } + if ((val & BMCR_ISOLATE) && limit > 0) + phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); + + return limit <= 0; +} + +int emac_mii_reset_gpcs(struct mii_phy *phy) +{ + int val; + int limit = 10000; + + val = gpcs_phy_read(phy, MII_BMCR); + val &= ~(BMCR_ISOLATE | BMCR_ANENABLE); + val |= BMCR_RESET; + gpcs_phy_write(phy, MII_BMCR, val); + + udelay(300); + + while (--limit) { + val = gpcs_phy_read(phy, MII_BMCR); + if (val >= 0 && (val & BMCR_RESET) == 0) + break; + udelay(10); + } + if ((val & BMCR_ISOLATE) && limit > 0) + gpcs_phy_write(phy, MII_BMCR, val & ~BMCR_ISOLATE); + + if (limit > 0 && phy->mode == PHY_MODE_SGMII) { + /* Configure GPCS interface to recommended setting for SGMII */ + gpcs_phy_write(phy, 0x04, 0x8120); /* AsymPause, FDX */ + gpcs_phy_write(phy, 0x07, 0x2801); /* msg_pg, toggle */ + gpcs_phy_write(phy, 0x00, 0x0140); /* 1Gbps, FDX */ + } + + return limit <= 0; +} + +static int genmii_setup_aneg(struct mii_phy *phy, u32 advertise) +{ + int ctl, adv; + + phy->autoneg = AUTONEG_ENABLE; + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + phy->advertising = advertise; + + ctl = phy_read(phy, MII_BMCR); + if (ctl < 0) + return ctl; + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_SPEED1000 | BMCR_ANENABLE); + + /* First clear the PHY */ + phy_write(phy, MII_BMCR, ctl); + + /* Setup standard advertise */ + adv = phy_read(phy, MII_ADVERTISE); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | + ADVERTISE_PAUSE_ASYM); + if (advertise & ADVERTISED_10baseT_Half) + adv |= ADVERTISE_10HALF; + if (advertise & ADVERTISED_10baseT_Full) + adv |= ADVERTISE_10FULL; + if (advertise & ADVERTISED_100baseT_Half) + adv |= ADVERTISE_100HALF; + if (advertise & ADVERTISED_100baseT_Full) + adv |= ADVERTISE_100FULL; + if (advertise & ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; + phy_write(phy, MII_ADVERTISE, adv); + + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + adv = phy_read(phy, MII_CTRL1000); + if (adv < 0) + return adv; + adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); + if (advertise & ADVERTISED_1000baseT_Full) + adv |= ADVERTISE_1000FULL; + if (advertise & ADVERTISED_1000baseT_Half) + adv |= ADVERTISE_1000HALF; + phy_write(phy, MII_CTRL1000, adv); + } + + /* Start/Restart aneg */ + ctl = phy_read(phy, MII_BMCR); + ctl |= (BMCR_ANENABLE | BMCR_ANRESTART); + phy_write(phy, MII_BMCR, ctl); + + return 0; +} + +static int genmii_setup_forced(struct mii_phy *phy, int speed, int fd) +{ + int ctl; + + phy->autoneg = AUTONEG_DISABLE; + phy->speed = speed; + phy->duplex = fd; + phy->pause = phy->asym_pause = 0; + + ctl = phy_read(phy, MII_BMCR); + if (ctl < 0) + return ctl; + ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_SPEED1000 | BMCR_ANENABLE); + + /* First clear the PHY */ + phy_write(phy, MII_BMCR, ctl | BMCR_RESET); + + /* Select speed & duplex */ + switch (speed) { + case SPEED_10: + break; + case SPEED_100: + ctl |= BMCR_SPEED100; + break; + case SPEED_1000: + ctl |= BMCR_SPEED1000; + break; + default: + return -EINVAL; + } + if (fd == DUPLEX_FULL) + ctl |= BMCR_FULLDPLX; + phy_write(phy, MII_BMCR, ctl); + + return 0; +} + +static int genmii_poll_link(struct mii_phy *phy) +{ + int status; + + /* Clear latched value with dummy read */ + phy_read(phy, MII_BMSR); + status = phy_read(phy, MII_BMSR); + if (status < 0 || (status & BMSR_LSTATUS) == 0) + return 0; + if (phy->autoneg == AUTONEG_ENABLE && !(status & BMSR_ANEGCOMPLETE)) + return 0; + return 1; +} + +static int genmii_read_link(struct mii_phy *phy) +{ + if (phy->autoneg == AUTONEG_ENABLE) { + int glpa = 0; + int lpa = phy_read(phy, MII_LPA) & phy_read(phy, MII_ADVERTISE); + if (lpa < 0) + return lpa; + + if (phy->features & + (SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half)) { + int adv = phy_read(phy, MII_CTRL1000); + glpa = phy_read(phy, MII_STAT1000); + + if (glpa < 0 || adv < 0) + return adv; + + glpa &= adv << 2; + } + + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + if (glpa & (LPA_1000FULL | LPA_1000HALF)) { + phy->speed = SPEED_1000; + if (glpa & LPA_1000FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + phy->speed = SPEED_100; + if (lpa & LPA_100FULL) + phy->duplex = DUPLEX_FULL; + } else if (lpa & LPA_10FULL) + phy->duplex = DUPLEX_FULL; + + if (phy->duplex == DUPLEX_FULL) { + phy->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phy->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; + } + } else { + int bmcr = phy_read(phy, MII_BMCR); + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phy->duplex = DUPLEX_FULL; + else + phy->duplex = DUPLEX_HALF; + if (bmcr & BMCR_SPEED1000) + phy->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phy->speed = SPEED_100; + else + phy->speed = SPEED_10; + + phy->pause = phy->asym_pause = 0; + } + return 0; +} + +/* Generic implementation for most 10/100/1000 PHYs */ +static struct mii_phy_ops generic_phy_ops = { + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def genmii_phy_def = { + .phy_id = 0x00000000, + .phy_id_mask = 0x00000000, + .name = "Generic MII", + .ops = &generic_phy_ops +}; + +/* CIS8201 */ +#define MII_CIS8201_10BTCSR 0x16 +#define TENBTCSR_ECHO_DISABLE 0x2000 +#define MII_CIS8201_EPCR 0x17 +#define EPCR_MODE_MASK 0x3000 +#define EPCR_GMII_MODE 0x0000 +#define EPCR_RGMII_MODE 0x1000 +#define EPCR_TBI_MODE 0x2000 +#define EPCR_RTBI_MODE 0x3000 +#define MII_CIS8201_ACSR 0x1c +#define ACSR_PIN_PRIO_SELECT 0x0004 + +static int cis8201_init(struct mii_phy *phy) +{ + int epcr; + + epcr = phy_read(phy, MII_CIS8201_EPCR); + if (epcr < 0) + return epcr; + + epcr &= ~EPCR_MODE_MASK; + + switch (phy->mode) { + case PHY_MODE_TBI: + epcr |= EPCR_TBI_MODE; + break; + case PHY_MODE_RTBI: + epcr |= EPCR_RTBI_MODE; + break; + case PHY_MODE_GMII: + epcr |= EPCR_GMII_MODE; + break; + case PHY_MODE_RGMII: + default: + epcr |= EPCR_RGMII_MODE; + } + + phy_write(phy, MII_CIS8201_EPCR, epcr); + + /* MII regs override strap pins */ + phy_write(phy, MII_CIS8201_ACSR, + phy_read(phy, MII_CIS8201_ACSR) | ACSR_PIN_PRIO_SELECT); + + /* Disable TX_EN -> CRS echo mode, otherwise 10/HDX doesn't work */ + phy_write(phy, MII_CIS8201_10BTCSR, + phy_read(phy, MII_CIS8201_10BTCSR) | TENBTCSR_ECHO_DISABLE); + + return 0; +} + +static struct mii_phy_ops cis8201_phy_ops = { + .init = cis8201_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def cis8201_phy_def = { + .phy_id = 0x000fc410, + .phy_id_mask = 0x000ffff0, + .name = "CIS8201 Gigabit Ethernet", + .ops = &cis8201_phy_ops +}; + +static struct mii_phy_def bcm5248_phy_def = { + + .phy_id = 0x0143bc00, + .phy_id_mask = 0x0ffffff0, + .name = "BCM5248 10/100 SMII Ethernet", + .ops = &generic_phy_ops +}; + +static int m88e1111_init(struct mii_phy *phy) +{ + pr_debug("%s: Marvell 88E1111 Ethernet\n", __func__); + phy_write(phy, 0x14, 0x0ce3); + phy_write(phy, 0x18, 0x4101); + phy_write(phy, 0x09, 0x0e00); + phy_write(phy, 0x04, 0x01e1); + phy_write(phy, 0x00, 0x9140); + phy_write(phy, 0x00, 0x1140); + + return 0; +} + +static int m88e1112_init(struct mii_phy *phy) +{ + /* + * Marvell 88E1112 PHY needs to have the SGMII MAC + * interace (page 2) properly configured to + * communicate with the 460EX/GT GPCS interface. + */ + + u16 reg_short; + + pr_debug("%s: Marvell 88E1112 Ethernet\n", __func__); + + /* Set access to Page 2 */ + phy_write(phy, 0x16, 0x0002); + + phy_write(phy, 0x00, 0x0040); /* 1Gbps */ + reg_short = (u16)(phy_read(phy, 0x1a)); + reg_short |= 0x8000; /* bypass Auto-Negotiation */ + phy_write(phy, 0x1a, reg_short); + emac_mii_reset_phy(phy); /* reset MAC interface */ + + /* Reset access to Page 0 */ + phy_write(phy, 0x16, 0x0000); + + return 0; +} + +static int et1011c_init(struct mii_phy *phy) +{ + u16 reg_short; + + reg_short = (u16)(phy_read(phy, 0x16)); + reg_short &= ~(0x7); + reg_short |= 0x6; /* RGMII Trace Delay*/ + phy_write(phy, 0x16, reg_short); + + reg_short = (u16)(phy_read(phy, 0x17)); + reg_short &= ~(0x40); + phy_write(phy, 0x17, reg_short); + + phy_write(phy, 0x1c, 0x74f0); + return 0; +} + +static struct mii_phy_ops et1011c_phy_ops = { + .init = et1011c_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def et1011c_phy_def = { + .phy_id = 0x0282f000, + .phy_id_mask = 0x0fffff00, + .name = "ET1011C Gigabit Ethernet", + .ops = &et1011c_phy_ops +}; + + + + + +static struct mii_phy_ops m88e1111_phy_ops = { + .init = m88e1111_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def m88e1111_phy_def = { + + .phy_id = 0x01410CC0, + .phy_id_mask = 0x0ffffff0, + .name = "Marvell 88E1111 Ethernet", + .ops = &m88e1111_phy_ops, +}; + +static struct mii_phy_ops m88e1112_phy_ops = { + .init = m88e1112_init, + .setup_aneg = genmii_setup_aneg, + .setup_forced = genmii_setup_forced, + .poll_link = genmii_poll_link, + .read_link = genmii_read_link +}; + +static struct mii_phy_def m88e1112_phy_def = { + .phy_id = 0x01410C90, + .phy_id_mask = 0x0ffffff0, + .name = "Marvell 88E1112 Ethernet", + .ops = &m88e1112_phy_ops, +}; + +static struct mii_phy_def *mii_phy_table[] = { + &et1011c_phy_def, + &cis8201_phy_def, + &bcm5248_phy_def, + &m88e1111_phy_def, + &m88e1112_phy_def, + &genmii_phy_def, + NULL +}; + +int emac_mii_phy_probe(struct mii_phy *phy, int address) +{ + struct mii_phy_def *def; + int i; + u32 id; + + phy->autoneg = AUTONEG_DISABLE; + phy->advertising = 0; + phy->address = address; + phy->speed = SPEED_10; + phy->duplex = DUPLEX_HALF; + phy->pause = phy->asym_pause = 0; + + /* Take PHY out of isolate mode and reset it. */ + if (emac_mii_reset_phy(phy)) + return -ENODEV; + + /* Read ID and find matching entry */ + id = (phy_read(phy, MII_PHYSID1) << 16) | phy_read(phy, MII_PHYSID2); + for (i = 0; (def = mii_phy_table[i]) != NULL; i++) + if ((id & def->phy_id_mask) == def->phy_id) + break; + /* Should never be NULL (we have a generic entry), but... */ + if (!def) + return -ENODEV; + + phy->def = def; + + /* Determine PHY features if needed */ + phy->features = def->features; + if (!phy->features) { + u16 bmsr = phy_read(phy, MII_BMSR); + if (bmsr & BMSR_ANEGCAPABLE) + phy->features |= SUPPORTED_Autoneg; + if (bmsr & BMSR_10HALF) + phy->features |= SUPPORTED_10baseT_Half; + if (bmsr & BMSR_10FULL) + phy->features |= SUPPORTED_10baseT_Full; + if (bmsr & BMSR_100HALF) + phy->features |= SUPPORTED_100baseT_Half; + if (bmsr & BMSR_100FULL) + phy->features |= SUPPORTED_100baseT_Full; + if (bmsr & BMSR_ESTATEN) { + u16 esr = phy_read(phy, MII_ESTATUS); + if (esr & ESTATUS_1000_TFULL) + phy->features |= SUPPORTED_1000baseT_Full; + if (esr & ESTATUS_1000_THALF) + phy->features |= SUPPORTED_1000baseT_Half; + } + phy->features |= SUPPORTED_MII; + } + + /* Setup default advertising */ + phy->advertising = phy->features; + + return 0; +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/ibm/emac/phy.h b/drivers/net/ethernet/ibm/emac/phy.h new file mode 100644 index 000000000000..5d2bf4cbe50b --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/phy.h @@ -0,0 +1,87 @@ +/* + * drivers/net/ibm_newemac/phy.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, PHY support + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * February 2003 + * + * Minor additions by Eugene Surovegin <ebs@ebshome.net>, 2004 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This file basically duplicates sungem_phy.{c,h} with different PHYs + * supported. I'm looking into merging that in a single mii layer more + * flexible than mii.c + */ + +#ifndef __IBM_NEWEMAC_PHY_H +#define __IBM_NEWEMAC_PHY_H + +struct mii_phy; + +/* Operations supported by any kind of PHY */ +struct mii_phy_ops { + int (*init) (struct mii_phy * phy); + int (*suspend) (struct mii_phy * phy, int wol_options); + int (*setup_aneg) (struct mii_phy * phy, u32 advertise); + int (*setup_forced) (struct mii_phy * phy, int speed, int fd); + int (*poll_link) (struct mii_phy * phy); + int (*read_link) (struct mii_phy * phy); +}; + +/* Structure used to statically define an mii/gii based PHY */ +struct mii_phy_def { + u32 phy_id; /* Concatenated ID1 << 16 | ID2 */ + u32 phy_id_mask; /* Significant bits */ + u32 features; /* Ethtool SUPPORTED_* defines or + 0 for autodetect */ + int magic_aneg; /* Autoneg does all speed test for us */ + const char *name; + const struct mii_phy_ops *ops; +}; + +/* An instance of a PHY, partially borrowed from mii_if_info */ +struct mii_phy { + struct mii_phy_def *def; + u32 advertising; /* Ethtool ADVERTISED_* defines */ + u32 features; /* Copied from mii_phy_def.features + or determined automaticaly */ + int address; /* PHY address */ + int mode; /* PHY mode */ + int gpcs_address; /* GPCS PHY address */ + + /* 1: autoneg enabled, 0: disabled */ + int autoneg; + + /* forced speed & duplex (no autoneg) + * partner speed & duplex & pause (autoneg) + */ + int speed; + int duplex; + int pause; + int asym_pause; + + /* Provided by host chip */ + struct net_device *dev; + int (*mdio_read) (struct net_device * dev, int addr, int reg); + void (*mdio_write) (struct net_device * dev, int addr, int reg, + int val); +}; + +/* Pass in a struct mii_phy with dev, mdio_read and mdio_write + * filled, the remaining fields will be filled on return + */ +int emac_mii_phy_probe(struct mii_phy *phy, int address); +int emac_mii_reset_phy(struct mii_phy *phy); +int emac_mii_reset_gpcs(struct mii_phy *phy); + +#endif /* __IBM_NEWEMAC_PHY_H */ diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c new file mode 100644 index 000000000000..4fa53f3def64 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -0,0 +1,338 @@ +/* + * drivers/net/ibm_newemac/rgmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Matt Porter <mporter@kernel.crashing.org> + * Copyright 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <asm/io.h> + +#include "emac.h" +#include "debug.h" + +// XXX FIXME: Axon seems to support a subset of the RGMII, we +// thus need to take that into account and possibly change some +// of the bit settings below that don't seem to quite match the +// AXON spec + +/* RGMIIx_FER */ +#define RGMII_FER_MASK(idx) (0x7 << ((idx) * 4)) +#define RGMII_FER_RTBI(idx) (0x4 << ((idx) * 4)) +#define RGMII_FER_RGMII(idx) (0x5 << ((idx) * 4)) +#define RGMII_FER_TBI(idx) (0x6 << ((idx) * 4)) +#define RGMII_FER_GMII(idx) (0x7 << ((idx) * 4)) +#define RGMII_FER_MII(idx) RGMII_FER_GMII(idx) + +/* RGMIIx_SSR */ +#define RGMII_SSR_MASK(idx) (0x7 << ((idx) * 8)) +#define RGMII_SSR_100(idx) (0x2 << ((idx) * 8)) +#define RGMII_SSR_1000(idx) (0x4 << ((idx) * 8)) + +/* RGMII bridge supports only GMII/TBI and RGMII/RTBI PHYs */ +static inline int rgmii_valid_mode(int phy_mode) +{ + return phy_mode == PHY_MODE_GMII || + phy_mode == PHY_MODE_MII || + phy_mode == PHY_MODE_RGMII || + phy_mode == PHY_MODE_TBI || + phy_mode == PHY_MODE_RTBI; +} + +static inline const char *rgmii_mode_name(int mode) +{ + switch (mode) { + case PHY_MODE_RGMII: + return "RGMII"; + case PHY_MODE_TBI: + return "TBI"; + case PHY_MODE_GMII: + return "GMII"; + case PHY_MODE_MII: + return "MII"; + case PHY_MODE_RTBI: + return "RTBI"; + default: + BUG(); + } +} + +static inline u32 rgmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_MODE_RGMII: + return RGMII_FER_RGMII(input); + case PHY_MODE_TBI: + return RGMII_FER_TBI(input); + case PHY_MODE_GMII: + return RGMII_FER_GMII(input); + case PHY_MODE_MII: + return RGMII_FER_MII(input); + case PHY_MODE_RTBI: + return RGMII_FER_RTBI(input); + default: + BUG(); + } +} + +int __devinit rgmii_attach(struct platform_device *ofdev, int input, int mode) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct rgmii_regs __iomem *p = dev->base; + + RGMII_DBG(dev, "attach(%d)" NL, input); + + /* Check if we need to attach to a RGMII */ + if (input < 0 || !rgmii_valid_mode(mode)) { + printk(KERN_ERR "%s: unsupported settings !\n", + ofdev->dev.of_node->full_name); + return -ENODEV; + } + + mutex_lock(&dev->lock); + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input)); + + printk(KERN_NOTICE "%s: input %d in %s mode\n", + ofdev->dev.of_node->full_name, input, rgmii_mode_name(mode)); + + ++dev->users; + + mutex_unlock(&dev->lock); + + return 0; +} + +void rgmii_set_speed(struct platform_device *ofdev, int input, int speed) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct rgmii_regs __iomem *p = dev->base; + u32 ssr; + + mutex_lock(&dev->lock); + + ssr = in_be32(&p->ssr) & ~RGMII_SSR_MASK(input); + + RGMII_DBG(dev, "speed(%d, %d)" NL, input, speed); + + if (speed == SPEED_1000) + ssr |= RGMII_SSR_1000(input); + else if (speed == SPEED_100) + ssr |= RGMII_SSR_100(input); + + out_be32(&p->ssr, ssr); + + mutex_unlock(&dev->lock); +} + +void rgmii_get_mdio(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct rgmii_regs __iomem *p = dev->base; + u32 fer; + + RGMII_DBG2(dev, "get_mdio(%d)" NL, input); + + if (!(dev->flags & EMAC_RGMII_FLAG_HAS_MDIO)) + return; + + mutex_lock(&dev->lock); + + fer = in_be32(&p->fer); + fer |= 0x00080000u >> input; + out_be32(&p->fer, fer); + (void)in_be32(&p->fer); + + DBG2(dev, " fer = 0x%08x\n", fer); +} + +void rgmii_put_mdio(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct rgmii_regs __iomem *p = dev->base; + u32 fer; + + RGMII_DBG2(dev, "put_mdio(%d)" NL, input); + + if (!(dev->flags & EMAC_RGMII_FLAG_HAS_MDIO)) + return; + + fer = in_be32(&p->fer); + fer &= ~(0x00080000u >> input); + out_be32(&p->fer, fer); + (void)in_be32(&p->fer); + + DBG2(dev, " fer = 0x%08x\n", fer); + + mutex_unlock(&dev->lock); +} + +void rgmii_detach(struct platform_device *ofdev, int input) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct rgmii_regs __iomem *p; + + BUG_ON(!dev || dev->users == 0); + p = dev->base; + + mutex_lock(&dev->lock); + + RGMII_DBG(dev, "detach(%d)" NL, input); + + /* Disable this input */ + out_be32(&p->fer, in_be32(&p->fer) & ~RGMII_FER_MASK(input)); + + --dev->users; + + mutex_unlock(&dev->lock); +} + +int rgmii_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct rgmii_regs); +} + +void *rgmii_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct rgmii_regs *regs = (struct rgmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * rgmii ? if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct rgmii_regs)); + return regs + 1; +} + + +static int __devinit rgmii_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct rgmii_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct rgmii_instance), GFP_KERNEL); + if (dev == NULL) { + printk(KERN_ERR "%s: could not allocate RGMII device!\n", + np->full_name); + goto err_gone; + } + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, ®s)) { + printk(KERN_ERR "%s: Can't get registers address\n", + np->full_name); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start, + sizeof(struct rgmii_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%s: Can't map device registers!\n", + np->full_name); + goto err_free; + } + + /* Check for RGMII flags */ + if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL)) + dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; + + /* CAB lacks the right properties, fix this up */ + if (of_device_is_compatible(ofdev->dev.of_node, "ibm,rgmii-axon")) + dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; + + DBG2(dev, " Boot FER = 0x%08x, SSR = 0x%08x\n", + in_be32(&dev->base->fer), in_be32(&dev->base->ssr)); + + /* Disable all inputs by default */ + out_be32(&dev->base->fer, 0); + + printk(KERN_INFO + "RGMII %s initialized with%s MDIO support\n", + ofdev->dev.of_node->full_name, + (dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ? "" : "out"); + + wmb(); + dev_set_drvdata(&ofdev->dev, dev); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int __devexit rgmii_remove(struct platform_device *ofdev) +{ + struct rgmii_instance *dev = dev_get_drvdata(&ofdev->dev); + + dev_set_drvdata(&ofdev->dev, NULL); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static struct of_device_id rgmii_match[] = +{ + { + .compatible = "ibm,rgmii", + }, + { + .type = "emac-rgmii", + }, + {}, +}; + +static struct platform_driver rgmii_driver = { + .driver = { + .name = "emac-rgmii", + .owner = THIS_MODULE, + .of_match_table = rgmii_match, + }, + .probe = rgmii_probe, + .remove = rgmii_remove, +}; + +int __init rgmii_init(void) +{ + return platform_driver_register(&rgmii_driver); +} + +void rgmii_exit(void) +{ + platform_driver_unregister(&rgmii_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/rgmii.h b/drivers/net/ethernet/ibm/emac/rgmii.h new file mode 100644 index 000000000000..9296b6c5f920 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/rgmii.h @@ -0,0 +1,82 @@ +/* + * drivers/net/ibm_newemac/rgmii.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, RGMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Based on ocp_zmii.h/ibm_emac_zmii.h + * Armin Kuster akuster@mvista.com + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __IBM_NEWEMAC_RGMII_H +#define __IBM_NEWEMAC_RGMII_H + +/* RGMII bridge type */ +#define RGMII_STANDARD 0 +#define RGMII_AXON 1 + +/* RGMII bridge */ +struct rgmii_regs { + u32 fer; /* Function enable register */ + u32 ssr; /* Speed select register */ +}; + +/* RGMII device */ +struct rgmii_instance { + struct rgmii_regs __iomem *base; + + /* RGMII bridge flags */ + int flags; +#define EMAC_RGMII_FLAG_HAS_MDIO 0x00000001 + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* number of EMACs using this RGMII bridge */ + int users; + + /* OF device instance */ + struct platform_device *ofdev; +}; + +#ifdef CONFIG_IBM_EMAC_RGMII + +extern int rgmii_init(void); +extern void rgmii_exit(void); +extern int rgmii_attach(struct platform_device *ofdev, int input, int mode); +extern void rgmii_detach(struct platform_device *ofdev, int input); +extern void rgmii_get_mdio(struct platform_device *ofdev, int input); +extern void rgmii_put_mdio(struct platform_device *ofdev, int input); +extern void rgmii_set_speed(struct platform_device *ofdev, int input, int speed); +extern int rgmii_get_regs_len(struct platform_device *ofdev); +extern void *rgmii_dump_regs(struct platform_device *ofdev, void *buf); + +#else + +# define rgmii_init() 0 +# define rgmii_exit() do { } while(0) +# define rgmii_attach(x,y,z) (-ENXIO) +# define rgmii_detach(x,y) do { } while(0) +# define rgmii_get_mdio(o,i) do { } while (0) +# define rgmii_put_mdio(o,i) do { } while (0) +# define rgmii_set_speed(x,y,z) do { } while(0) +# define rgmii_get_regs_len(x) 0 +# define rgmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_RGMII */ + +#endif /* __IBM_NEWEMAC_RGMII_H */ diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c new file mode 100644 index 000000000000..5f51bf7c9dc5 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -0,0 +1,185 @@ +/* + * drivers/net/ibm_newemac/tah.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include <asm/io.h> + +#include "emac.h" +#include "core.h" + +int __devinit tah_attach(struct platform_device *ofdev, int channel) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + + mutex_lock(&dev->lock); + /* Reset has been done at probe() time... nothing else to do for now */ + ++dev->users; + mutex_unlock(&dev->lock); + + return 0; +} + +void tah_detach(struct platform_device *ofdev, int channel) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + + mutex_lock(&dev->lock); + --dev->users; + mutex_unlock(&dev->lock); +} + +void tah_reset(struct platform_device *ofdev) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + struct tah_regs __iomem *p = dev->base; + int n; + + /* Reset TAH */ + out_be32(&p->mr, TAH_MR_SR); + n = 100; + while ((in_be32(&p->mr) & TAH_MR_SR) && n) + --n; + + if (unlikely(!n)) + printk(KERN_ERR "%s: reset timeout\n", + ofdev->dev.of_node->full_name); + + /* 10KB TAH TX FIFO accommodates the max MTU of 9000 */ + out_be32(&p->mr, + TAH_MR_CVR | TAH_MR_ST_768 | TAH_MR_TFS_10KB | TAH_MR_DTFP | + TAH_MR_DIG); +} + +int tah_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct tah_regs); +} + +void *tah_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct tah_regs *regs = (struct tah_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * zmii ? if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct tah_regs)); + return regs + 1; +} + +static int __devinit tah_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct tah_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct tah_instance), GFP_KERNEL); + if (dev == NULL) { + printk(KERN_ERR "%s: could not allocate TAH device!\n", + np->full_name); + goto err_gone; + } + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, ®s)) { + printk(KERN_ERR "%s: Can't get registers address\n", + np->full_name); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct tah_regs __iomem *)ioremap(regs.start, + sizeof(struct tah_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%s: Can't map device registers!\n", + np->full_name); + goto err_free; + } + + dev_set_drvdata(&ofdev->dev, dev); + + /* Initialize TAH and enable IPv4 checksum verification, no TSO yet */ + tah_reset(ofdev); + + printk(KERN_INFO + "TAH %s initialized\n", ofdev->dev.of_node->full_name); + wmb(); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int __devexit tah_remove(struct platform_device *ofdev) +{ + struct tah_instance *dev = dev_get_drvdata(&ofdev->dev); + + dev_set_drvdata(&ofdev->dev, NULL); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static struct of_device_id tah_match[] = +{ + { + .compatible = "ibm,tah", + }, + /* For backward compat with old DT */ + { + .type = "tah", + }, + {}, +}; + +static struct platform_driver tah_driver = { + .driver = { + .name = "emac-tah", + .owner = THIS_MODULE, + .of_match_table = tah_match, + }, + .probe = tah_probe, + .remove = tah_remove, +}; + +int __init tah_init(void) +{ + return platform_driver_register(&tah_driver); +} + +void tah_exit(void) +{ + platform_driver_unregister(&tah_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/tah.h b/drivers/net/ethernet/ibm/emac/tah.h new file mode 100644 index 000000000000..3437ab4964c7 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/tah.h @@ -0,0 +1,95 @@ +/* + * drivers/net/ibm_newemac/tah.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, TAH support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright 2004 MontaVista Software, Inc. + * Matt Porter <mporter@kernel.crashing.org> + * + * Copyright (c) 2005 Eugene Surovegin <ebs@ebshome.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __IBM_NEWEMAC_TAH_H +#define __IBM_NEWEMAC_TAH_H + +/* TAH */ +struct tah_regs { + u32 revid; + u32 pad[3]; + u32 mr; + u32 ssr0; + u32 ssr1; + u32 ssr2; + u32 ssr3; + u32 ssr4; + u32 ssr5; + u32 tsr; +}; + + +/* TAH device */ +struct tah_instance { + struct tah_regs __iomem *base; + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* number of EMACs using this TAH */ + int users; + + /* OF device instance */ + struct platform_device *ofdev; +}; + + +/* TAH engine */ +#define TAH_MR_CVR 0x80000000 +#define TAH_MR_SR 0x40000000 +#define TAH_MR_ST_256 0x01000000 +#define TAH_MR_ST_512 0x02000000 +#define TAH_MR_ST_768 0x03000000 +#define TAH_MR_ST_1024 0x04000000 +#define TAH_MR_ST_1280 0x05000000 +#define TAH_MR_ST_1536 0x06000000 +#define TAH_MR_TFS_16KB 0x00000000 +#define TAH_MR_TFS_2KB 0x00200000 +#define TAH_MR_TFS_4KB 0x00400000 +#define TAH_MR_TFS_6KB 0x00600000 +#define TAH_MR_TFS_8KB 0x00800000 +#define TAH_MR_TFS_10KB 0x00a00000 +#define TAH_MR_DTFP 0x00100000 +#define TAH_MR_DIG 0x00080000 + +#ifdef CONFIG_IBM_EMAC_TAH + +extern int tah_init(void); +extern void tah_exit(void); +extern int tah_attach(struct platform_device *ofdev, int channel); +extern void tah_detach(struct platform_device *ofdev, int channel); +extern void tah_reset(struct platform_device *ofdev); +extern int tah_get_regs_len(struct platform_device *ofdev); +extern void *tah_dump_regs(struct platform_device *ofdev, void *buf); + +#else + +# define tah_init() 0 +# define tah_exit() do { } while(0) +# define tah_attach(x,y) (-ENXIO) +# define tah_detach(x,y) do { } while(0) +# define tah_reset(x) do { } while(0) +# define tah_get_regs_len(x) 0 +# define tah_dump_regs(x,buf) (buf) + +#endif /* !CONFIG_IBM_EMAC_TAH */ + +#endif /* __IBM_NEWEMAC_TAH_H */ diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c new file mode 100644 index 000000000000..97449e786d61 --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -0,0 +1,332 @@ +/* + * drivers/net/ibm_newemac/zmii.c + * + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Copyright 2001 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/ethtool.h> +#include <asm/io.h> + +#include "emac.h" +#include "core.h" + +/* ZMIIx_FER */ +#define ZMII_FER_MDI(idx) (0x80000000 >> ((idx) * 4)) +#define ZMII_FER_MDI_ALL (ZMII_FER_MDI(0) | ZMII_FER_MDI(1) | \ + ZMII_FER_MDI(2) | ZMII_FER_MDI(3)) + +#define ZMII_FER_SMII(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_FER_RMII(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_FER_MII(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMIIx_SSR */ +#define ZMII_SSR_SCI(idx) (0x40000000 >> ((idx) * 4)) +#define ZMII_SSR_FSS(idx) (0x20000000 >> ((idx) * 4)) +#define ZMII_SSR_SP(idx) (0x10000000 >> ((idx) * 4)) + +/* ZMII only supports MII, RMII and SMII + * we also support autodetection for backward compatibility + */ +static inline int zmii_valid_mode(int mode) +{ + return mode == PHY_MODE_MII || + mode == PHY_MODE_RMII || + mode == PHY_MODE_SMII || + mode == PHY_MODE_NA; +} + +static inline const char *zmii_mode_name(int mode) +{ + switch (mode) { + case PHY_MODE_MII: + return "MII"; + case PHY_MODE_RMII: + return "RMII"; + case PHY_MODE_SMII: + return "SMII"; + default: + BUG(); + } +} + +static inline u32 zmii_mode_mask(int mode, int input) +{ + switch (mode) { + case PHY_MODE_MII: + return ZMII_FER_MII(input); + case PHY_MODE_RMII: + return ZMII_FER_RMII(input); + case PHY_MODE_SMII: + return ZMII_FER_SMII(input); + default: + return 0; + } +} + +int __devinit zmii_attach(struct platform_device *ofdev, int input, int *mode) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct zmii_regs __iomem *p = dev->base; + + ZMII_DBG(dev, "init(%d, %d)" NL, input, *mode); + + if (!zmii_valid_mode(*mode)) { + /* Probably an EMAC connected to RGMII, + * but it still may need ZMII for MDIO so + * we don't fail here. + */ + dev->users++; + return 0; + } + + mutex_lock(&dev->lock); + + /* Autodetect ZMII mode if not specified. + * This is only for backward compatibility with the old driver. + * Please, always specify PHY mode in your board port to avoid + * any surprises. + */ + if (dev->mode == PHY_MODE_NA) { + if (*mode == PHY_MODE_NA) { + u32 r = dev->fer_save; + + ZMII_DBG(dev, "autodetecting mode, FER = 0x%08x" NL, r); + + if (r & (ZMII_FER_MII(0) | ZMII_FER_MII(1))) + dev->mode = PHY_MODE_MII; + else if (r & (ZMII_FER_RMII(0) | ZMII_FER_RMII(1))) + dev->mode = PHY_MODE_RMII; + else + dev->mode = PHY_MODE_SMII; + } else + dev->mode = *mode; + + printk(KERN_NOTICE "%s: bridge in %s mode\n", + ofdev->dev.of_node->full_name, + zmii_mode_name(dev->mode)); + } else { + /* All inputs must use the same mode */ + if (*mode != PHY_MODE_NA && *mode != dev->mode) { + printk(KERN_ERR + "%s: invalid mode %d specified for input %d\n", + ofdev->dev.of_node->full_name, *mode, input); + mutex_unlock(&dev->lock); + return -EINVAL; + } + } + + /* Report back correct PHY mode, + * it may be used during PHY initialization. + */ + *mode = dev->mode; + + /* Enable this input */ + out_be32(&p->fer, in_be32(&p->fer) | zmii_mode_mask(dev->mode, input)); + ++dev->users; + + mutex_unlock(&dev->lock); + + return 0; +} + +void zmii_get_mdio(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + u32 fer; + + ZMII_DBG2(dev, "get_mdio(%d)" NL, input); + + mutex_lock(&dev->lock); + + fer = in_be32(&dev->base->fer) & ~ZMII_FER_MDI_ALL; + out_be32(&dev->base->fer, fer | ZMII_FER_MDI(input)); +} + +void zmii_put_mdio(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + + ZMII_DBG2(dev, "put_mdio(%d)" NL, input); + mutex_unlock(&dev->lock); +} + + +void zmii_set_speed(struct platform_device *ofdev, int input, int speed) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + u32 ssr; + + mutex_lock(&dev->lock); + + ssr = in_be32(&dev->base->ssr); + + ZMII_DBG(dev, "speed(%d, %d)" NL, input, speed); + + if (speed == SPEED_100) + ssr |= ZMII_SSR_SP(input); + else + ssr &= ~ZMII_SSR_SP(input); + + out_be32(&dev->base->ssr, ssr); + + mutex_unlock(&dev->lock); +} + +void zmii_detach(struct platform_device *ofdev, int input) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + + BUG_ON(!dev || dev->users == 0); + + mutex_lock(&dev->lock); + + ZMII_DBG(dev, "detach(%d)" NL, input); + + /* Disable this input */ + out_be32(&dev->base->fer, + in_be32(&dev->base->fer) & ~zmii_mode_mask(dev->mode, input)); + + --dev->users; + + mutex_unlock(&dev->lock); +} + +int zmii_get_regs_len(struct platform_device *ofdev) +{ + return sizeof(struct emac_ethtool_regs_subhdr) + + sizeof(struct zmii_regs); +} + +void *zmii_dump_regs(struct platform_device *ofdev, void *buf) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + struct emac_ethtool_regs_subhdr *hdr = buf; + struct zmii_regs *regs = (struct zmii_regs *)(hdr + 1); + + hdr->version = 0; + hdr->index = 0; /* for now, are there chips with more than one + * zmii ? if yes, then we'll add a cell_index + * like we do for emac + */ + memcpy_fromio(regs, dev->base, sizeof(struct zmii_regs)); + return regs + 1; +} + +static int __devinit zmii_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct zmii_instance *dev; + struct resource regs; + int rc; + + rc = -ENOMEM; + dev = kzalloc(sizeof(struct zmii_instance), GFP_KERNEL); + if (dev == NULL) { + printk(KERN_ERR "%s: could not allocate ZMII device!\n", + np->full_name); + goto err_gone; + } + + mutex_init(&dev->lock); + dev->ofdev = ofdev; + dev->mode = PHY_MODE_NA; + + rc = -ENXIO; + if (of_address_to_resource(np, 0, ®s)) { + printk(KERN_ERR "%s: Can't get registers address\n", + np->full_name); + goto err_free; + } + + rc = -ENOMEM; + dev->base = (struct zmii_regs __iomem *)ioremap(regs.start, + sizeof(struct zmii_regs)); + if (dev->base == NULL) { + printk(KERN_ERR "%s: Can't map device registers!\n", + np->full_name); + goto err_free; + } + + /* We may need FER value for autodetection later */ + dev->fer_save = in_be32(&dev->base->fer); + + /* Disable all inputs by default */ + out_be32(&dev->base->fer, 0); + + printk(KERN_INFO + "ZMII %s initialized\n", ofdev->dev.of_node->full_name); + wmb(); + dev_set_drvdata(&ofdev->dev, dev); + + return 0; + + err_free: + kfree(dev); + err_gone: + return rc; +} + +static int __devexit zmii_remove(struct platform_device *ofdev) +{ + struct zmii_instance *dev = dev_get_drvdata(&ofdev->dev); + + dev_set_drvdata(&ofdev->dev, NULL); + + WARN_ON(dev->users != 0); + + iounmap(dev->base); + kfree(dev); + + return 0; +} + +static struct of_device_id zmii_match[] = +{ + { + .compatible = "ibm,zmii", + }, + /* For backward compat with old DT */ + { + .type = "emac-zmii", + }, + {}, +}; + +static struct platform_driver zmii_driver = { + .driver = { + .name = "emac-zmii", + .owner = THIS_MODULE, + .of_match_table = zmii_match, + }, + .probe = zmii_probe, + .remove = zmii_remove, +}; + +int __init zmii_init(void) +{ + return platform_driver_register(&zmii_driver); +} + +void zmii_exit(void) +{ + platform_driver_unregister(&zmii_driver); +} diff --git a/drivers/net/ethernet/ibm/emac/zmii.h b/drivers/net/ethernet/ibm/emac/zmii.h new file mode 100644 index 000000000000..ceaed823a83c --- /dev/null +++ b/drivers/net/ethernet/ibm/emac/zmii.h @@ -0,0 +1,78 @@ +/* + * drivers/net/ibm_newemac/zmii.h + * + * Driver for PowerPC 4xx on-chip ethernet controller, ZMII bridge support. + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * + * Based on the arch/ppc version of the driver: + * + * Copyright (c) 2004, 2005 Zultys Technologies. + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> + * + * Based on original work by + * Armin Kuster <akuster@mvista.com> + * Copyright 2001 MontaVista Softare Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __IBM_NEWEMAC_ZMII_H +#define __IBM_NEWEMAC_ZMII_H + +/* ZMII bridge registers */ +struct zmii_regs { + u32 fer; /* Function enable reg */ + u32 ssr; /* Speed select reg */ + u32 smiirs; /* SMII status reg */ +}; + +/* ZMII device */ +struct zmii_instance { + struct zmii_regs __iomem *base; + + /* Only one EMAC whacks us at a time */ + struct mutex lock; + + /* subset of PHY_MODE_XXXX */ + int mode; + + /* number of EMACs using this ZMII bridge */ + int users; + + /* FER value left by firmware */ + u32 fer_save; + + /* OF device instance */ + struct platform_device *ofdev; +}; + +#ifdef CONFIG_IBM_EMAC_ZMII + +extern int zmii_init(void); +extern void zmii_exit(void); +extern int zmii_attach(struct platform_device *ofdev, int input, int *mode); +extern void zmii_detach(struct platform_device *ofdev, int input); +extern void zmii_get_mdio(struct platform_device *ofdev, int input); +extern void zmii_put_mdio(struct platform_device *ofdev, int input); +extern void zmii_set_speed(struct platform_device *ofdev, int input, int speed); +extern int zmii_get_regs_len(struct platform_device *ocpdev); +extern void *zmii_dump_regs(struct platform_device *ofdev, void *buf); + +#else +# define zmii_init() 0 +# define zmii_exit() do { } while(0) +# define zmii_attach(x,y,z) (-ENXIO) +# define zmii_detach(x,y) do { } while(0) +# define zmii_get_mdio(x,y) do { } while(0) +# define zmii_put_mdio(x,y) do { } while(0) +# define zmii_set_speed(x,y,z) do { } while(0) +# define zmii_get_regs_len(x) 0 +# define zmii_dump_regs(x,buf) (buf) +#endif /* !CONFIG_IBM_EMAC_ZMII */ + +#endif /* __IBM_NEWEMAC_ZMII_H */ diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c new file mode 100644 index 000000000000..4da972eaabb4 --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -0,0 +1,1638 @@ +/* + * IBM Power Virtual Ethernet Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2003, 2010 + * + * Authors: Dave Larson <larson1@us.ibm.com> + * Santiago Leon <santil@linux.vnet.ibm.com> + * Brian King <brking@linux.vnet.ibm.com> + * Robert Jennings <rcj@linux.vnet.ibm.com> + * Anton Blanchard <anton@au.ibm.com> + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mm.h> +#include <linux/pm.h> +#include <linux/ethtool.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/slab.h> +#include <asm/hvcall.h> +#include <linux/atomic.h> +#include <asm/vio.h> +#include <asm/iommu.h> +#include <asm/firmware.h> + +#include "ibmveth.h" + +static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); + +static struct kobj_type ktype_veth_pool; + + +static const char ibmveth_driver_name[] = "ibmveth"; +static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; +#define ibmveth_driver_version "1.04" + +MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ibmveth_driver_version); + +static unsigned int tx_copybreak __read_mostly = 128; +module_param(tx_copybreak, uint, 0644); +MODULE_PARM_DESC(tx_copybreak, + "Maximum size of packet that is copied to a new buffer on transmit"); + +static unsigned int rx_copybreak __read_mostly = 128; +module_param(rx_copybreak, uint, 0644); +MODULE_PARM_DESC(rx_copybreak, + "Maximum size of packet that is copied to a new buffer on receive"); + +static unsigned int rx_flush __read_mostly = 0; +module_param(rx_flush, uint, 0644); +MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use"); + +struct ibmveth_stat { + char name[ETH_GSTRING_LEN]; + int offset; +}; + +#define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat) +#define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off)) + +struct ibmveth_stat ibmveth_stats[] = { + { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) }, + { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) }, + { "replenish_add_buff_failure", + IBMVETH_STAT_OFF(replenish_add_buff_failure) }, + { "replenish_add_buff_success", + IBMVETH_STAT_OFF(replenish_add_buff_success) }, + { "rx_invalid_buffer", IBMVETH_STAT_OFF(rx_invalid_buffer) }, + { "rx_no_buffer", IBMVETH_STAT_OFF(rx_no_buffer) }, + { "tx_map_failed", IBMVETH_STAT_OFF(tx_map_failed) }, + { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) }, + { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) }, + { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) }, +}; + +/* simple methods of getting data from the current rxq entry */ +static inline u32 ibmveth_rxq_flags(struct ibmveth_adapter *adapter) +{ + return adapter->rx_queue.queue_addr[adapter->rx_queue.index].flags_off; +} + +static inline int ibmveth_rxq_toggle(struct ibmveth_adapter *adapter) +{ + return (ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_TOGGLE) >> + IBMVETH_RXQ_TOGGLE_SHIFT; +} + +static inline int ibmveth_rxq_pending_buffer(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_toggle(adapter) == adapter->rx_queue.toggle; +} + +static inline int ibmveth_rxq_buffer_valid(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_VALID; +} + +static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK; +} + +static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter) +{ + return adapter->rx_queue.queue_addr[adapter->rx_queue.index].length; +} + +static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD; +} + +/* setup the initial settings for a buffer pool */ +static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, + u32 pool_index, u32 pool_size, + u32 buff_size, u32 pool_active) +{ + pool->size = pool_size; + pool->index = pool_index; + pool->buff_size = buff_size; + pool->threshold = pool_size * 7 / 8; + pool->active = pool_active; +} + +/* allocate and setup an buffer pool - called during open */ +static int ibmveth_alloc_buffer_pool(struct ibmveth_buff_pool *pool) +{ + int i; + + pool->free_map = kmalloc(sizeof(u16) * pool->size, GFP_KERNEL); + + if (!pool->free_map) + return -1; + + pool->dma_addr = kmalloc(sizeof(dma_addr_t) * pool->size, GFP_KERNEL); + if (!pool->dma_addr) { + kfree(pool->free_map); + pool->free_map = NULL; + return -1; + } + + pool->skbuff = kcalloc(pool->size, sizeof(void *), GFP_KERNEL); + + if (!pool->skbuff) { + kfree(pool->dma_addr); + pool->dma_addr = NULL; + + kfree(pool->free_map); + pool->free_map = NULL; + return -1; + } + + memset(pool->dma_addr, 0, sizeof(dma_addr_t) * pool->size); + + for (i = 0; i < pool->size; ++i) + pool->free_map[i] = i; + + atomic_set(&pool->available, 0); + pool->producer_index = 0; + pool->consumer_index = 0; + + return 0; +} + +static inline void ibmveth_flush_buffer(void *addr, unsigned long length) +{ + unsigned long offset; + + for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) + asm("dcbfl %0,%1" :: "b" (addr), "r" (offset)); +} + +/* replenish the buffers for a pool. note that we don't need to + * skb_reserve these since they are used for incoming... + */ +static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, + struct ibmveth_buff_pool *pool) +{ + u32 i; + u32 count = pool->size - atomic_read(&pool->available); + u32 buffers_added = 0; + struct sk_buff *skb; + unsigned int free_index, index; + u64 correlator; + unsigned long lpar_rc; + dma_addr_t dma_addr; + + mb(); + + for (i = 0; i < count; ++i) { + union ibmveth_buf_desc desc; + + skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); + + if (!skb) { + netdev_dbg(adapter->netdev, + "replenish: unable to allocate skb\n"); + adapter->replenish_no_mem++; + break; + } + + free_index = pool->consumer_index; + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + index = pool->free_map[free_index]; + + BUG_ON(index == IBM_VETH_INVALID_MAP); + BUG_ON(pool->skbuff[index] != NULL); + + dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + pool->buff_size, DMA_FROM_DEVICE); + + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) + goto failure; + + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; + pool->dma_addr[index] = dma_addr; + pool->skbuff[index] = skb; + + correlator = ((u64)pool->index << 32) | index; + *(u64 *)skb->data = correlator; + + desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; + desc.fields.address = dma_addr; + + if (rx_flush) { + unsigned int len = min(pool->buff_size, + adapter->netdev->mtu + + IBMVETH_BUFF_OH); + ibmveth_flush_buffer(skb->data, len); + } + lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, + desc.desc); + + if (lpar_rc != H_SUCCESS) { + goto failure; + } else { + buffers_added++; + adapter->replenish_add_buff_success++; + } + } + + mb(); + atomic_add(buffers_added, &(pool->available)); + return; + +failure: + pool->free_map[free_index] = index; + pool->skbuff[index] = NULL; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; + if (!dma_mapping_error(&adapter->vdev->dev, dma_addr)) + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[index], pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + + mb(); + atomic_add(buffers_added, &(pool->available)); +} + +/* replenish routine */ +static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) +{ + int i; + + adapter->replenish_task_cycles++; + + for (i = (IBMVETH_NUM_BUFF_POOLS - 1); i >= 0; i--) { + struct ibmveth_buff_pool *pool = &adapter->rx_buff_pool[i]; + + if (pool->active && + (atomic_read(&pool->available) < pool->threshold)) + ibmveth_replenish_buffer_pool(adapter, pool); + } + + adapter->rx_no_buffer = *(u64 *)(((char*)adapter->buffer_list_addr) + + 4096 - 8); +} + +/* empty and free ana buffer pool - also used to do cleanup in error paths */ +static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, + struct ibmveth_buff_pool *pool) +{ + int i; + + kfree(pool->free_map); + pool->free_map = NULL; + + if (pool->skbuff && pool->dma_addr) { + for (i = 0; i < pool->size; ++i) { + struct sk_buff *skb = pool->skbuff[i]; + if (skb) { + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[i], + pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + pool->skbuff[i] = NULL; + } + } + } + + if (pool->dma_addr) { + kfree(pool->dma_addr); + pool->dma_addr = NULL; + } + + if (pool->skbuff) { + kfree(pool->skbuff); + pool->skbuff = NULL; + } +} + +/* remove a buffer from a pool */ +static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator) +{ + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + unsigned int free_index; + struct sk_buff *skb; + + BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + skb = adapter->rx_buff_pool[pool].skbuff[index]; + + BUG_ON(skb == NULL); + + adapter->rx_buff_pool[pool].skbuff[index] = NULL; + + dma_unmap_single(&adapter->vdev->dev, + adapter->rx_buff_pool[pool].dma_addr[index], + adapter->rx_buff_pool[pool].buff_size, + DMA_FROM_DEVICE); + + free_index = adapter->rx_buff_pool[pool].producer_index; + adapter->rx_buff_pool[pool].producer_index++; + if (adapter->rx_buff_pool[pool].producer_index >= + adapter->rx_buff_pool[pool].size) + adapter->rx_buff_pool[pool].producer_index = 0; + adapter->rx_buff_pool[pool].free_map[free_index] = index; + + mb(); + + atomic_dec(&(adapter->rx_buff_pool[pool].available)); +} + +/* get the current buffer on the rx queue */ +static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter) +{ + u64 correlator = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + + BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + return adapter->rx_buff_pool[pool].skbuff[index]; +} + +/* recycle the current buffer on the rx queue */ +static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +{ + u32 q_index = adapter->rx_queue.index; + u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; + unsigned int pool = correlator >> 32; + unsigned int index = correlator & 0xffffffffUL; + union ibmveth_buf_desc desc; + unsigned long lpar_rc; + int ret = 1; + + BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); + BUG_ON(index >= adapter->rx_buff_pool[pool].size); + + if (!adapter->rx_buff_pool[pool].active) { + ibmveth_rxq_harvest_buffer(adapter); + ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); + goto out; + } + + desc.fields.flags_len = IBMVETH_BUF_VALID | + adapter->rx_buff_pool[pool].buff_size; + desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index]; + + lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); + + if (lpar_rc != H_SUCCESS) { + netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " + "during recycle rc=%ld", lpar_rc); + ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + ret = 0; + } + + if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { + adapter->rx_queue.index = 0; + adapter->rx_queue.toggle = !adapter->rx_queue.toggle; + } + +out: + return ret; +} + +static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) +{ + ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + + if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { + adapter->rx_queue.index = 0; + adapter->rx_queue.toggle = !adapter->rx_queue.toggle; + } +} + +static void ibmveth_cleanup(struct ibmveth_adapter *adapter) +{ + int i; + struct device *dev = &adapter->vdev->dev; + + if (adapter->buffer_list_addr != NULL) { + if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, + DMA_BIDIRECTIONAL); + adapter->buffer_list_dma = DMA_ERROR_CODE; + } + free_page((unsigned long)adapter->buffer_list_addr); + adapter->buffer_list_addr = NULL; + } + + if (adapter->filter_list_addr != NULL) { + if (!dma_mapping_error(dev, adapter->filter_list_dma)) { + dma_unmap_single(dev, adapter->filter_list_dma, 4096, + DMA_BIDIRECTIONAL); + adapter->filter_list_dma = DMA_ERROR_CODE; + } + free_page((unsigned long)adapter->filter_list_addr); + adapter->filter_list_addr = NULL; + } + + if (adapter->rx_queue.queue_addr != NULL) { + if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { + dma_unmap_single(dev, + adapter->rx_queue.queue_dma, + adapter->rx_queue.queue_len, + DMA_BIDIRECTIONAL); + adapter->rx_queue.queue_dma = DMA_ERROR_CODE; + } + kfree(adapter->rx_queue.queue_addr); + adapter->rx_queue.queue_addr = NULL; + } + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + if (adapter->rx_buff_pool[i].active) + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + + if (adapter->bounce_buffer != NULL) { + if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) { + dma_unmap_single(&adapter->vdev->dev, + adapter->bounce_buffer_dma, + adapter->netdev->mtu + IBMVETH_BUFF_OH, + DMA_BIDIRECTIONAL); + adapter->bounce_buffer_dma = DMA_ERROR_CODE; + } + kfree(adapter->bounce_buffer); + adapter->bounce_buffer = NULL; + } +} + +static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, + union ibmveth_buf_desc rxq_desc, u64 mac_address) +{ + int rc, try_again = 1; + + /* + * After a kexec the adapter will still be open, so our attempt to + * open it will fail. So if we get a failure we free the adapter and + * try again, but only once. + */ +retry: + rc = h_register_logical_lan(adapter->vdev->unit_address, + adapter->buffer_list_dma, rxq_desc.desc, + adapter->filter_list_dma, mac_address); + + if (rc != H_SUCCESS && try_again) { + do { + rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); + + try_again = 0; + goto retry; + } + + return rc; +} + +static int ibmveth_open(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + u64 mac_address = 0; + int rxq_entries = 1; + unsigned long lpar_rc; + int rc; + union ibmveth_buf_desc rxq_desc; + int i; + struct device *dev; + + netdev_dbg(netdev, "open starting\n"); + + napi_enable(&adapter->napi); + + for(i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + rxq_entries += adapter->rx_buff_pool[i].size; + + adapter->buffer_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + adapter->filter_list_addr = (void*) get_zeroed_page(GFP_KERNEL); + + if (!adapter->buffer_list_addr || !adapter->filter_list_addr) { + netdev_err(netdev, "unable to allocate filter or buffer list " + "pages\n"); + rc = -ENOMEM; + goto err_out; + } + + adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) * + rxq_entries; + adapter->rx_queue.queue_addr = kmalloc(adapter->rx_queue.queue_len, + GFP_KERNEL); + + if (!adapter->rx_queue.queue_addr) { + netdev_err(netdev, "unable to allocate rx queue pages\n"); + rc = -ENOMEM; + goto err_out; + } + + dev = &adapter->vdev->dev; + + adapter->buffer_list_dma = dma_map_single(dev, + adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); + adapter->filter_list_dma = dma_map_single(dev, + adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); + adapter->rx_queue.queue_dma = dma_map_single(dev, + adapter->rx_queue.queue_addr, + adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); + + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || + (dma_mapping_error(dev, adapter->filter_list_dma)) || + (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { + netdev_err(netdev, "unable to map filter or buffer list " + "pages\n"); + rc = -ENOMEM; + goto err_out; + } + + adapter->rx_queue.index = 0; + adapter->rx_queue.num_slots = rxq_entries; + adapter->rx_queue.toggle = 1; + + memcpy(&mac_address, netdev->dev_addr, netdev->addr_len); + mac_address = mac_address >> 16; + + rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | + adapter->rx_queue.queue_len; + rxq_desc.fields.address = adapter->rx_queue.queue_dma; + + netdev_dbg(netdev, "buffer list @ 0x%p\n", adapter->buffer_list_addr); + netdev_dbg(netdev, "filter list @ 0x%p\n", adapter->filter_list_addr); + netdev_dbg(netdev, "receive q @ 0x%p\n", adapter->rx_queue.queue_addr); + + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); + + lpar_rc = ibmveth_register_logical_lan(adapter, rxq_desc, mac_address); + + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_register_logical_lan failed with %ld\n", + lpar_rc); + netdev_err(netdev, "buffer TCE:0x%llx filter TCE:0x%llx rxq " + "desc:0x%llx MAC:0x%llx\n", + adapter->buffer_list_dma, + adapter->filter_list_dma, + rxq_desc.desc, + mac_address); + rc = -ENONET; + goto err_out; + } + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + if (!adapter->rx_buff_pool[i].active) + continue; + if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) { + netdev_err(netdev, "unable to alloc pool\n"); + adapter->rx_buff_pool[i].active = 0; + rc = -ENOMEM; + goto err_out; + } + } + + netdev_dbg(netdev, "registering irq 0x%x\n", netdev->irq); + rc = request_irq(netdev->irq, ibmveth_interrupt, 0, netdev->name, + netdev); + if (rc != 0) { + netdev_err(netdev, "unable to request irq 0x%x, rc %d\n", + netdev->irq, rc); + do { + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); + + goto err_out; + } + + adapter->bounce_buffer = + kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); + if (!adapter->bounce_buffer) { + netdev_err(netdev, "unable to allocate bounce buffer\n"); + rc = -ENOMEM; + goto err_out_free_irq; + } + adapter->bounce_buffer_dma = + dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, + netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { + netdev_err(netdev, "unable to map bounce buffer\n"); + rc = -ENOMEM; + goto err_out_free_irq; + } + + netdev_dbg(netdev, "initial replenish cycle\n"); + ibmveth_interrupt(netdev->irq, netdev); + + netif_start_queue(netdev); + + netdev_dbg(netdev, "open complete\n"); + + return 0; + +err_out_free_irq: + free_irq(netdev->irq, netdev); +err_out: + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return rc; +} + +static int ibmveth_close(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + long lpar_rc; + + netdev_dbg(netdev, "close starting\n"); + + napi_disable(&adapter->napi); + + if (!adapter->pool_config) + netif_stop_queue(netdev); + + h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); + + do { + lpar_rc = h_free_logical_lan(adapter->vdev->unit_address); + } while (H_IS_LONG_BUSY(lpar_rc) || (lpar_rc == H_BUSY)); + + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_free_logical_lan failed with %lx, " + "continuing with close\n", lpar_rc); + } + + free_irq(netdev->irq, netdev); + + adapter->rx_no_buffer = *(u64 *)(((char *)adapter->buffer_list_addr) + + 4096 - 8); + + ibmveth_cleanup(adapter); + + netdev_dbg(netdev, "close complete\n"); + + return 0; +} + +static int netdev_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | + SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | + ADVERTISED_FIBRE); + ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_FIBRE; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_ENABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 1; + return 0; +} + +static void netdev_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strncpy(info->driver, ibmveth_driver_name, sizeof(info->driver) - 1); + strncpy(info->version, ibmveth_driver_version, + sizeof(info->version) - 1); +} + +static u32 ibmveth_fix_features(struct net_device *dev, u32 features) +{ + /* + * Since the ibmveth firmware interface does not have the + * concept of separate tx/rx checksum offload enable, if rx + * checksum is disabled we also have to disable tx checksum + * offload. Once we disable rx checksum offload, we are no + * longer allowed to send tx buffers that are not properly + * checksummed. + */ + + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_ALL_CSUM; + + return features; +} + +static int ibmveth_set_csum_offload(struct net_device *dev, u32 data) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + unsigned long set_attr, clr_attr, ret_attr; + unsigned long set_attr6, clr_attr6; + long ret, ret4, ret6; + int rc1 = 0, rc2 = 0; + int restart = 0; + + if (netif_running(dev)) { + restart = 1; + adapter->pool_config = 1; + ibmveth_close(dev); + adapter->pool_config = 0; + } + + set_attr = 0; + clr_attr = 0; + set_attr6 = 0; + clr_attr6 = 0; + + if (data) { + set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; + set_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM; + } else { + clr_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM; + clr_attr6 = IBMVETH_ILLAN_IPV6_TCP_CSUM; + } + + ret = h_illan_attributes(adapter->vdev->unit_address, 0, 0, &ret_attr); + + if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) && + !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) && + (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) { + ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr, + set_attr, &ret_attr); + + if (ret4 != H_SUCCESS) { + netdev_err(dev, "unable to change IPv4 checksum " + "offload settings. %d rc=%ld\n", + data, ret4); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr, clr_attr, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IP_CSUM; + + } else { + adapter->fw_ipv4_csum_support = data; + } + + ret6 = h_illan_attributes(adapter->vdev->unit_address, + clr_attr6, set_attr6, &ret_attr); + + if (ret6 != H_SUCCESS) { + netdev_err(dev, "unable to change IPv6 checksum " + "offload settings. %d rc=%ld\n", + data, ret6); + + h_illan_attributes(adapter->vdev->unit_address, + set_attr6, clr_attr6, &ret_attr); + + if (data == 1) + dev->features &= ~NETIF_F_IPV6_CSUM; + + } else + adapter->fw_ipv6_csum_support = data; + + if (ret4 == H_SUCCESS || ret6 == H_SUCCESS) + adapter->rx_csum = data; + else + rc1 = -EIO; + } else { + rc1 = -EIO; + netdev_err(dev, "unable to change checksum offload settings." + " %d rc=%ld ret_attr=%lx\n", data, ret, + ret_attr); + } + + if (restart) + rc2 = ibmveth_open(dev); + + return rc1 ? rc1 : rc2; +} + +static int ibmveth_set_features(struct net_device *dev, u32 features) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + int rx_csum = !!(features & NETIF_F_RXCSUM); + int rc; + + if (rx_csum == adapter->rx_csum) + return 0; + + rc = ibmveth_set_csum_offload(dev, rx_csum); + if (rc && !adapter->rx_csum) + dev->features = features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + + return rc; +} + +static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmveth_stats[i].name, ETH_GSTRING_LEN); +} + +static int ibmveth_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ibmveth_stats); + default: + return -EOPNOTSUPP; + } +} + +static void ibmveth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i; + struct ibmveth_adapter *adapter = netdev_priv(dev); + + for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++) + data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset); +} + +static const struct ethtool_ops netdev_ethtool_ops = { + .get_drvinfo = netdev_get_drvinfo, + .get_settings = netdev_get_settings, + .get_link = ethtool_op_get_link, + .get_strings = ibmveth_get_strings, + .get_sset_count = ibmveth_get_sset_count, + .get_ethtool_stats = ibmveth_get_ethtool_stats, +}; + +static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return -EOPNOTSUPP; +} + +#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) + +static int ibmveth_send(struct ibmveth_adapter *adapter, + union ibmveth_buf_desc *descs) +{ + unsigned long correlator; + unsigned int retry_count; + unsigned long ret; + + /* + * The retry count sets a maximum for the number of broadcast and + * multicast destinations within the system. + */ + retry_count = 1024; + correlator = 0; + do { + ret = h_send_logical_lan(adapter->vdev->unit_address, + descs[0].desc, descs[1].desc, + descs[2].desc, descs[3].desc, + descs[4].desc, descs[5].desc, + correlator, &correlator); + } while ((ret == H_BUSY) && (retry_count--)); + + if (ret != H_SUCCESS && ret != H_DROPPED) { + netdev_err(adapter->netdev, "tx: h_send_logical_lan failed " + "with rc=%ld\n", ret); + return 1; + } + + return 0; +} + +static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned int desc_flags; + union ibmveth_buf_desc descs[6]; + int last, i; + int force_bounce = 0; + dma_addr_t dma_addr; + + /* + * veth handles a maximum of 6 segments including the header, so + * we have to linearize the skb if there are more than this. + */ + if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) { + netdev->stats.tx_dropped++; + goto out; + } + + /* veth can't checksum offload UDP */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + ((skb->protocol == htons(ETH_P_IP) && + ip_hdr(skb)->protocol != IPPROTO_TCP) || + (skb->protocol == htons(ETH_P_IPV6) && + ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) && + skb_checksum_help(skb)) { + + netdev_err(netdev, "tx: failed to checksum packet\n"); + netdev->stats.tx_dropped++; + goto out; + } + + desc_flags = IBMVETH_BUF_VALID; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + unsigned char *buf = skb_transport_header(skb) + + skb->csum_offset; + + desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD); + + /* Need to zero out the checksum */ + buf[0] = 0; + buf[1] = 0; + } + +retry_bounce: + memset(descs, 0, sizeof(descs)); + + /* + * If a linear packet is below the rx threshold then + * copy it into the static bounce buffer. This avoids the + * cost of a TCE insert and remove. + */ + if (force_bounce || (!skb_is_nonlinear(skb) && + (skb->len < tx_copybreak))) { + skb_copy_from_linear_data(skb, adapter->bounce_buffer, + skb->len); + + descs[0].fields.flags_len = desc_flags | skb->len; + descs[0].fields.address = adapter->bounce_buffer_dma; + + if (ibmveth_send(adapter, descs)) { + adapter->tx_send_failed++; + netdev->stats.tx_dropped++; + } else { + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += skb->len; + } + + goto out; + } + + /* Map the header */ + dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) + goto map_failed; + + descs[0].fields.flags_len = desc_flags | skb_headlen(skb); + descs[0].fields.address = dma_addr; + + /* Map the frags */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0, + frag->size, DMA_TO_DEVICE); + + if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) + goto map_failed_frags; + + descs[i+1].fields.flags_len = desc_flags | frag->size; + descs[i+1].fields.address = dma_addr; + } + + if (ibmveth_send(adapter, descs)) { + adapter->tx_send_failed++; + netdev->stats.tx_dropped++; + } else { + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += skb->len; + } + + dma_unmap_single(&adapter->vdev->dev, + descs[0].fields.address, + descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); + + for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) + dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, + descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); + +out: + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +map_failed_frags: + last = i+1; + for (i = 0; i < last; i++) + dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, + descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, + DMA_TO_DEVICE); + +map_failed: + if (!firmware_has_feature(FW_FEATURE_CMO)) + netdev_err(netdev, "tx: unable to map xmit buffer\n"); + adapter->tx_map_failed++; + skb_linearize(skb); + force_bounce = 1; + goto retry_bounce; +} + +static int ibmveth_poll(struct napi_struct *napi, int budget) +{ + struct ibmveth_adapter *adapter = + container_of(napi, struct ibmveth_adapter, napi); + struct net_device *netdev = adapter->netdev; + int frames_processed = 0; + unsigned long lpar_rc; + +restart_poll: + do { + if (!ibmveth_rxq_pending_buffer(adapter)) + break; + + smp_rmb(); + if (!ibmveth_rxq_buffer_valid(adapter)) { + wmb(); /* suggested by larson1 */ + adapter->rx_invalid_buffer++; + netdev_dbg(netdev, "recycling invalid buffer\n"); + ibmveth_rxq_recycle_buffer(adapter); + } else { + struct sk_buff *skb, *new_skb; + int length = ibmveth_rxq_frame_length(adapter); + int offset = ibmveth_rxq_frame_offset(adapter); + int csum_good = ibmveth_rxq_csum_good(adapter); + + skb = ibmveth_rxq_get_buffer(adapter); + + new_skb = NULL; + if (length < rx_copybreak) + new_skb = netdev_alloc_skb(netdev, length); + + if (new_skb) { + skb_copy_to_linear_data(new_skb, + skb->data + offset, + length); + if (rx_flush) + ibmveth_flush_buffer(skb->data, + length + offset); + if (!ibmveth_rxq_recycle_buffer(adapter)) + kfree_skb(skb); + skb = new_skb; + } else { + ibmveth_rxq_harvest_buffer(adapter); + skb_reserve(skb, offset); + } + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, netdev); + + if (csum_good) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + netif_receive_skb(skb); /* send it up */ + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + frames_processed++; + } + } while (frames_processed < budget); + + ibmveth_replenish_task(adapter); + + if (frames_processed < budget) { + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. + */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_ENABLE); + + BUG_ON(lpar_rc != H_SUCCESS); + + napi_complete(napi); + + if (ibmveth_rxq_pending_buffer(adapter) && + napi_reschedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + goto restart_poll; + } + } + + return frames_processed; +} + +static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) +{ + struct net_device *netdev = dev_instance; + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned long lpar_rc; + + if (napi_schedule_prep(&adapter->napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + BUG_ON(lpar_rc != H_SUCCESS); + __napi_schedule(&adapter->napi); + } + return IRQ_HANDLED; +} + +static void ibmveth_set_multicast_list(struct net_device *netdev) +{ + struct ibmveth_adapter *adapter = netdev_priv(netdev); + unsigned long lpar_rc; + + if ((netdev->flags & IFF_PROMISC) || + (netdev_mc_count(netdev) > adapter->mcastFilterSize)) { + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableRecv | + IbmVethMcastDisableFiltering, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "entering promisc mode\n", lpar_rc); + } + } else { + struct netdev_hw_addr *ha; + /* clear the filter table & disable filtering */ + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableRecv | + IbmVethMcastDisableFiltering | + IbmVethMcastClearFilterTable, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "attempting to clear filter table\n", + lpar_rc); + } + /* add the addresses to the filter table */ + netdev_for_each_mc_addr(ha, netdev) { + /* add the multicast address to the filter table */ + unsigned long mcast_addr = 0; + memcpy(((char *)&mcast_addr)+2, ha->addr, 6); + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastAddFilter, + mcast_addr); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld " + "when adding an entry to the filter " + "table\n", lpar_rc); + } + } + + /* re-enable filtering */ + lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, + IbmVethMcastEnableFiltering, + 0); + if (lpar_rc != H_SUCCESS) { + netdev_err(netdev, "h_multicast_ctrl rc=%ld when " + "enabling filtering\n", lpar_rc); + } + } +} + +static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct vio_dev *viodev = adapter->vdev; + int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; + int i, rc; + int need_restart = 0; + + if (new_mtu < IBMVETH_MIN_MTU) + return -EINVAL; + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) + break; + + if (i == IBMVETH_NUM_BUFF_POOLS) + return -EINVAL; + + /* Deactivate all the buffer pools so that the next loop can activate + only the buffer pools necessary to hold the new MTU */ + if (netif_running(adapter->netdev)) { + need_restart = 1; + adapter->pool_config = 1; + ibmveth_close(adapter->netdev); + adapter->pool_config = 0; + } + + /* Look for an active buffer pool that can hold the new MTU */ + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + adapter->rx_buff_pool[i].active = 1; + + if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { + dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); + if (need_restart) { + return ibmveth_open(adapter->netdev); + } + return 0; + } + } + + if (need_restart && (rc = ibmveth_open(adapter->netdev))) + return rc; + + return -EINVAL; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ibmveth_poll_controller(struct net_device *dev) +{ + ibmveth_replenish_task(netdev_priv(dev)); + ibmveth_interrupt(dev->irq, dev); +} +#endif + +/** + * ibmveth_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmveth_adapter *adapter; + unsigned long ret; + int i; + int rxqentries = 1; + + /* netdev inits at probe time along with the structures we need below*/ + if (netdev == NULL) + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + + adapter = netdev_priv(netdev); + + ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; + ret += IOMMU_PAGE_ALIGN(netdev->mtu); + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + /* add the size of the active receive buffers */ + if (adapter->rx_buff_pool[i].active) + ret += + adapter->rx_buff_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size); + rxqentries += adapter->rx_buff_pool[i].size; + } + /* add the size of the receive queue entries */ + ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + + return ret; +} + +static const struct net_device_ops ibmveth_netdev_ops = { + .ndo_open = ibmveth_open, + .ndo_stop = ibmveth_close, + .ndo_start_xmit = ibmveth_start_xmit, + .ndo_set_rx_mode = ibmveth_set_multicast_list, + .ndo_do_ioctl = ibmveth_ioctl, + .ndo_change_mtu = ibmveth_change_mtu, + .ndo_fix_features = ibmveth_fix_features, + .ndo_set_features = ibmveth_set_features, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ibmveth_poll_controller, +#endif +}; + +static int __devinit ibmveth_probe(struct vio_dev *dev, + const struct vio_device_id *id) +{ + int rc, i; + struct net_device *netdev; + struct ibmveth_adapter *adapter; + unsigned char *mac_addr_p; + unsigned int *mcastFilterSize_p; + + dev_dbg(&dev->dev, "entering ibmveth_probe for UA 0x%x\n", + dev->unit_address); + + mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR, + NULL); + if (!mac_addr_p) { + dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n"); + return -EINVAL; + } + + mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev, + VETH_MCAST_FILTER_SIZE, NULL); + if (!mcastFilterSize_p) { + dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE " + "attribute\n"); + return -EINVAL; + } + + netdev = alloc_etherdev(sizeof(struct ibmveth_adapter)); + + if (!netdev) + return -ENOMEM; + + adapter = netdev_priv(netdev); + dev_set_drvdata(&dev->dev, netdev); + + adapter->vdev = dev; + adapter->netdev = netdev; + adapter->mcastFilterSize = *mcastFilterSize_p; + adapter->pool_config = 0; + + netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); + + /* + * Some older boxes running PHYP non-natively have an OF that returns + * a 8-byte local-mac-address field (and the first 2 bytes have to be + * ignored) while newer boxes' OF return a 6-byte field. Note that + * IEEE 1275 specifies that local-mac-address must be a 6-byte field. + * The RPA doc specifies that the first byte must be 10b, so we'll + * just look for it to solve this 8 vs. 6 byte field issue + */ + if ((*mac_addr_p & 0x3) != 0x02) + mac_addr_p += 2; + + adapter->mac_addr = 0; + memcpy(&adapter->mac_addr, mac_addr_p, 6); + + netdev->irq = dev->irq; + netdev->netdev_ops = &ibmveth_netdev_ops; + netdev->ethtool_ops = &netdev_ethtool_ops; + SET_NETDEV_DEV(netdev, &dev->dev); + netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + netdev->features |= netdev->hw_features; + + memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len); + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; + int error; + + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + error = kobject_init_and_add(kobj, &ktype_veth_pool, + &dev->dev.kobj, "pool%d", i); + if (!error) + kobject_uevent(kobj, KOBJ_ADD); + } + + netdev_dbg(netdev, "adapter @ 0x%p\n", adapter); + + adapter->buffer_list_dma = DMA_ERROR_CODE; + adapter->filter_list_dma = DMA_ERROR_CODE; + adapter->rx_queue.queue_dma = DMA_ERROR_CODE; + + netdev_dbg(netdev, "registering netdev...\n"); + + ibmveth_set_features(netdev, netdev->features); + + rc = register_netdev(netdev); + + if (rc) { + netdev_dbg(netdev, "failed to register netdev rc=%d\n", rc); + free_netdev(netdev); + return rc; + } + + netdev_dbg(netdev, "registered\n"); + + return 0; +} + +static int __devexit ibmveth_remove(struct vio_dev *dev) +{ + struct net_device *netdev = dev_get_drvdata(&dev->dev); + struct ibmveth_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + kobject_put(&adapter->rx_buff_pool[i].kobj); + + unregister_netdev(netdev); + + free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); + + return 0; +} + +static struct attribute veth_active_attr; +static struct attribute veth_num_attr; +static struct attribute veth_size_attr; + +static ssize_t veth_pool_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ibmveth_buff_pool *pool = container_of(kobj, + struct ibmveth_buff_pool, + kobj); + + if (attr == &veth_active_attr) + return sprintf(buf, "%d\n", pool->active); + else if (attr == &veth_num_attr) + return sprintf(buf, "%d\n", pool->size); + else if (attr == &veth_size_attr) + return sprintf(buf, "%d\n", pool->buff_size); + return 0; +} + +static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct ibmveth_buff_pool *pool = container_of(kobj, + struct ibmveth_buff_pool, + kobj); + struct net_device *netdev = dev_get_drvdata( + container_of(kobj->parent, struct device, kobj)); + struct ibmveth_adapter *adapter = netdev_priv(netdev); + long value = simple_strtol(buf, NULL, 10); + long rc; + + if (attr == &veth_active_attr) { + if (value && !pool->active) { + if (netif_running(netdev)) { + if (ibmveth_alloc_buffer_pool(pool)) { + netdev_err(netdev, + "unable to alloc pool\n"); + return -ENOMEM; + } + pool->active = 1; + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->active = 1; + } + } else if (!value && pool->active) { + int mtu = netdev->mtu + IBMVETH_BUFF_OH; + int i; + /* Make sure there is a buffer pool with buffers that + can hold a packet of the size of the MTU */ + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { + if (pool == &adapter->rx_buff_pool[i]) + continue; + if (!adapter->rx_buff_pool[i].active) + continue; + if (mtu <= adapter->rx_buff_pool[i].buff_size) + break; + } + + if (i == IBMVETH_NUM_BUFF_POOLS) { + netdev_err(netdev, "no active pool >= MTU\n"); + return -EPERM; + } + + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + pool->active = 0; + adapter->pool_config = 0; + if ((rc = ibmveth_open(netdev))) + return rc; + } + pool->active = 0; + } + } else if (attr == &veth_num_attr) { + if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { + return -EINVAL; + } else { + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + pool->size = value; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->size = value; + } + } + } else if (attr == &veth_size_attr) { + if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { + return -EINVAL; + } else { + if (netif_running(netdev)) { + adapter->pool_config = 1; + ibmveth_close(netdev); + adapter->pool_config = 0; + pool->buff_size = value; + if ((rc = ibmveth_open(netdev))) + return rc; + } else { + pool->buff_size = value; + } + } + } + + /* kick the interrupt handler to allocate/deallocate pools */ + ibmveth_interrupt(netdev->irq, netdev); + return count; +} + + +#define ATTR(_name, _mode) \ + struct attribute veth_##_name##_attr = { \ + .name = __stringify(_name), .mode = _mode, \ + }; + +static ATTR(active, 0644); +static ATTR(num, 0644); +static ATTR(size, 0644); + +static struct attribute *veth_pool_attrs[] = { + &veth_active_attr, + &veth_num_attr, + &veth_size_attr, + NULL, +}; + +static const struct sysfs_ops veth_pool_ops = { + .show = veth_pool_show, + .store = veth_pool_store, +}; + +static struct kobj_type ktype_veth_pool = { + .release = NULL, + .sysfs_ops = &veth_pool_ops, + .default_attrs = veth_pool_attrs, +}; + +static int ibmveth_resume(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + ibmveth_interrupt(netdev->irq, netdev); + return 0; +} + +static struct vio_device_id ibmveth_device_table[] __devinitdata = { + { "network", "IBM,l-lan"}, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmveth_device_table); + +static struct dev_pm_ops ibmveth_pm_ops = { + .resume = ibmveth_resume +}; + +static struct vio_driver ibmveth_driver = { + .id_table = ibmveth_device_table, + .probe = ibmveth_probe, + .remove = ibmveth_remove, + .get_desired_dma = ibmveth_get_desired_dma, + .driver = { + .name = ibmveth_driver_name, + .owner = THIS_MODULE, + .pm = &ibmveth_pm_ops, + } +}; + +static int __init ibmveth_module_init(void) +{ + printk(KERN_DEBUG "%s: %s %s\n", ibmveth_driver_name, + ibmveth_driver_string, ibmveth_driver_version); + + return vio_register_driver(&ibmveth_driver); +} + +static void __exit ibmveth_module_exit(void) +{ + vio_unregister_driver(&ibmveth_driver); +} + +module_init(ibmveth_module_init); +module_exit(ibmveth_module_exit); diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h new file mode 100644 index 000000000000..43a794fab9ff --- /dev/null +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -0,0 +1,195 @@ +/* + * IBM Power Virtual Ethernet Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2003, 2010 + * + * Authors: Dave Larson <larson1@us.ibm.com> + * Santiago Leon <santil@linux.vnet.ibm.com> + * Brian King <brking@linux.vnet.ibm.com> + * Robert Jennings <rcj@linux.vnet.ibm.com> + * Anton Blanchard <anton@au.ibm.com> + */ + +#ifndef _IBMVETH_H +#define _IBMVETH_H + +/* constants for H_MULTICAST_CTRL */ +#define IbmVethMcastReceptionModifyBit 0x80000UL +#define IbmVethMcastReceptionEnableBit 0x20000UL +#define IbmVethMcastFilterModifyBit 0x40000UL +#define IbmVethMcastFilterEnableBit 0x10000UL + +#define IbmVethMcastEnableRecv (IbmVethMcastReceptionModifyBit | IbmVethMcastReceptionEnableBit) +#define IbmVethMcastDisableRecv (IbmVethMcastReceptionModifyBit) +#define IbmVethMcastEnableFiltering (IbmVethMcastFilterModifyBit | IbmVethMcastFilterEnableBit) +#define IbmVethMcastDisableFiltering (IbmVethMcastFilterModifyBit) +#define IbmVethMcastAddFilter 0x1UL +#define IbmVethMcastRemoveFilter 0x2UL +#define IbmVethMcastClearFilterTable 0x3UL + +#define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL +#define IBMVETH_ILLAN_TRUNK_PRI_MASK 0x0000000000000F00UL +#define IBMVETH_ILLAN_IPV6_TCP_CSUM 0x0000000000000004UL +#define IBMVETH_ILLAN_IPV4_TCP_CSUM 0x0000000000000002UL +#define IBMVETH_ILLAN_ACTIVE_TRUNK 0x0000000000000001UL + +/* hcall macros */ +#define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ + plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) + +#define h_free_logical_lan(ua) \ + plpar_hcall_norets(H_FREE_LOGICAL_LAN, ua) + +#define h_add_logical_lan_buffer(ua, buf) \ + plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) + +static inline long h_send_logical_lan(unsigned long unit_address, + unsigned long desc1, unsigned long desc2, unsigned long desc3, + unsigned long desc4, unsigned long desc5, unsigned long desc6, + unsigned long corellator_in, unsigned long *corellator_out) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, desc1, + desc2, desc3, desc4, desc5, desc6, corellator_in); + + *corellator_out = retbuf[0]; + + return rc; +} + +static inline long h_illan_attributes(unsigned long unit_address, + unsigned long reset_mask, unsigned long set_mask, + unsigned long *ret_attributes) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_ILLAN_ATTRIBUTES, retbuf, unit_address, + reset_mask, set_mask); + + *ret_attributes = retbuf[0]; + + return rc; +} + +#define h_multicast_ctrl(ua, cmd, mac) \ + plpar_hcall_norets(H_MULTICAST_CTRL, ua, cmd, mac) + +#define h_change_logical_lan_mac(ua, mac) \ + plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) + +#define IBMVETH_NUM_BUFF_POOLS 5 +#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */ +#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ +#define IBMVETH_MIN_MTU 68 +#define IBMVETH_MAX_POOL_COUNT 4096 +#define IBMVETH_BUFF_LIST_SIZE 4096 +#define IBMVETH_FILT_LIST_SIZE 4096 +#define IBMVETH_MAX_BUF_SIZE (1024 * 128) + +static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; +static int pool_count[] = { 256, 512, 256, 256, 256 }; +static int pool_active[] = { 1, 1, 0, 0, 0}; + +#define IBM_VETH_INVALID_MAP ((u16)0xffff) + +struct ibmveth_buff_pool { + u32 size; + u32 index; + u32 buff_size; + u32 threshold; + atomic_t available; + u32 consumer_index; + u32 producer_index; + u16 *free_map; + dma_addr_t *dma_addr; + struct sk_buff **skbuff; + int active; + struct kobject kobj; +}; + +struct ibmveth_rx_q { + u64 index; + u64 num_slots; + u64 toggle; + dma_addr_t queue_dma; + u32 queue_len; + struct ibmveth_rx_q_entry *queue_addr; +}; + +struct ibmveth_adapter { + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + struct net_device_stats stats; + unsigned int mcastFilterSize; + unsigned long mac_addr; + void * buffer_list_addr; + void * filter_list_addr; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int pool_config; + int rx_csum; + void *bounce_buffer; + dma_addr_t bounce_buffer_dma; + + u64 fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; +}; + +struct ibmveth_buf_desc_fields { + u32 flags_len; +#define IBMVETH_BUF_VALID 0x80000000 +#define IBMVETH_BUF_TOGGLE 0x40000000 +#define IBMVETH_BUF_NO_CSUM 0x02000000 +#define IBMVETH_BUF_CSUM_GOOD 0x01000000 +#define IBMVETH_BUF_LEN_MASK 0x00FFFFFF + u32 address; +}; + +union ibmveth_buf_desc { + u64 desc; + struct ibmveth_buf_desc_fields fields; +}; + +struct ibmveth_rx_q_entry { + u32 flags_off; +#define IBMVETH_RXQ_TOGGLE 0x80000000 +#define IBMVETH_RXQ_TOGGLE_SHIFT 31 +#define IBMVETH_RXQ_VALID 0x40000000 +#define IBMVETH_RXQ_NO_CSUM 0x02000000 +#define IBMVETH_RXQ_CSUM_GOOD 0x01000000 +#define IBMVETH_RXQ_OFF_MASK 0x0000FFFF + + u32 length; + u64 correlator; +}; + +#endif /* _IBMVETH_H */ diff --git a/drivers/net/ethernet/ibm/iseries_veth.c b/drivers/net/ethernet/ibm/iseries_veth.c new file mode 100644 index 000000000000..4326681df382 --- /dev/null +++ b/drivers/net/ethernet/ibm/iseries_veth.c @@ -0,0 +1,1710 @@ +/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */ +/* + * IBM eServer iSeries Virtual Ethernet Device Driver + * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. + * Substantially cleaned up by: + * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation. + * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * + * This module implements the virtual ethernet device for iSeries LPAR + * Linux. It uses hypervisor message passing to implement an + * ethernet-like network device communicating between partitions on + * the iSeries. + * + * The iSeries LPAR hypervisor currently allows for up to 16 different + * virtual ethernets. These are all dynamically configurable on + * OS/400 partitions, but dynamic configuration is not supported under + * Linux yet. An ethXX network device will be created for each + * virtual ethernet this partition is connected to. + * + * - This driver is responsible for routing packets to and from other + * partitions. The MAC addresses used by the virtual ethernets + * contains meaning and must not be modified. + * + * - Having 2 virtual ethernets to the same remote partition DOES NOT + * double the available bandwidth. The 2 devices will share the + * available hypervisor bandwidth. + * + * - If you send a packet to your own mac address, it will just be + * dropped, you won't get it on the receive side. + * + * - Multicast is implemented by sending the frame frame to every + * other partition. It is the responsibility of the receiving + * partition to filter the addresses desired. + * + * Tunable parameters: + * + * VETH_NUMBUFFERS: This compile time option defaults to 120. It + * controls how much memory Linux will allocate per remote partition + * it is communicating with. It can be thought of as the maximum + * number of packets outstanding to a remote partition at a time. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/ethtool.h> +#include <linux/if_ether.h> +#include <linux/slab.h> + +#include <asm/abs_addr.h> +#include <asm/iseries/mf.h> +#include <asm/uaccess.h> +#include <asm/firmware.h> +#include <asm/iseries/hv_lp_config.h> +#include <asm/iseries/hv_types.h> +#include <asm/iseries/hv_lp_event.h> +#include <asm/iommu.h> +#include <asm/vio.h> + +#undef DEBUG + +MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>"); +MODULE_DESCRIPTION("iSeries Virtual ethernet driver"); +MODULE_LICENSE("GPL"); + +#define VETH_EVENT_CAP (0) +#define VETH_EVENT_FRAMES (1) +#define VETH_EVENT_MONITOR (2) +#define VETH_EVENT_FRAMES_ACK (3) + +#define VETH_MAX_ACKS_PER_MSG (20) +#define VETH_MAX_FRAMES_PER_MSG (6) + +struct veth_frames_data { + u32 addr[VETH_MAX_FRAMES_PER_MSG]; + u16 len[VETH_MAX_FRAMES_PER_MSG]; + u32 eofmask; +}; +#define VETH_EOF_SHIFT (32-VETH_MAX_FRAMES_PER_MSG) + +struct veth_frames_ack_data { + u16 token[VETH_MAX_ACKS_PER_MSG]; +}; + +struct veth_cap_data { + u8 caps_version; + u8 rsvd1; + u16 num_buffers; + u16 ack_threshold; + u16 rsvd2; + u32 ack_timeout; + u32 rsvd3; + u64 rsvd4[3]; +}; + +struct veth_lpevent { + struct HvLpEvent base_event; + union { + struct veth_cap_data caps_data; + struct veth_frames_data frames_data; + struct veth_frames_ack_data frames_ack_data; + } u; + +}; + +#define DRV_NAME "iseries_veth" +#define DRV_VERSION "2.0" + +#define VETH_NUMBUFFERS (120) +#define VETH_ACKTIMEOUT (1000000) /* microseconds */ +#define VETH_MAX_MCAST (12) + +#define VETH_MAX_MTU (9000) + +#if VETH_NUMBUFFERS < 10 +#define ACK_THRESHOLD (1) +#elif VETH_NUMBUFFERS < 20 +#define ACK_THRESHOLD (4) +#elif VETH_NUMBUFFERS < 40 +#define ACK_THRESHOLD (10) +#else +#define ACK_THRESHOLD (20) +#endif + +#define VETH_STATE_SHUTDOWN (0x0001) +#define VETH_STATE_OPEN (0x0002) +#define VETH_STATE_RESET (0x0004) +#define VETH_STATE_SENTMON (0x0008) +#define VETH_STATE_SENTCAPS (0x0010) +#define VETH_STATE_GOTCAPACK (0x0020) +#define VETH_STATE_GOTCAPS (0x0040) +#define VETH_STATE_SENTCAPACK (0x0080) +#define VETH_STATE_READY (0x0100) + +struct veth_msg { + struct veth_msg *next; + struct veth_frames_data data; + int token; + int in_use; + struct sk_buff *skb; + struct device *dev; +}; + +struct veth_lpar_connection { + HvLpIndex remote_lp; + struct delayed_work statemachine_wq; + struct veth_msg *msgs; + int num_events; + struct veth_cap_data local_caps; + + struct kobject kobject; + struct timer_list ack_timer; + + struct timer_list reset_timer; + unsigned int reset_timeout; + unsigned long last_contact; + int outstanding_tx; + + spinlock_t lock; + unsigned long state; + HvLpInstanceId src_inst; + HvLpInstanceId dst_inst; + struct veth_lpevent cap_event, cap_ack_event; + u16 pending_acks[VETH_MAX_ACKS_PER_MSG]; + u32 num_pending_acks; + + int num_ack_events; + struct veth_cap_data remote_caps; + u32 ack_timeout; + + struct veth_msg *msg_stack_head; +}; + +struct veth_port { + struct device *dev; + u64 mac_addr; + HvLpIndexMap lpar_map; + + /* queue_lock protects the stopped_map and dev's queue. */ + spinlock_t queue_lock; + HvLpIndexMap stopped_map; + + /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */ + rwlock_t mcast_gate; + int promiscuous; + int num_mcast; + u64 mcast_addr[VETH_MAX_MCAST]; + + struct kobject kobject; +}; + +static HvLpIndex this_lp; +static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */ +static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */ + +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev); +static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *); +static void veth_wake_queues(struct veth_lpar_connection *cnx); +static void veth_stop_queues(struct veth_lpar_connection *cnx); +static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *); +static void veth_release_connection(struct kobject *kobject); +static void veth_timed_ack(unsigned long ptr); +static void veth_timed_reset(unsigned long ptr); + +/* + * Utility functions + */ + +#define veth_info(fmt, args...) \ + printk(KERN_INFO DRV_NAME ": " fmt, ## args) + +#define veth_error(fmt, args...) \ + printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args) + +#ifdef DEBUG +#define veth_debug(fmt, args...) \ + printk(KERN_DEBUG DRV_NAME ": " fmt, ## args) +#else +#define veth_debug(fmt, args...) do {} while (0) +#endif + +/* You must hold the connection's lock when you call this function. */ +static inline void veth_stack_push(struct veth_lpar_connection *cnx, + struct veth_msg *msg) +{ + msg->next = cnx->msg_stack_head; + cnx->msg_stack_head = msg; +} + +/* You must hold the connection's lock when you call this function. */ +static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx) +{ + struct veth_msg *msg; + + msg = cnx->msg_stack_head; + if (msg) + cnx->msg_stack_head = cnx->msg_stack_head->next; + + return msg; +} + +/* You must hold the connection's lock when you call this function. */ +static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx) +{ + return cnx->msg_stack_head == NULL; +} + +static inline HvLpEvent_Rc +veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype, + HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype, + u64 token, + u64 data1, u64 data2, u64 data3, u64 data4, u64 data5) +{ + return HvCallEvent_signalLpEventFast(cnx->remote_lp, + HvLpEvent_Type_VirtualLan, + subtype, ackind, acktype, + cnx->src_inst, + cnx->dst_inst, + token, data1, data2, data3, + data4, data5); +} + +static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx, + u16 subtype, u64 token, void *data) +{ + u64 *p = (u64 *) data; + + return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + token, p[0], p[1], p[2], p[3], p[4]); +} + +struct veth_allocation { + struct completion c; + int num; +}; + +static void veth_complete_allocation(void *parm, int number) +{ + struct veth_allocation *vc = (struct veth_allocation *)parm; + + vc->num = number; + complete(&vc->c); +} + +static int veth_allocate_events(HvLpIndex rlp, int number) +{ + struct veth_allocation vc = + { COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 }; + + mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan, + sizeof(struct veth_lpevent), number, + &veth_complete_allocation, &vc); + wait_for_completion(&vc.c); + + return vc.num; +} + +/* + * sysfs support + */ + +struct veth_cnx_attribute { + struct attribute attr; + ssize_t (*show)(struct veth_lpar_connection *, char *buf); + ssize_t (*store)(struct veth_lpar_connection *, const char *buf); +}; + +static ssize_t veth_cnx_attribute_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct veth_cnx_attribute *cnx_attr; + struct veth_lpar_connection *cnx; + + cnx_attr = container_of(attr, struct veth_cnx_attribute, attr); + cnx = container_of(kobj, struct veth_lpar_connection, kobject); + + if (!cnx_attr->show) + return -EIO; + + return cnx_attr->show(cnx, buf); +} + +#define CUSTOM_CNX_ATTR(_name, _format, _expression) \ +static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\ +{ \ + return sprintf(buf, _format, _expression); \ +} \ +struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name) + +#define SIMPLE_CNX_ATTR(_name) \ + CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name) + +SIMPLE_CNX_ATTR(outstanding_tx); +SIMPLE_CNX_ATTR(remote_lp); +SIMPLE_CNX_ATTR(num_events); +SIMPLE_CNX_ATTR(src_inst); +SIMPLE_CNX_ATTR(dst_inst); +SIMPLE_CNX_ATTR(num_pending_acks); +SIMPLE_CNX_ATTR(num_ack_events); +CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout)); +CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout)); +CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state); +CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ? + jiffies_to_msecs(jiffies - cnx->last_contact) : 0); + +#define GET_CNX_ATTR(_name) (&veth_cnx_attr_##_name.attr) + +static struct attribute *veth_cnx_default_attrs[] = { + GET_CNX_ATTR(outstanding_tx), + GET_CNX_ATTR(remote_lp), + GET_CNX_ATTR(num_events), + GET_CNX_ATTR(reset_timeout), + GET_CNX_ATTR(last_contact), + GET_CNX_ATTR(state), + GET_CNX_ATTR(src_inst), + GET_CNX_ATTR(dst_inst), + GET_CNX_ATTR(num_pending_acks), + GET_CNX_ATTR(num_ack_events), + GET_CNX_ATTR(ack_timeout), + NULL +}; + +static const struct sysfs_ops veth_cnx_sysfs_ops = { + .show = veth_cnx_attribute_show +}; + +static struct kobj_type veth_lpar_connection_ktype = { + .release = veth_release_connection, + .sysfs_ops = &veth_cnx_sysfs_ops, + .default_attrs = veth_cnx_default_attrs +}; + +struct veth_port_attribute { + struct attribute attr; + ssize_t (*show)(struct veth_port *, char *buf); + ssize_t (*store)(struct veth_port *, const char *buf); +}; + +static ssize_t veth_port_attribute_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct veth_port_attribute *port_attr; + struct veth_port *port; + + port_attr = container_of(attr, struct veth_port_attribute, attr); + port = container_of(kobj, struct veth_port, kobject); + + if (!port_attr->show) + return -EIO; + + return port_attr->show(port, buf); +} + +#define CUSTOM_PORT_ATTR(_name, _format, _expression) \ +static ssize_t _name##_show(struct veth_port *port, char *buf) \ +{ \ + return sprintf(buf, _format, _expression); \ +} \ +struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name) + +#define SIMPLE_PORT_ATTR(_name) \ + CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name) + +SIMPLE_PORT_ATTR(promiscuous); +SIMPLE_PORT_ATTR(num_mcast); +CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map); +CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map); +CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr); + +#define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr) +static struct attribute *veth_port_default_attrs[] = { + GET_PORT_ATTR(mac_addr), + GET_PORT_ATTR(lpar_map), + GET_PORT_ATTR(stopped_map), + GET_PORT_ATTR(promiscuous), + GET_PORT_ATTR(num_mcast), + NULL +}; + +static const struct sysfs_ops veth_port_sysfs_ops = { + .show = veth_port_attribute_show +}; + +static struct kobj_type veth_port_ktype = { + .sysfs_ops = &veth_port_sysfs_ops, + .default_attrs = veth_port_default_attrs +}; + +/* + * LPAR connection code + */ + +static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx) +{ + schedule_delayed_work(&cnx->statemachine_wq, 0); +} + +static void veth_take_cap(struct veth_lpar_connection *cnx, + struct veth_lpevent *event) +{ + unsigned long flags; + + spin_lock_irqsave(&cnx->lock, flags); + /* Receiving caps may mean the other end has just come up, so + * we need to reload the instance ID of the far end */ + cnx->dst_inst = + HvCallEvent_getTargetLpInstanceId(cnx->remote_lp, + HvLpEvent_Type_VirtualLan); + + if (cnx->state & VETH_STATE_GOTCAPS) { + veth_error("Received a second capabilities from LPAR %d.\n", + cnx->remote_lp); + event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable; + HvCallEvent_ackLpEvent((struct HvLpEvent *) event); + } else { + memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event)); + cnx->state |= VETH_STATE_GOTCAPS; + veth_kick_statemachine(cnx); + } + spin_unlock_irqrestore(&cnx->lock, flags); +} + +static void veth_take_cap_ack(struct veth_lpar_connection *cnx, + struct veth_lpevent *event) +{ + unsigned long flags; + + spin_lock_irqsave(&cnx->lock, flags); + if (cnx->state & VETH_STATE_GOTCAPACK) { + veth_error("Received a second capabilities ack from LPAR %d.\n", + cnx->remote_lp); + } else { + memcpy(&cnx->cap_ack_event, event, + sizeof(cnx->cap_ack_event)); + cnx->state |= VETH_STATE_GOTCAPACK; + veth_kick_statemachine(cnx); + } + spin_unlock_irqrestore(&cnx->lock, flags); +} + +static void veth_take_monitor_ack(struct veth_lpar_connection *cnx, + struct veth_lpevent *event) +{ + unsigned long flags; + + spin_lock_irqsave(&cnx->lock, flags); + veth_debug("cnx %d: lost connection.\n", cnx->remote_lp); + + /* Avoid kicking the statemachine once we're shutdown. + * It's unnecessary and it could break veth_stop_connection(). */ + + if (! (cnx->state & VETH_STATE_SHUTDOWN)) { + cnx->state |= VETH_STATE_RESET; + veth_kick_statemachine(cnx); + } + spin_unlock_irqrestore(&cnx->lock, flags); +} + +static void veth_handle_ack(struct veth_lpevent *event) +{ + HvLpIndex rlp = event->base_event.xTargetLp; + struct veth_lpar_connection *cnx = veth_cnx[rlp]; + + BUG_ON(! cnx); + + switch (event->base_event.xSubtype) { + case VETH_EVENT_CAP: + veth_take_cap_ack(cnx, event); + break; + case VETH_EVENT_MONITOR: + veth_take_monitor_ack(cnx, event); + break; + default: + veth_error("Unknown ack type %d from LPAR %d.\n", + event->base_event.xSubtype, rlp); + } +} + +static void veth_handle_int(struct veth_lpevent *event) +{ + HvLpIndex rlp = event->base_event.xSourceLp; + struct veth_lpar_connection *cnx = veth_cnx[rlp]; + unsigned long flags; + int i, acked = 0; + + BUG_ON(! cnx); + + switch (event->base_event.xSubtype) { + case VETH_EVENT_CAP: + veth_take_cap(cnx, event); + break; + case VETH_EVENT_MONITOR: + /* do nothing... this'll hang out here til we're dead, + * and the hypervisor will return it for us. */ + break; + case VETH_EVENT_FRAMES_ACK: + spin_lock_irqsave(&cnx->lock, flags); + + for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) { + u16 msgnum = event->u.frames_ack_data.token[i]; + + if (msgnum < VETH_NUMBUFFERS) { + veth_recycle_msg(cnx, cnx->msgs + msgnum); + cnx->outstanding_tx--; + acked++; + } + } + + if (acked > 0) { + cnx->last_contact = jiffies; + veth_wake_queues(cnx); + } + + spin_unlock_irqrestore(&cnx->lock, flags); + break; + case VETH_EVENT_FRAMES: + veth_receive(cnx, event); + break; + default: + veth_error("Unknown interrupt type %d from LPAR %d.\n", + event->base_event.xSubtype, rlp); + } +} + +static void veth_handle_event(struct HvLpEvent *event) +{ + struct veth_lpevent *veth_event = (struct veth_lpevent *)event; + + if (hvlpevent_is_ack(event)) + veth_handle_ack(veth_event); + else + veth_handle_int(veth_event); +} + +static int veth_process_caps(struct veth_lpar_connection *cnx) +{ + struct veth_cap_data *remote_caps = &cnx->remote_caps; + int num_acks_needed; + + /* Convert timer to jiffies */ + cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000; + + if ( (remote_caps->num_buffers == 0) || + (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) || + (remote_caps->ack_threshold == 0) || + (cnx->ack_timeout == 0) ) { + veth_error("Received incompatible capabilities from LPAR %d.\n", + cnx->remote_lp); + return HvLpEvent_Rc_InvalidSubtypeData; + } + + num_acks_needed = (remote_caps->num_buffers + / remote_caps->ack_threshold) + 1; + + /* FIXME: locking on num_ack_events? */ + if (cnx->num_ack_events < num_acks_needed) { + int num; + + num = veth_allocate_events(cnx->remote_lp, + num_acks_needed-cnx->num_ack_events); + if (num > 0) + cnx->num_ack_events += num; + + if (cnx->num_ack_events < num_acks_needed) { + veth_error("Couldn't allocate enough ack events " + "for LPAR %d.\n", cnx->remote_lp); + + return HvLpEvent_Rc_BufferNotAvailable; + } + } + + + return HvLpEvent_Rc_Good; +} + +/* FIXME: The gotos here are a bit dubious */ +static void veth_statemachine(struct work_struct *work) +{ + struct veth_lpar_connection *cnx = + container_of(work, struct veth_lpar_connection, + statemachine_wq.work); + int rlp = cnx->remote_lp; + int rc; + + spin_lock_irq(&cnx->lock); + + restart: + if (cnx->state & VETH_STATE_RESET) { + if (cnx->state & VETH_STATE_OPEN) + HvCallEvent_closeLpEventPath(cnx->remote_lp, + HvLpEvent_Type_VirtualLan); + + /* + * Reset ack data. This prevents the ack_timer actually + * doing anything, even if it runs one more time when + * we drop the lock below. + */ + memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); + cnx->num_pending_acks = 0; + + cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON + | VETH_STATE_OPEN | VETH_STATE_SENTCAPS + | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS + | VETH_STATE_SENTCAPACK | VETH_STATE_READY); + + /* Clean up any leftover messages */ + if (cnx->msgs) { + int i; + for (i = 0; i < VETH_NUMBUFFERS; ++i) + veth_recycle_msg(cnx, cnx->msgs + i); + } + + cnx->outstanding_tx = 0; + veth_wake_queues(cnx); + + /* Drop the lock so we can do stuff that might sleep or + * take other locks. */ + spin_unlock_irq(&cnx->lock); + + del_timer_sync(&cnx->ack_timer); + del_timer_sync(&cnx->reset_timer); + + spin_lock_irq(&cnx->lock); + + if (cnx->state & VETH_STATE_RESET) + goto restart; + + /* Hack, wait for the other end to reset itself. */ + if (! (cnx->state & VETH_STATE_SHUTDOWN)) { + schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ); + goto out; + } + } + + if (cnx->state & VETH_STATE_SHUTDOWN) + /* It's all over, do nothing */ + goto out; + + if ( !(cnx->state & VETH_STATE_OPEN) ) { + if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) ) + goto cant_cope; + + HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan); + cnx->src_inst = + HvCallEvent_getSourceLpInstanceId(rlp, + HvLpEvent_Type_VirtualLan); + cnx->dst_inst = + HvCallEvent_getTargetLpInstanceId(rlp, + HvLpEvent_Type_VirtualLan); + cnx->state |= VETH_STATE_OPEN; + } + + if ( (cnx->state & VETH_STATE_OPEN) && + !(cnx->state & VETH_STATE_SENTMON) ) { + rc = veth_signalevent(cnx, VETH_EVENT_MONITOR, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + 0, 0, 0, 0, 0, 0); + + if (rc == HvLpEvent_Rc_Good) { + cnx->state |= VETH_STATE_SENTMON; + } else { + if ( (rc != HvLpEvent_Rc_PartitionDead) && + (rc != HvLpEvent_Rc_PathClosed) ) + veth_error("Error sending monitor to LPAR %d, " + "rc = %d\n", rlp, rc); + + /* Oh well, hope we get a cap from the other + * end and do better when that kicks us */ + goto out; + } + } + + if ( (cnx->state & VETH_STATE_OPEN) && + !(cnx->state & VETH_STATE_SENTCAPS)) { + u64 *rawcap = (u64 *)&cnx->local_caps; + + rc = veth_signalevent(cnx, VETH_EVENT_CAP, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + 0, rawcap[0], rawcap[1], rawcap[2], + rawcap[3], rawcap[4]); + + if (rc == HvLpEvent_Rc_Good) { + cnx->state |= VETH_STATE_SENTCAPS; + } else { + if ( (rc != HvLpEvent_Rc_PartitionDead) && + (rc != HvLpEvent_Rc_PathClosed) ) + veth_error("Error sending caps to LPAR %d, " + "rc = %d\n", rlp, rc); + + /* Oh well, hope we get a cap from the other + * end and do better when that kicks us */ + goto out; + } + } + + if ((cnx->state & VETH_STATE_GOTCAPS) && + !(cnx->state & VETH_STATE_SENTCAPACK)) { + struct veth_cap_data *remote_caps = &cnx->remote_caps; + + memcpy(remote_caps, &cnx->cap_event.u.caps_data, + sizeof(*remote_caps)); + + spin_unlock_irq(&cnx->lock); + rc = veth_process_caps(cnx); + spin_lock_irq(&cnx->lock); + + /* We dropped the lock, so recheck for anything which + * might mess us up */ + if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN)) + goto restart; + + cnx->cap_event.base_event.xRc = rc; + HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event); + if (rc == HvLpEvent_Rc_Good) + cnx->state |= VETH_STATE_SENTCAPACK; + else + goto cant_cope; + } + + if ((cnx->state & VETH_STATE_GOTCAPACK) && + (cnx->state & VETH_STATE_GOTCAPS) && + !(cnx->state & VETH_STATE_READY)) { + if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) { + /* Start the ACK timer */ + cnx->ack_timer.expires = jiffies + cnx->ack_timeout; + add_timer(&cnx->ack_timer); + cnx->state |= VETH_STATE_READY; + } else { + veth_error("Caps rejected by LPAR %d, rc = %d\n", + rlp, cnx->cap_ack_event.base_event.xRc); + goto cant_cope; + } + } + + out: + spin_unlock_irq(&cnx->lock); + return; + + cant_cope: + /* FIXME: we get here if something happens we really can't + * cope with. The link will never work once we get here, and + * all we can do is not lock the rest of the system up */ + veth_error("Unrecoverable error on connection to LPAR %d, shutting down" + " (state = 0x%04lx)\n", rlp, cnx->state); + cnx->state |= VETH_STATE_SHUTDOWN; + spin_unlock_irq(&cnx->lock); +} + +static int veth_init_connection(u8 rlp) +{ + struct veth_lpar_connection *cnx; + struct veth_msg *msgs; + int i; + + if ( (rlp == this_lp) || + ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) + return 0; + + cnx = kzalloc(sizeof(*cnx), GFP_KERNEL); + if (! cnx) + return -ENOMEM; + + cnx->remote_lp = rlp; + spin_lock_init(&cnx->lock); + INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine); + + init_timer(&cnx->ack_timer); + cnx->ack_timer.function = veth_timed_ack; + cnx->ack_timer.data = (unsigned long) cnx; + + init_timer(&cnx->reset_timer); + cnx->reset_timer.function = veth_timed_reset; + cnx->reset_timer.data = (unsigned long) cnx; + cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000); + + memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); + + veth_cnx[rlp] = cnx; + + /* This gets us 1 reference, which is held on behalf of the driver + * infrastructure. It's released at module unload. */ + kobject_init(&cnx->kobject, &veth_lpar_connection_ktype); + + msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL); + if (! msgs) { + veth_error("Can't allocate buffers for LPAR %d.\n", rlp); + return -ENOMEM; + } + + cnx->msgs = msgs; + + for (i = 0; i < VETH_NUMBUFFERS; i++) { + msgs[i].token = i; + veth_stack_push(cnx, msgs + i); + } + + cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS); + + if (cnx->num_events < (2 + VETH_NUMBUFFERS)) { + veth_error("Can't allocate enough events for LPAR %d.\n", rlp); + return -ENOMEM; + } + + cnx->local_caps.num_buffers = VETH_NUMBUFFERS; + cnx->local_caps.ack_threshold = ACK_THRESHOLD; + cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT; + + return 0; +} + +static void veth_stop_connection(struct veth_lpar_connection *cnx) +{ + if (!cnx) + return; + + spin_lock_irq(&cnx->lock); + cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN; + veth_kick_statemachine(cnx); + spin_unlock_irq(&cnx->lock); + + /* ensure the statemachine runs now and waits for its completion */ + flush_delayed_work_sync(&cnx->statemachine_wq); +} + +static void veth_destroy_connection(struct veth_lpar_connection *cnx) +{ + if (!cnx) + return; + + if (cnx->num_events > 0) + mf_deallocate_lp_events(cnx->remote_lp, + HvLpEvent_Type_VirtualLan, + cnx->num_events, + NULL, NULL); + if (cnx->num_ack_events > 0) + mf_deallocate_lp_events(cnx->remote_lp, + HvLpEvent_Type_VirtualLan, + cnx->num_ack_events, + NULL, NULL); + + kfree(cnx->msgs); + veth_cnx[cnx->remote_lp] = NULL; + kfree(cnx); +} + +static void veth_release_connection(struct kobject *kobj) +{ + struct veth_lpar_connection *cnx; + cnx = container_of(kobj, struct veth_lpar_connection, kobject); + veth_stop_connection(cnx); + veth_destroy_connection(cnx); +} + +/* + * net_device code + */ + +static int veth_open(struct net_device *dev) +{ + netif_start_queue(dev); + return 0; +} + +static int veth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +static int veth_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void veth_set_multicast_list(struct net_device *dev) +{ + struct veth_port *port = netdev_priv(dev); + unsigned long flags; + + write_lock_irqsave(&port->mcast_gate, flags); + + if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) || + (netdev_mc_count(dev) > VETH_MAX_MCAST)) { + port->promiscuous = 1; + } else { + struct netdev_hw_addr *ha; + + port->promiscuous = 0; + + /* Update table */ + port->num_mcast = 0; + + netdev_for_each_mc_addr(ha, dev) { + u8 *addr = ha->addr; + u64 xaddr = 0; + + memcpy(&xaddr, addr, ETH_ALEN); + port->mcast_addr[port->num_mcast] = xaddr; + port->num_mcast++; + } + } + + write_unlock_irqrestore(&port->mcast_gate, flags); +} + +static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1); + info->driver[sizeof(info->driver) - 1] = '\0'; + strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1); + info->version[sizeof(info->version) - 1] = '\0'; +} + +static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +{ + ecmd->supported = (SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg | SUPPORTED_FIBRE); + ecmd->advertising = (SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg | SUPPORTED_FIBRE); + ecmd->port = PORT_FIBRE; + ecmd->transceiver = XCVR_INTERNAL; + ecmd->phy_address = 0; + ecmd->speed = SPEED_1000; + ecmd->duplex = DUPLEX_FULL; + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->maxtxpkt = 120; + ecmd->maxrxpkt = 120; + return 0; +} + +static const struct ethtool_ops ops = { + .get_drvinfo = veth_get_drvinfo, + .get_settings = veth_get_settings, + .get_link = ethtool_op_get_link, +}; + +static const struct net_device_ops veth_netdev_ops = { + .ndo_open = veth_open, + .ndo_stop = veth_close, + .ndo_start_xmit = veth_start_xmit, + .ndo_change_mtu = veth_change_mtu, + .ndo_set_rx_mode = veth_set_multicast_list, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +}; + +static struct net_device *veth_probe_one(int vlan, + struct vio_dev *vio_dev) +{ + struct net_device *dev; + struct veth_port *port; + struct device *vdev = &vio_dev->dev; + int i, rc; + const unsigned char *mac_addr; + + mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL); + if (mac_addr == NULL) + mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL); + if (mac_addr == NULL) { + veth_error("Unable to fetch MAC address from device tree.\n"); + return NULL; + } + + dev = alloc_etherdev(sizeof (struct veth_port)); + if (! dev) { + veth_error("Unable to allocate net_device structure!\n"); + return NULL; + } + + port = netdev_priv(dev); + + spin_lock_init(&port->queue_lock); + rwlock_init(&port->mcast_gate); + port->stopped_map = 0; + + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + HvLpVirtualLanIndexMap map; + + if (i == this_lp) + continue; + map = HvLpConfig_getVirtualLanIndexMapForLp(i); + if (map & (0x8000 >> vlan)) + port->lpar_map |= (1 << i); + } + port->dev = vdev; + + memcpy(dev->dev_addr, mac_addr, ETH_ALEN); + + dev->mtu = VETH_MAX_MTU; + + memcpy(&port->mac_addr, mac_addr, ETH_ALEN); + + dev->netdev_ops = &veth_netdev_ops; + SET_ETHTOOL_OPS(dev, &ops); + + SET_NETDEV_DEV(dev, vdev); + + rc = register_netdev(dev); + if (rc != 0) { + veth_error("Failed registering net device for vlan%d.\n", vlan); + free_netdev(dev); + return NULL; + } + + kobject_init(&port->kobject, &veth_port_ktype); + if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port")) + veth_error("Failed adding port for %s to sysfs.\n", dev->name); + + veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n", + dev->name, vlan, port->lpar_map); + + return dev; +} + +/* + * Tx path + */ + +static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, + struct net_device *dev) +{ + struct veth_lpar_connection *cnx = veth_cnx[rlp]; + struct veth_port *port = netdev_priv(dev); + HvLpEvent_Rc rc; + struct veth_msg *msg = NULL; + unsigned long flags; + + if (! cnx) + return 0; + + spin_lock_irqsave(&cnx->lock, flags); + + if (! (cnx->state & VETH_STATE_READY)) + goto no_error; + + if ((skb->len - ETH_HLEN) > VETH_MAX_MTU) + goto drop; + + msg = veth_stack_pop(cnx); + if (! msg) + goto drop; + + msg->in_use = 1; + msg->skb = skb_get(skb); + + msg->data.addr[0] = dma_map_single(port->dev, skb->data, + skb->len, DMA_TO_DEVICE); + + if (dma_mapping_error(port->dev, msg->data.addr[0])) + goto recycle_and_drop; + + msg->dev = port->dev; + msg->data.len[0] = skb->len; + msg->data.eofmask = 1 << VETH_EOF_SHIFT; + + rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data); + + if (rc != HvLpEvent_Rc_Good) + goto recycle_and_drop; + + /* If the timer's not already running, start it now. */ + if (0 == cnx->outstanding_tx) + mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout); + + cnx->last_contact = jiffies; + cnx->outstanding_tx++; + + if (veth_stack_is_empty(cnx)) + veth_stop_queues(cnx); + + no_error: + spin_unlock_irqrestore(&cnx->lock, flags); + return 0; + + recycle_and_drop: + veth_recycle_msg(cnx, msg); + drop: + spin_unlock_irqrestore(&cnx->lock, flags); + return 1; +} + +static void veth_transmit_to_many(struct sk_buff *skb, + HvLpIndexMap lpmask, + struct net_device *dev) +{ + int i, success, error; + + success = error = 0; + + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + if ((lpmask & (1 << i)) == 0) + continue; + + if (veth_transmit_to_one(skb, i, dev)) + error = 1; + else + success = 1; + } + + if (error) + dev->stats.tx_errors++; + + if (success) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + } +} + +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned char *frame = skb->data; + struct veth_port *port = netdev_priv(dev); + HvLpIndexMap lpmask; + + if (is_unicast_ether_addr(frame)) { + /* unicast packet */ + HvLpIndex rlp = frame[5]; + + if ( ! ((1 << rlp) & port->lpar_map) ) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + lpmask = 1 << rlp; + } else { + lpmask = port->lpar_map; + } + + veth_transmit_to_many(skb, lpmask, dev); + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +/* You must hold the connection's lock when you call this function. */ +static void veth_recycle_msg(struct veth_lpar_connection *cnx, + struct veth_msg *msg) +{ + u32 dma_address, dma_length; + + if (msg->in_use) { + msg->in_use = 0; + dma_address = msg->data.addr[0]; + dma_length = msg->data.len[0]; + + if (!dma_mapping_error(msg->dev, dma_address)) + dma_unmap_single(msg->dev, dma_address, dma_length, + DMA_TO_DEVICE); + + if (msg->skb) { + dev_kfree_skb_any(msg->skb); + msg->skb = NULL; + } + + memset(&msg->data, 0, sizeof(msg->data)); + veth_stack_push(cnx, msg); + } else if (cnx->state & VETH_STATE_OPEN) { + veth_error("Non-pending frame (# %d) acked by LPAR %d.\n", + cnx->remote_lp, msg->token); + } +} + +static void veth_wake_queues(struct veth_lpar_connection *cnx) +{ + int i; + + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + struct net_device *dev = veth_dev[i]; + struct veth_port *port; + unsigned long flags; + + if (! dev) + continue; + + port = netdev_priv(dev); + + if (! (port->lpar_map & (1<<cnx->remote_lp))) + continue; + + spin_lock_irqsave(&port->queue_lock, flags); + + port->stopped_map &= ~(1 << cnx->remote_lp); + + if (0 == port->stopped_map && netif_queue_stopped(dev)) { + veth_debug("cnx %d: woke queue for %s.\n", + cnx->remote_lp, dev->name); + netif_wake_queue(dev); + } + spin_unlock_irqrestore(&port->queue_lock, flags); + } +} + +static void veth_stop_queues(struct veth_lpar_connection *cnx) +{ + int i; + + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + struct net_device *dev = veth_dev[i]; + struct veth_port *port; + + if (! dev) + continue; + + port = netdev_priv(dev); + + /* If this cnx is not on the vlan for this port, continue */ + if (! (port->lpar_map & (1 << cnx->remote_lp))) + continue; + + spin_lock(&port->queue_lock); + + netif_stop_queue(dev); + port->stopped_map |= (1 << cnx->remote_lp); + + veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n", + cnx->remote_lp, dev->name, port->stopped_map); + + spin_unlock(&port->queue_lock); + } +} + +static void veth_timed_reset(unsigned long ptr) +{ + struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr; + unsigned long trigger_time, flags; + + /* FIXME is it possible this fires after veth_stop_connection()? + * That would reschedule the statemachine for 5 seconds and probably + * execute it after the module's been unloaded. Hmm. */ + + spin_lock_irqsave(&cnx->lock, flags); + + if (cnx->outstanding_tx > 0) { + trigger_time = cnx->last_contact + cnx->reset_timeout; + + if (trigger_time < jiffies) { + cnx->state |= VETH_STATE_RESET; + veth_kick_statemachine(cnx); + veth_error("%d packets not acked by LPAR %d within %d " + "seconds, resetting.\n", + cnx->outstanding_tx, cnx->remote_lp, + cnx->reset_timeout / HZ); + } else { + /* Reschedule the timer */ + trigger_time = jiffies + cnx->reset_timeout; + mod_timer(&cnx->reset_timer, trigger_time); + } + } + + spin_unlock_irqrestore(&cnx->lock, flags); +} + +/* + * Rx path + */ + +static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr) +{ + int wanted = 0; + int i; + unsigned long flags; + + if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) ) + return 1; + + read_lock_irqsave(&port->mcast_gate, flags); + + if (port->promiscuous) { + wanted = 1; + goto out; + } + + for (i = 0; i < port->num_mcast; ++i) { + if (port->mcast_addr[i] == mac_addr) { + wanted = 1; + break; + } + } + + out: + read_unlock_irqrestore(&port->mcast_gate, flags); + + return wanted; +} + +struct dma_chunk { + u64 addr; + u64 size; +}; + +#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 ) + +static inline void veth_build_dma_list(struct dma_chunk *list, + unsigned char *p, unsigned long length) +{ + unsigned long done; + int i = 1; + + /* FIXME: skbs are contiguous in real addresses. Do we + * really need to break it into PAGE_SIZE chunks, or can we do + * it just at the granularity of iSeries real->absolute + * mapping? Indeed, given the way the allocator works, can we + * count on them being absolutely contiguous? */ + list[0].addr = iseries_hv_addr(p); + list[0].size = min(length, + PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK)); + + done = list[0].size; + while (done < length) { + list[i].addr = iseries_hv_addr(p + done); + list[i].size = min(length-done, PAGE_SIZE); + done += list[i].size; + i++; + } +} + +static void veth_flush_acks(struct veth_lpar_connection *cnx) +{ + HvLpEvent_Rc rc; + + rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK, + 0, &cnx->pending_acks); + + if (rc != HvLpEvent_Rc_Good) + veth_error("Failed acking frames from LPAR %d, rc = %d\n", + cnx->remote_lp, (int)rc); + + cnx->num_pending_acks = 0; + memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks)); +} + +static void veth_receive(struct veth_lpar_connection *cnx, + struct veth_lpevent *event) +{ + struct veth_frames_data *senddata = &event->u.frames_data; + int startchunk = 0; + int nchunks; + unsigned long flags; + HvLpDma_Rc rc; + + do { + u16 length = 0; + struct sk_buff *skb; + struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME]; + struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG]; + u64 dest; + HvLpVirtualLanIndex vlan; + struct net_device *dev; + struct veth_port *port; + + /* FIXME: do we need this? */ + memset(local_list, 0, sizeof(local_list)); + memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG)); + + /* a 0 address marks the end of the valid entries */ + if (senddata->addr[startchunk] == 0) + break; + + /* make sure that we have at least 1 EOF entry in the + * remaining entries */ + if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) { + veth_error("Missing EOF fragment in event " + "eofmask = 0x%x startchunk = %d\n", + (unsigned)senddata->eofmask, + startchunk); + break; + } + + /* build list of chunks in this frame */ + nchunks = 0; + do { + remote_list[nchunks].addr = + (u64) senddata->addr[startchunk+nchunks] << 32; + remote_list[nchunks].size = + senddata->len[startchunk+nchunks]; + length += remote_list[nchunks].size; + } while (! (senddata->eofmask & + (1 << (VETH_EOF_SHIFT + startchunk + nchunks++)))); + + /* length == total length of all chunks */ + /* nchunks == # of chunks in this frame */ + + if ((length - ETH_HLEN) > VETH_MAX_MTU) { + veth_error("Received oversize frame from LPAR %d " + "(length = %d)\n", + cnx->remote_lp, length); + continue; + } + + skb = alloc_skb(length, GFP_ATOMIC); + if (!skb) + continue; + + veth_build_dma_list(local_list, skb->data, length); + + rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan, + event->base_event.xSourceLp, + HvLpDma_Direction_RemoteToLocal, + cnx->src_inst, + cnx->dst_inst, + HvLpDma_AddressType_RealAddress, + HvLpDma_AddressType_TceIndex, + iseries_hv_addr(&local_list), + iseries_hv_addr(&remote_list), + length); + if (rc != HvLpDma_Rc_Good) { + dev_kfree_skb_irq(skb); + continue; + } + + vlan = skb->data[9]; + dev = veth_dev[vlan]; + if (! dev) { + /* + * Some earlier versions of the driver sent + * broadcasts down all connections, even to lpars + * that weren't on the relevant vlan. So ignore + * packets belonging to a vlan we're not on. + * We can also be here if we receive packets while + * the driver is going down, because then dev is NULL. + */ + dev_kfree_skb_irq(skb); + continue; + } + + port = netdev_priv(dev); + dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000; + + if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) { + dev_kfree_skb_irq(skb); + continue; + } + if (! veth_frame_wanted(port, dest)) { + dev_kfree_skb_irq(skb); + continue; + } + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + skb_checksum_none_assert(skb); + netif_rx(skb); /* send it up */ + dev->stats.rx_packets++; + dev->stats.rx_bytes += length; + } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG); + + /* Ack it */ + spin_lock_irqsave(&cnx->lock, flags); + BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG); + + cnx->pending_acks[cnx->num_pending_acks++] = + event->base_event.xCorrelationToken; + + if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) || + (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) ) + veth_flush_acks(cnx); + + spin_unlock_irqrestore(&cnx->lock, flags); +} + +static void veth_timed_ack(unsigned long ptr) +{ + struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr; + unsigned long flags; + + /* Ack all the events */ + spin_lock_irqsave(&cnx->lock, flags); + if (cnx->num_pending_acks > 0) + veth_flush_acks(cnx); + + /* Reschedule the timer */ + cnx->ack_timer.expires = jiffies + cnx->ack_timeout; + add_timer(&cnx->ack_timer); + spin_unlock_irqrestore(&cnx->lock, flags); +} + +static int veth_remove(struct vio_dev *vdev) +{ + struct veth_lpar_connection *cnx; + struct net_device *dev; + struct veth_port *port; + int i; + + dev = veth_dev[vdev->unit_address]; + + if (! dev) + return 0; + + port = netdev_priv(dev); + + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + cnx = veth_cnx[i]; + + if (cnx && (port->lpar_map & (1 << i))) { + /* Drop our reference to connections on our VLAN */ + kobject_put(&cnx->kobject); + } + } + + veth_dev[vdev->unit_address] = NULL; + kobject_del(&port->kobject); + kobject_put(&port->kobject); + unregister_netdev(dev); + free_netdev(dev); + + return 0; +} + +static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) +{ + int i = vdev->unit_address; + struct net_device *dev; + struct veth_port *port; + + dev = veth_probe_one(i, vdev); + if (dev == NULL) { + veth_remove(vdev); + return 1; + } + veth_dev[i] = dev; + + port = netdev_priv(dev); + + /* Start the state machine on each connection on this vlan. If we're + * the first dev to do so this will commence link negotiation */ + for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { + struct veth_lpar_connection *cnx; + + if (! (port->lpar_map & (1 << i))) + continue; + + cnx = veth_cnx[i]; + if (!cnx) + continue; + + kobject_get(&cnx->kobject); + veth_kick_statemachine(cnx); + } + + return 0; +} + +/** + * veth_device_table: Used by vio.c to match devices that we + * support. + */ +static struct vio_device_id veth_device_table[] __devinitdata = { + { "network", "IBM,iSeries-l-lan" }, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, veth_device_table); + +static struct vio_driver veth_driver = { + .id_table = veth_device_table, + .probe = veth_probe, + .remove = veth_remove, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + } +}; + +/* + * Module initialization/cleanup + */ + +static void __exit veth_module_cleanup(void) +{ + int i; + struct veth_lpar_connection *cnx; + + /* Disconnect our "irq" to stop events coming from the Hypervisor. */ + HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan); + + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + cnx = veth_cnx[i]; + + if (!cnx) + continue; + + /* Cancel work queued from Hypervisor callbacks */ + cancel_delayed_work_sync(&cnx->statemachine_wq); + /* Remove the connection from sysfs */ + kobject_del(&cnx->kobject); + /* Drop the driver's reference to the connection */ + kobject_put(&cnx->kobject); + } + + /* Unregister the driver, which will close all the netdevs and stop + * the connections when they're no longer referenced. */ + vio_unregister_driver(&veth_driver); +} +module_exit(veth_module_cleanup); + +static int __init veth_module_init(void) +{ + int i; + int rc; + + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return -ENODEV; + + this_lp = HvLpConfig_getLpIndex_outline(); + + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + rc = veth_init_connection(i); + if (rc != 0) + goto error; + } + + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan, + &veth_handle_event); + + rc = vio_register_driver(&veth_driver); + if (rc != 0) + goto error; + + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + struct kobject *kobj; + + if (!veth_cnx[i]) + continue; + + kobj = &veth_cnx[i]->kobject; + /* If the add failes, complain but otherwise continue */ + if (0 != driver_add_kobj(&veth_driver.driver, kobj, + "cnx%.2d", veth_cnx[i]->remote_lp)) + veth_error("cnx %d: Failed adding to sysfs.\n", i); + } + + return 0; + +error: + for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { + veth_destroy_connection(veth_cnx[i]); + } + + return rc; +} +module_init(veth_module_init); |