diff options
Diffstat (limited to 'drivers/infiniband')
50 files changed, 2669 insertions, 1046 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 6ef660c1332f..28058ae33d38 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -129,7 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) dev_put(dev); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { @@ -243,7 +243,7 @@ out: return ret; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c889aaef3416..d67999f6e34a 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3848,24 +3848,28 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); - if (ret) - return -ENOMEM; + if (ret) { + ret = -ENOMEM; + goto error1; + } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; - goto error1; + goto error2; } ret = ib_register_client(&cm_client); if (ret) - goto error2; + goto error3; return 0; -error2: +error3: destroy_workqueue(cm.wq); -error1: +error2: class_unregister(&cm_class); +error1: + idr_destroy(&cm.local_id_table); return ret; } diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7da9b2102341..be068f47e47e 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,18 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID 
cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID cpu_to_be16(0x001A) - enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2e826f9702c6..7172559ce0c1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -99,6 +99,10 @@ struct rdma_bind_list { unsigned short port; }; +enum { + CMA_OPTION_AFONLY, +}; + /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. @@ -137,9 +141,11 @@ struct rdma_id_private { u32 qkey; u32 qp_num; pid_t owner; + u32 options; u8 srq; u8 tos; u8 reuseaddr; + u8 afonly; }; struct cma_multicast { @@ -1297,8 +1303,10 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = htonl(~0); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = htonl(~0); + } } break; case AF_INET6: @@ -1312,9 +1320,11 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } else { cma_set_ip_ver(cma_data, 6); cma_set_ip_ver(cma_mask, 0xF); - cma_data->dst_addr.ip6 = ip6_addr; - memset(&cma_mask->dst_addr.ip6, 0xFF, - sizeof cma_mask->dst_addr.ip6); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); + } } break; default: @@ -1499,7 +1509,7 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); - if (cma_any_addr(addr)) + if (cma_any_addr(addr) && !id_priv->afonly) ret = 
ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); @@ -1573,6 +1583,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; + dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) @@ -2098,6 +2109,26 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) } EXPORT_SYMBOL(rdma_set_reuseaddr); +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { + id_priv->options |= (1 << CMA_OPTION_AFONLY); + id_priv->afonly = afonly; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); + static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { @@ -2187,22 +2218,24 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct hlist_node *node; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - if (cma_any_addr(addr) && !reuseaddr) - return -EADDRNOTAVAIL; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { if (id_priv == cur_id) continue; - if ((cur_id->state == RDMA_CM_LISTEN) || - !reuseaddr || !cur_id->reuseaddr) { - cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; - if (cma_any_addr(cur_addr)) - return -EADDRNOTAVAIL; + if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && + cur_id->reuseaddr) + continue; - if (!cma_addr_cmp(addr, cur_addr)) - return -EADDRINUSE; - } + cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; + if (id_priv->afonly && cur_id->afonly && + (addr->sa_family != 
cur_addr->sa_family)) + continue; + + if (cma_any_addr(addr) || cma_any_addr(cur_addr)) + return -EADDRNOTAVAIL; + + if (!cma_addr_cmp(addr, cur_addr)) + return -EADDRINUSE; } return 0; } @@ -2278,7 +2311,7 @@ static int cma_get_port(struct rdma_id_private *id_priv) static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 *sin6; if (addr->sa_family != AF_INET6) @@ -2371,6 +2404,14 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { + if (addr->sa_family == AF_INET) + id_priv->afonly = 1; +#if IS_ENABLED(CONFIG_IPV6) + else if (addr->sa_family == AF_INET6) + id_priv->afonly = init_net.ipv6.sysctl.bindv6only; +#endif + } ret = cma_get_port(id_priv); if (ret) goto err2; @@ -3023,10 +3064,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); - if (IS_ERR(mc->multicast.ib)) - return PTR_ERR(mc->multicast.ib); - - return 0; + return PTR_RET(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index e497dfbee435..3ae2bfd31015 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -108,12 +108,14 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, unsigned char *prev_tail; prev_tail = skb_tail_pointer(skb); - *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), - len, NLM_F_MULTI); + *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), + len, NLM_F_MULTI); + if (!*nlh) + goto out_nlmsg_trim; (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; - return NLMSG_DATA(*nlh); + return nlmsg_data(*nlh); -nlmsg_failure: +out_nlmsg_trim: 
nlmsg_trim(skb, prev_tail); return NULL; } @@ -171,8 +173,11 @@ static void ibnl_rcv(struct sk_buff *skb) int __init ibnl_init(void) { - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv, - NULL, THIS_MODULE); + struct netlink_kernel_cfg cfg = { + .input = ibnl_rcv, + }; + + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg); if (!nls) { pr_warn("Failed to create netlink socket\n"); return -ENOMEM; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index fbbfa24cf572..a8905abc56e4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -94,6 +94,12 @@ struct ib_sa_path_query { struct ib_sa_query sa_query; }; +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, + .offset_bits = 24, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res2), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 512 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -945,6 +979,105 @@ err1: return ret; } +/* 
Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + + if (mad) { + struct ib_sa_guidinfo_rec rec; + + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_guidinfo_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? 
ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; + + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8002ae642cfe..6bf850422895 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -909,6 +909,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; + case RDMA_OPTION_ID_AFONLY: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 
1 : 0); + break; default: ret = -ENOSYS; } @@ -995,23 +1002,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - optval = kmalloc(cmd.optlen, GFP_KERNEL); - if (!optval) { - ret = -ENOMEM; - goto out1; - } - - if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, - cmd.optlen)) { - ret = -EFAULT; - goto out2; + optval = memdup_user((void __user *) (unsigned long) cmd.optval, + cmd.optlen); + if (IS_ERR(optval)) { + ret = PTR_ERR(optval); + goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); -out2: kfree(optval); -out1: + +out: ucma_put_ctx(ctx); return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 740dcc065cf2..77b6b182778a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1374,7 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL); + l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1942,7 +1942,8 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail3; } ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL); + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, + &cm_id->remote_addr.sin_addr.s_addr); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b18870c455ad..51f42061dae9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -548,8 +548,8 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, } if (mpa_rev_to_use == 2) { - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size 
= htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -635,8 +635,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); @@ -715,8 +715,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); if (peer2peer && (ep->mpa_attr.p2p_type != diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 259b0670b51c..c27141fef1ab 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -147,47 +147,51 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) } /* - * Snoop SM MADs for port info and P_Key table sets, so we can - * synthesize LID change and P_Key change events. + * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can + * synthesize LID change, Client-Rereg, GID change, and P_Key change events. 
*/ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, - u16 prev_lid) + u16 prev_lid) { - struct ib_event event; + struct ib_port_info *pinfo; + u16 lid; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - mad->mad_hdr.method == IB_MGMT_METHOD_SET) { - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { - struct ib_port_info *pinfo = - (struct ib_port_info *) ((struct ib_smp *) mad)->data; - u16 lid = be16_to_cpu(pinfo->lid); + mad->mad_hdr.method == IB_MGMT_METHOD_SET) + switch (mad->mad_hdr.attr_id) { + case IB_SMP_ATTR_PORT_INFO: + pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + lid = be16_to_cpu(pinfo->lid); - update_sm_ah(to_mdev(ibdev), port_num, + update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); - event.device = ibdev; - event.element.port_num = port_num; + if (pinfo->clientrereg_resv_subnetto & 0x80) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_CLIENT_REREGISTER); - if (pinfo->clientrereg_resv_subnetto & 0x80) { - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } + if (prev_lid != lid) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_LID_CHANGE); + break; - if (prev_lid != lid) { - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - } + case IB_SMP_ATTR_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { - event.device = ibdev; - event.event = IB_EVENT_PKEY_CHANGE; - event.element.port_num = port_num; - ib_dispatch_event(&event); + case IB_SMP_ATTR_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); + break; + default: + break; } - } } static void node_desc_override(struct ib_device 
*dev, @@ -242,6 +246,25 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int err; struct ib_port_attr pattr; + if (in_wc && in_wc->qp->qp_num) { + pr_debug("received MAD: slid:%d sqpn:%d " + "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", + in_wc->slid, in_wc->src_qp, + in_wc->dlid_path_bits, + in_wc->qp->qp_num, + in_wc->wc_flags, + in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, + be16_to_cpu(in_mad->mad_hdr.attr_id)); + if (in_wc->wc_flags & IB_WC_GRH) { + pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->sgid.global.subnet_prefix), + be64_to_cpu(in_grh->sgid.global.interface_id)); + pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->dgid.global.subnet_prefix), + be64_to_cpu(in_grh->dgid.global.interface_id)); + } + } + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { @@ -286,7 +309,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } @@ -427,3 +451,64 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) ib_destroy_ah(dev->sm_ah[p]); } } + +void handle_port_mgmt_change_event(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *dev = ew->ib_dev; + struct mlx4_eqe *eqe = &(ew->ib_eqe); + u8 port = eqe->event.port_mgmt_change.port; + u32 changed_attr; + + switch (eqe->subtype) { + case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: + changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); + + /* Update the SM ah - This should be done before handling + the other changed attributes so that MADs can be sent 
to the SM */ + if (changed_attr & MSTR_SM_CHANGE_MASK) { + u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); + u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; + update_sm_ah(dev, port, lid, sl); + } + + /* Check if it is a lid change event */ + if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); + + /* Generate GUID changed event */ + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + + if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) + mlx4_ib_dispatch_event(dev, port, + IB_EVENT_CLIENT_REREGISTER); + break; + + case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + break; + case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + break; + default: + pr_warn("Unsupported subtype 0x%x for " + "Port Management Change event\n", eqe->subtype); + } + + kfree(ew); +} + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event event; + + event.device = &dev->ib_dev; + event.element.port_num = port_num; + event.event = type; + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3530c41fcd1f..fe2088cfa6ee 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -50,7 +50,7 @@ #include "mlx4_ib.h" #include "user.h" -#define DRV_NAME "mlx4_ib" +#define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" #define DRV_RELDATE "April 4, 2008" @@ -157,7 +157,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 
IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->masked_atomic_cap = IB_ATOMIC_HCA; + props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; @@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, return ret; } +struct mlx4_ib_steering { + struct list_head list; + u64 reg_id; + union ib_gid gid; +}; + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + return -ENOMEM; + } - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, - !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6); + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROT_IB_IPV6, &reg_id); if (err) - return err; + goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + ib_steering->reg_id = reg_id; + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + } return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); +err_malloc: + kfree(ib_steering); + return err; } @@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; + u64 reg_id = 0; + + if (mdev->dev->caps.steering_mode == +
MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &mqp->steering_rules, list) { + if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { + list_del(&ib_steering->list); + break; + } + } + mutex_unlock(&mqp->mutex); + if (&ib_steering->list == &mqp->steering_rules) { + pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + reg_id = ib_steering->reg_id; + kfree(ib_steering); + } - err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); if (err) return err; @@ -898,7 +946,6 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; - struct ib_event event; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { @@ -916,10 +963,7 @@ static void update_gids_task(struct work_struct *work) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); - event.device = &gw->dev->ib_dev; - event.element.port_num = gw->port; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1111,7 +1155,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) sprintf(name, "mlx4-ib-%d-%d@%s", i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { + if (mlx4_assign_eq(dev, name, NULL, + &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = @@ -1383,10 +1428,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum 
mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + struct mlx4_eqe *eqe = NULL; + struct ib_event_work *ew; + int port = 0; + + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) + eqe = (struct mlx4_eqe *)param; + else + port = (u8)param; if (port > ibdev->num_ports) return; @@ -1405,6 +1458,19 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, ibev.event = IB_EVENT_DEVICE_FATAL; break; + case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: + ew = kmalloc(sizeof *ew, GFP_ATOMIC); + if (!ew) { + pr_err("failed to allocate memory for events work\n"); + break; + } + + INIT_WORK(&ew->work, handle_port_mgmt_change_event); + memcpy(&ew->ib_eqe, eqe, sizeof *eqe); + ew->ib_dev = ibdev; + handle_port_mgmt_change_event(&ew->work); + return; + default: return; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ff36655d23d3..c136bb618e29 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -44,6 +44,16 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +#define MLX4_IB_DRV_NAME "mlx4_ib" + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__ + +#define mlx4_ib_warn(ibdev, format, arg...) 
\ + dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) + enum { MLX4_IB_SQ_MIN_WQE_SHIFT = 6, MLX4_IB_MAX_HEADROOM = 2048 @@ -163,6 +173,7 @@ struct mlx4_ib_qp { u8 state; int mlx_type; struct list_head gid_list; + struct list_head steering_rules; }; struct mlx4_ib_srq { @@ -214,6 +225,12 @@ struct mlx4_ib_dev { int eq_added; }; +struct ib_event_work { + struct work_struct work; + struct mlx4_ib_dev *ib_dev; + struct mlx4_eqe ib_eqe; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -371,4 +388,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d4ed24aef93..a6d8ea060ea8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -1335,11 +1336,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + pr_debug("qpn 0x%x: invalid attribute mask specified " + "for transition %d to %d. 
qp_type %d," + " attr_mask 0x%x\n", + ibqp->qp_num, cur_state, new_state, + ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + pr_debug("qpn 0x%x: invalid port number (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->port_num, cur_state, + new_state, ibqp->qp_type); goto out; } @@ -1350,17 +1361,30 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { + pr_debug("qpn 0x%x: invalid pkey index (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->pkey_index, cur_state, + new_state, ibqp->qp_type); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " + "Transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->max_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " + "Transition %d to %d. 
qp_type %d\n", + ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9601049e14d0..26a684536109 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -247,7 +247,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, spin_unlock(&dev->qp_table.lock); if (!qp) { - mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + mthca_warn(dev, "Async event %d for bogus QP %08x\n", + event_type, qpn); return; } @@ -501,6 +502,7 @@ done: qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = qp->sq_policy; out_mailbox: mthca_free_mailbox(dev, mailbox); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b050e629e9c3..5a044526e4f4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -202,8 +202,7 @@ static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev) return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) || \ -defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_VLAN_8021Q) static int ocrdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) @@ -549,7 +548,7 @@ static struct ocrdma_driver ocrdma_drv = { static void ocrdma_unregister_inet6addr_notifier(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier); #endif } @@ -558,7 +557,7 @@ static int __init ocrdma_init_module(void) { int status; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); if (status) return status; diff --git 
a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2e2e7aecc990..cb5b7f7d4d38 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -97,7 +97,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; attr->max_srq = (dev->attr.max_qp - 1); - attr->max_srq_sge = attr->max_srq_sge; + attr->max_srq_sge = dev->attr.max_srq_sge; attr->max_srq_wr = dev->attr.max_rqe; attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; attr->max_fast_reg_page_list_len = 0; @@ -893,7 +893,9 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, /* verify consumer QPs are not trying to use GSI QP's CQ */ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq))) { + (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || + (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || + (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", __func__, dev->id); return -EINVAL; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 7e62f4137148..7b1b86690024 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,8 +1,8 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -519,6 +519,7 @@ struct qib_pportdata { struct qib_devdata *dd; struct qib_chippport_specific *cpspec; /* chip-specific per-port */ struct kobject pport_kobj; + struct kobject pport_cc_kobj; struct kobject sl2vl_kobj; struct kobject diagc_kobj; @@ -544,6 +545,7 @@ struct qib_pportdata { /* read mostly */ struct qib_sdma_desc *sdma_descq; + struct workqueue_struct *qib_wq; struct qib_sdma_state sdma_state; dma_addr_t sdma_descq_phys; volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ @@ -637,6 +639,39 @@ struct qib_pportdata { struct timer_list led_override_timer; struct xmit_wait cong_stats; struct timer_list symerr_clear_timer; + + /* Synchronize access between driver writes and sysfs reads */ + spinlock_t cc_shadow_lock + ____cacheline_aligned_in_smp; + + /* Shadow copy of the congestion control table */ + struct cc_table_shadow *ccti_entries_shadow; + + /* Shadow copy of the congestion control entries */ + struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow; + + /* List of congestion control table entries */ + struct ib_cc_table_entry_shadow *ccti_entries; + + /* 16 congestion entries with each entry corresponding to a SL */ + struct ib_cc_congestion_entry_shadow *congestion_entries; + + /* Maximum number of congestion control entries that the agent expects + * the manager to send. + */ + u16 cc_supported_table_entries; + + /* Total number of congestion control table entries */ + u16 total_cct_entry; + + /* Bit map identifying service level */ + u16 cc_sl_control_map; + + /* maximum congestion control table index */ + u16 ccti_limit; + + /* CA's max number of 64 entry units in the congestion control table */ + u8 cc_max_table_entries; }; /* Observers. Not to be taken lightly, possibly not to ship. 
*/ @@ -1077,6 +1112,7 @@ extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; extern unsigned qib_wc_pat; +extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); int qib_enable_wc(struct qib_devdata *dd); @@ -1267,6 +1303,11 @@ int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, /* ppd->sdma_lock should be locked before calling this. */ int qib_sdma_make_progress(struct qib_pportdata *dd); +static inline int qib_sdma_empty(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_added == ppd->sdma_descq_removed; +} + /* must be called under qib_sdma_lock */ static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) { diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 9892456a4348..1686fd4bda87 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -53,6 +53,9 @@ #include "qib.h" #include "qib_common.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + /* * Each client that opens the diag device must read then write * offset 0, to prevent lossage from random cat or od. 
diag_state @@ -598,8 +601,8 @@ static ssize_t qib_diagpkt_write(struct file *fp, } tmpbuf = vmalloc(plen); if (!tmpbuf) { - qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, " - "failing\n"); + qib_devinfo(dd->pcidev, + "Unable to allocate tmp buffer, failing\n"); ret = -ENOMEM; goto bail; } @@ -693,7 +696,7 @@ int qib_register_observer(struct qib_devdata *dd, ret = -ENOMEM; olp = vmalloc(sizeof *olp); if (!olp) { - printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n"); + pr_err("vmalloc for observer failed\n"); goto bail; } if (olp) { diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 8895cfec5019..e41e7f7fc763 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -764,8 +764,9 @@ int qib_reset_device(int unit) qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { - qib_devinfo(dd->pcidev, "Invalid unit number %u or " - "not initialized or not present\n", unit); + qib_devinfo(dd->pcidev, + "Invalid unit number %u or not initialized or not present\n", + unit); ret = -ENXIO; goto bail; } @@ -802,11 +803,13 @@ int qib_reset_device(int unit) else ret = -EAGAIN; if (ret) - qib_dev_err(dd, "Reinitialize unit %u after " - "reset failed with %d\n", unit, ret); + qib_dev_err(dd, + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); else - qib_devinfo(dd->pcidev, "Reinitialized unit %u after " - "resetting\n", unit); + qib_devinfo(dd->pcidev, + "Reinitialized unit %u after resetting\n", + unit); bail: return ret; diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 92d9cfe98a68..4d5d71aaa2b4 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. 
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) if (oguid > bguid[7]) { if (bguid[6] == 0xff) { if (bguid[5] == 0xff) { - qib_dev_err(dd, "Can't set %s GUID" - " from base, wraps to" - " OUI!\n", - qib_get_unit_name(t)); + qib_dev_err(dd, + "Can't set %s GUID from base, wraps to OUI!\n", + qib_get_unit_name(t)); dd->base_guid = 0; goto bail; } @@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) len = sizeof(struct qib_flash); buf = vmalloc(len); if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for GUID\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", + len); goto bail; } @@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - qib_devinfo(dd->pcidev, "Bad I2C flash checksum: " - "0x%x, not 0x%x\n", csum, ifp->if_csum); + qib_devinfo(dd->pcidev, + "Bad I2C flash checksum: 0x%x, not 0x%x\n", + csum, ifp->if_csum); goto done; } if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { - qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n", - *(unsigned long long *) ifp->if_guid); + qib_dev_err(dd, + "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); /* don't allow GUID if all 0 or all 1's */ goto done; } /* complain, but allow it */ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) - qib_devinfo(dd->pcidev, "Warning, GUID %llx is " - "default, probably not correct!\n", - *(unsigned long long *) ifp->if_guid); + qib_devinfo(dd->pcidev, + "Warning, GUID %llx is default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); bguid = ifp->if_guid; if (!bguid[0] && !bguid[1] 
&& !bguid[2]) { @@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd) memcpy(dd->serial, ifp->if_serial, sizeof ifp->if_serial); if (!strstr(ifp->if_comment, "Tested successfully")) - qib_dev_err(dd, "Board SN %s did not pass functional " - "test: %s\n", dd->serial, ifp->if_comment); + qib_dev_err(dd, + "Board SN %s did not pass functional test: %s\n", + dd->serial, ifp->if_comment); memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); /* @@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd) buf = vmalloc(len); ret = 1; if (!buf) { - qib_dev_err(dd, "Couldn't allocate memory to read %u " - "bytes from eeprom for logging\n", len); + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for logging\n", + len); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index a7403248d83d..faa44cb08071 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -49,6 +49,9 @@ #include "qib_common.h" #include "qib_user_sdma.h" +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + static int qib_open(struct inode *, struct file *); static int qib_close(struct inode *, struct file *); static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); @@ -315,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, } if (cnt > tidcnt) { /* make sure it all fits in tid_pg_list */ - qib_devinfo(dd->pcidev, "Process tried to allocate %u " - "TIDs, only trying max (%u)\n", cnt, tidcnt); + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); cnt = tidcnt; } pagep = (struct page **) rcd->tid_pg_list; @@ -750,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, ret = remap_pfn_range(vma, vma->vm_start, pfn, len, vma->vm_page_prot); if (ret) - qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x " - "bytes failed: %d\n", what, rcd->ctxt, - pfn, len, ret); + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); bail: return ret; } @@ -771,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, */ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; if ((vma->vm_end - vma->vm_start) > sz) { - qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen " - "%lx > PAGE\n", vma->vm_end - vma->vm_start); + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); ret = -EFAULT; } else { phys = dd->physaddr + ureg; @@ -802,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, * for it. 
*/ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { - qib_devinfo(dd->pcidev, "FAIL mmap piobufs: " - "reqlen %lx > PAGE\n", + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); ret = -EINVAL; goto bail; @@ -847,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, size = rcd->rcvegrbuf_size; total_size = rcd->rcvegrbuf_chunks * size; if ((vma->vm_end - vma->vm_start) > total_size) { - qib_devinfo(dd->pcidev, "FAIL on egr bufs: " - "reqlen %lx > actual %lx\n", + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); ret = -EINVAL; @@ -856,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, } if (vma->vm_flags & VM_WRITE) { - qib_devinfo(dd->pcidev, "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); ret = -EPERM; goto bail; } @@ -1270,8 +1276,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, GFP_KERNEL); if (!rcd || !ptmp) { - qib_dev_err(dd, "Unable to allocate ctxtdata " - "memory, failing open\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); ret = -ENOMEM; goto bailerr; } @@ -1560,10 +1566,10 @@ done_chk_sdma: } else if (weight == 1 && test_bit(cpumask_first(tsk_cpus_allowed(current)), qib_cpulist)) - qib_devinfo(dd->pcidev, "%s PID %u affinity " - "set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); + qib_devinfo(dd->pcidev, + "%s PID %u affinity set to cpu %d; already allocated\n", + current->comm, current->pid, + cpumask_first(tsk_cpus_allowed(current))); } mutex_unlock(&qib_mutex); @@ -2185,8 +2191,7 @@ int qib_cdev_init(int minor, const char *name, cdev = cdev_alloc(); if (!cdev) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not allocate cdev for minor %d, %s\n", + 
pr_err("Could not allocate cdev for minor %d, %s\n", minor, name); ret = -ENOMEM; goto done; @@ -2198,8 +2203,7 @@ int qib_cdev_init(int minor, const char *name, ret = cdev_add(cdev, dev, 1); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Could not add cdev for minor %d, %s (err %d)\n", + pr_err("Could not add cdev for minor %d, %s (err %d)\n", minor, name, -ret); goto err_cdev; } @@ -2209,8 +2213,7 @@ int qib_cdev_init(int minor, const char *name, goto done; ret = PTR_ERR(device); device = NULL; - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device for minor %d, %s (err %d)\n", + pr_err("Could not create device for minor %d, %s (err %d)\n", minor, name, -ret); err_cdev: cdev_del(cdev); @@ -2245,16 +2248,14 @@ int __init qib_dev_init(void) ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME ": Could not allocate " - "chrdev region (err %d)\n", -ret); + pr_err("Could not allocate chrdev region (err %d)\n", -ret); goto done; } qib_class = class_create(THIS_MODULE, "ipath"); if (IS_ERR(qib_class)) { ret = PTR_ERR(qib_class); - printk(KERN_ERR QIB_DRV_NAME ": Could not create " - "device class (err %d)\n", -ret); + pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 05e0f17c5b44..cff8a6c32161 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -382,7 +383,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, &simple_dir_operations, dd); if (ret) { - printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); + pr_err("create_file(%s) failed: %d\n", unit, ret); goto bail; } @@ -390,21 +391,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counters) failed: %d\n", + pr_err("create_file(%s/counters) failed: %d\n", unit, ret); goto bail; } ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, &cntr_ops[1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n", + pr_err("create_file(%s/counter_names) failed: %d\n", unit, ret); goto bail; } ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[0], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, "portcounter_names", ret); goto bail; } @@ -416,7 +417,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &portcntr_ops[i], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -426,7 +427,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, &qsfp_ops[i - 1], dd); if (ret) { - printk(KERN_ERR "create_file(%s/%s) failed: %d\n", + pr_err("create_file(%s/%s) failed: %d\n", unit, fname, ret); goto bail; } @@ -435,7 +436,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, &flash_ops, dd); if (ret) - 
printk(KERN_ERR "create_file(%s/flash) failed: %d\n", + pr_err("create_file(%s/flash) failed: %d\n", unit, ret); bail: return ret; @@ -486,7 +487,7 @@ static int remove_device_files(struct super_block *sb, if (IS_ERR(dir)) { ret = PTR_ERR(dir); - printk(KERN_ERR "Lookup of %s failed\n", unit); + pr_err("Lookup of %s failed\n", unit); goto bail; } @@ -532,7 +533,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) ret = simple_fill_super(sb, QIBFS_MAGIC, files); if (ret) { - printk(KERN_ERR "simple_fill_super failed: %d\n", ret); + pr_err("simple_fill_super failed: %d\n", ret); goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 4d352b90750a..a099ac171e22 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -753,8 +753,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) return; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); return; } qib_stats.sps_hwerrs++; @@ -779,13 +779,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, * or it's occurred within the last 5 seconds. 
*/ if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { @@ -815,8 +816,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware" - " unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -868,8 +870,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, *msg = 0; /* recovered from all of them */ if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. 
@@ -1017,9 +1020,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) qib_inc_eeprom_err(dd, log_idx, 1); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", - (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_6120_senderrbufs(ppd); @@ -1089,8 +1092,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1541,8 +1544,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); handle_6120_errors(dd, estat); } @@ -1715,16 +1719,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) } if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? 
Interrupts won't work\n"); else { int ret; ret = request_irq(dd->cspec->irq, qib_6120intr, 0, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup interrupt " - "(irq=%d): %d\n", dd->cspec->irq, - ret); + qib_dev_err(dd, + "Couldn't setup interrupt (irq=%d): %d\n", + dd->cspec->irq, ret); } } @@ -1759,8 +1763,9 @@ static void pe_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware revision " - "%u.%u!\n", dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ qib_6120_init_hwerrors(dd); /* for Rev2 error interrupts; nop for rev 1 */ @@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, } pa >>= 11; if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2928,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->dd->unit, 
ppd->port); } else if (!strncmp(what, "off", 3)) { ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3186,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } return ret; @@ -3218,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -3551,8 +3558,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, goto bail; if (qib_pcie_params(dd, 8, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ /* clear diagctrl register, in case diags were running and crashed */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 86a0ba7ca0c2..64d0ecb90cdc 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1111,9 
+1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) sdma_7220_errors(ppd, errs); if (errs & ~IB_E_BITSEXTANT) - qib_dev_err(dd, "error interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (errs & ~IB_E_BITSEXTANT)); + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); if (errs & E_SUM_ERRS) { qib_disarm_7220_senderrbufs(ppd); @@ -1192,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs) } if (errs & ERR_MASK(ResetNegated)) { - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1305,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -1329,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_inc_eeprom_err(dd, log_idx, 1); if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | RXE_PARITY)) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); if (hwerrs & ~IB_HWE_BITSEXTANT) - qib_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~IB_HWE_BITSEXTANT)); + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) qib_sd7220_clr_ibpar(dd); @@ -1362,8 
+1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcat(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); @@ -1409,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * For /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) qib_stats.sps_errints++; estat = qib_read_kreg64(dd, kr_errstatus); if (!estat) - qib_devinfo(dd->pcidev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); else handle_7220_errors(dd, estat); } @@ -2023,17 +2027,18 @@ bail: static void qib_setup_7220_interrupt(struct qib_devdata *dd) { if (!dd->cspec->irq) - qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " - "work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); else { int ret = request_irq(dd->cspec->irq, qib_7220intr, dd->msi_lo ? 0 : IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) - qib_dev_err(dd, "Couldn't setup %s interrupt " - "(irq=%d): %d\n", dd->msi_lo ? - "MSI" : "INTx", dd->cspec->irq, ret); + qib_dev_err(dd, + "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->msi_lo ? 
"MSI" : "INTx", + dd->cspec->irq, ret); } } @@ -2072,9 +2077,9 @@ static void qib_7220_boardname(struct qib_devdata *dd) snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) - qib_dev_err(dd, "Unsupported InfiniPath hardware " - "revision %u.%u!\n", - dd->majrev, dd->minrev); + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", @@ -2146,8 +2151,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) - qib_dev_err(dd, "Reset failed to setup PCIe or " - "interrupts; continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* hold IBC in reset, no sends, etc till later */ qib_write_kreg(dd, kr_control, 0ULL); @@ -2187,8 +2192,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -2706,8 +2712,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); /* enable heart beat again */ val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -3307,8 +3314,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) if (!dd->msi_lo) return 0; - qib_devinfo(dd->pcidev, "MSI interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSI interrupt 
not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); qib_enable_intx(dd->pcidev); /* @@ -3980,11 +3987,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } qib_register_observer(dd, &sendctrl_observer); @@ -4014,8 +4020,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -4613,8 +4619,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, break; } if (qib_pcie_params(dd, minwidth, NULL, NULL)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* save IRQ for possible later use */ dd->cspec->irq = pdev->irq; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index c881e744c091..0d7280af99bc 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -49,6 +50,10 @@ #include "qib_qsfp.h" #include "qib_mad.h" +#include "qib_verbs.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME " " fmt static void qib_setup_7322_setextled(struct qib_pportdata *, u32); static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); @@ -1575,8 +1580,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) qib_stats.sps_errints++; errs = qib_read_kreg64(dd, kr_errstatus); if (!errs) { - qib_devinfo(dd->pcidev, "device error interrupt, " - "but no error bits set!\n"); + qib_devinfo(dd->pcidev, + "device error interrupt, but no error bits set!\n"); goto done; } @@ -1622,8 +1627,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd) if (errs & QIB_E_RESET) { int pidx; - qib_dev_err(dd, "Got reset, requires re-init " - "(unload and reload driver)\n"); + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); dd->flags &= ~QIB_INITTED; /* needs re-init */ /* mark as having had error */ *dd->devstatusp |= QIB_STATUS_HWERROR; @@ -1760,9 +1765,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) ppd->dd->cspec->r1 ? 
QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u re-enabled QDR adaptation " - "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt); + pr_info( + "IB%u:%u re-enabled QDR adaptation ibclt %x\n", + ppd->dd->unit, ppd->port, ibclt); } } } @@ -1804,9 +1809,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) if (!*msg) snprintf(msg, sizeof ppd->cpspec->epmsgbuf, "no others"); - qib_dev_porterr(dd, ppd->port, "error interrupt with unknown" - " errors 0x%016Lx set (and %s)\n", - (errs & ~QIB_E_P_BITSEXTANT), msg); + qib_dev_porterr(dd, ppd->port, + "error interrupt with unknown errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); *msg = '\0'; } @@ -2024,8 +2029,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (!hwerrs) goto bail; if (hwerrs == ~0ULL) { - qib_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); goto bail; } qib_stats.sps_hwerrs++; @@ -2039,8 +2044,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, /* no EEPROM logging, yet */ if (hwerrs) - qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); ctrl = qib_read_kreg32(dd, kr_control); if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { @@ -2064,8 +2070,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); + strlcpy(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); /* ignore from now on, so disable until driver reloaded */ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); qib_write_kreg(dd, 
kr_hwerrmask, dd->cspec->hwerrmask); @@ -2078,8 +2085,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !dd->diag_client) { - qib_dev_err(dd, "Fatal Hardware Error, no longer" - " usable, SN %.16s\n", dd->serial); + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); /* * for /sys status file and user programs to print; if no * trailing brace is copied, we'll know it was truncated. @@ -2667,8 +2675,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat) char msg[128]; kills = istat & ~QIB_I_BITSEXTANT; - qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:" - " %s\n", (unsigned long long) kills, msg); + qib_dev_err(dd, + "Clearing reserved interrupt(s) 0x%016llx: %s\n", + (unsigned long long) kills, msg); qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); } @@ -3101,16 +3110,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) /* Try to get INTx interrupt */ try_intx: if (!dd->pcidev->irq) { - qib_dev_err(dd, "irq is 0, BIOS error? " - "Interrupts won't work\n"); + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); goto bail; } ret = request_irq(dd->pcidev->irq, qib_7322intr, IRQF_SHARED, QIB_DRV_NAME, dd); if (ret) { - qib_dev_err(dd, "Couldn't setup INTx " - "interrupt (irq=%d): %d\n", - dd->pcidev->irq, ret); + qib_dev_err(dd, + "Couldn't setup INTx interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); goto bail; } dd->cspec->irq = dd->pcidev->irq; @@ -3185,8 +3194,9 @@ try_intx: * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. 
*/ - qib_dev_err(dd, "Couldn't setup MSIx " - "interrupt (vec=%d, irq=%d): %d\n", msixnum, + qib_dev_err(dd, + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, dd->cspec->msix_entries[msixnum].msix.vector, ret); qib_7322_nomsix(dd); @@ -3305,8 +3315,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { - qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" - " by module parameter\n", dd->unit); + qib_devinfo(dd->pcidev, + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } @@ -3400,8 +3411,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (val == dd->revision) break; if (i == 5) { - qib_dev_err(dd, "Failed to initialize after reset, " - "unusable\n"); + qib_dev_err(dd, + "Failed to initialize after reset, unusable\n"); ret = 0; goto bail; } @@ -3432,8 +3443,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (qib_pcie_params(dd, dd->lbus_width, &dd->cspec->num_msix_entries, dd->cspec->msix_entries)) - qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); qib_setup_7322_interrupt(dd, 1); @@ -3474,8 +3485,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, return; } if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { - qib_dev_err(dd, "Physical page address 0x%lx " - "larger than supported\n", pa); + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); return; } @@ -4029,8 +4041,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) Loopback); /* enable heart beat again */ val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; - qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback " - "(normal)\n", ppd->dd->unit, ppd->port); + 
qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); } else ret = -EINVAL; if (!ret) { @@ -4714,8 +4727,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd) dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs * sizeof(u64), GFP_KERNEL); if (!dd->pport[i].cpspec->portcntrs) - qib_dev_err(dd, "Failed allocation for" - " portcounters\n"); + qib_dev_err(dd, + "Failed allocation for portcounters\n"); } } @@ -4865,8 +4878,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) if (!dd->cspec->num_msix_entries) return 0; /* already using INTx */ - qib_devinfo(dd->pcidev, "MSIx interrupt not detected," - " trying INTx interrupts\n"); + qib_devinfo(dd->pcidev, + "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); qib_enable_intx(dd->pcidev); qib_setup_7322_interrupt(dd, 0); @@ -5151,15 +5164,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; if (!ibp->smi_ah) { - struct ib_ah_attr attr; struct ib_ah *ah; - memset(&attr, 0, sizeof attr); - attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE); - attr.port_num = ppd->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->smi_ah = to_iah(ah); @@ -5844,22 +5853,21 @@ static int setup_txselect(const char *str, struct kernel_param *kp) { struct qib_devdata *dd; unsigned long val; - char *n; + int ret; + if (strlen(str) >= MAX_ATTEN_LEN) { - printk(KERN_INFO QIB_DRV_NAME " txselect_values string " - "too long\n"); + pr_info("txselect_values string too long\n"); return -ENOSPC; } - val = simple_strtoul(str, &n, 0); - if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + ret = kstrtoul(str, 0, &val); + if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { - printk(KERN_INFO QIB_DRV_NAME - "txselect_values must start with a number < %d\n", + 
pr_info("txselect_values must start with a number < %d\n", TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); - return -EINVAL; + return ret ? ret : -EINVAL; } - strcpy(txselect_list, str); + strcpy(txselect_list, str); list_for_each_entry(dd, &qib_dev_list, list) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) set_no_qsfp_atten(dd, 1); @@ -5882,11 +5890,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd) qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); val = qib_read_kreg64(dd, kr_sendpioavailaddr); if (val != dd->pioavailregs_phys) { - qib_dev_err(dd, "Catastrophic software error, " - "SendPIOAvailAddr written as %lx, " - "read back as %llx\n", - (unsigned long) dd->pioavailregs_phys, - (unsigned long long) val); + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); ret = -EINVAL; } @@ -6098,8 +6105,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) dd->revision = readq(&dd->kregbase[kr_revision]); if ((dd->revision & 0xffffffffU) == 0xffffffffU) { - qib_dev_err(dd, "Revision register read failure, " - "giving up initialization\n"); + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); ret = -ENODEV; goto bail; } @@ -6265,9 +6272,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd) */ if (!(dd->flags & QIB_HAS_QSFP)) { if (!IS_QMH(dd) && !IS_QME(dd)) - qib_devinfo(dd->pcidev, "IB%u:%u: " - "Unknown mezzanine card type\n", - dd->unit, ppd->port); + qib_devinfo(dd->pcidev, + "IB%u:%u: Unknown mezzanine card type\n", + dd->unit, ppd->port); cp->h1_val = IS_QMH(dd) ? 
H1_FORCE_QMH : H1_FORCE_QME; /* * Choose center value as default tx serdes setting @@ -6922,8 +6929,8 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->cspec->msix_entries[i].msix.entry = i; if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) - qib_dev_err(dd, "Failed to setup PCIe or interrupts; " - "continuing anyway\n"); + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ dd->cspec->num_msix_entries = tabsize; @@ -7276,8 +7283,7 @@ static void find_best_ent(struct qib_pportdata *ppd, ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ)) { idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); - printk(KERN_INFO QIB_DRV_NAME - " IB%u:%u use idx %u into txdds_mfg\n", + pr_info("IB%u:%u use idx %u into txdds_mfg\n", ppd->dd->unit, ppd->port, idx); *sdr_dds = &txdds_extra_mfg[idx]; *ddr_dds = &txdds_extra_mfg[idx]; @@ -7432,11 +7438,11 @@ static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); if (enable && !state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n", + pr_info("IB%u:%u Turning LOS on\n", ppd->dd->unit, ppd->port); data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } else if (!enable && state) { - printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n", + pr_info("IB%u:%u Turning LOS off\n", ppd->dd->unit, ppd->port); data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); } @@ -7672,8 +7678,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) } } if (chan_done) { - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d calibration not done after .5 sec: 0x%x\n", + pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", IBSD(ppd->hw_pidx), chan_done); } else { for (chan = 0; chan < SERDES_CHANS; ++chan) { @@ -7681,9 +7686,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) (chan + (chan >> 1)), 25, 0, 0); if ((~rxcaldone & 
(u32)BMASK(10, 10)) == 0) - printk(KERN_INFO QIB_DRV_NAME - " Serdes %d chan %d calibration " - "failed\n", IBSD(ppd->hw_pidx), chan); + pr_info("Serdes %d chan %d calibration failed\n", + IBSD(ppd->hw_pidx), chan); } } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index dc14e100a7f1..4443adfcd9ee 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -38,9 +38,14 @@ #include <linux/delay.h> #include <linux/idr.h> #include <linux/module.h> +#include <linux/printk.h> #include "qib.h" #include "qib_common.h" +#include "qib_mad.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt /* * min buffers we want to have per context, after driver @@ -71,6 +76,9 @@ unsigned qib_n_krcv_queues; module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); +unsigned qib_cc_table_size; +module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); /* * qib_wc_pat parameter: * 0 is WC via MTRR @@ -120,8 +128,8 @@ int qib_create_ctxts(struct qib_devdata *dd) */ dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); if (!dd->rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata array, " - "failing\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata array, failing\n"); ret = -ENOMEM; goto done; } @@ -137,8 +145,8 @@ int qib_create_ctxts(struct qib_devdata *dd) ppd = dd->pport + (i % dd->num_pports); rcd = 
qib_create_ctxtdata(ppd, i); if (!rcd) { - qib_dev_err(dd, "Unable to allocate ctxtdata" - " for Kernel ctxt, failing\n"); + qib_dev_err(dd, + "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); ret = -ENOMEM; goto done; } @@ -199,6 +207,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { + int size; ppd->dd = dd; ppd->hw_pidx = hw_pidx; ppd->port = port; /* IB port number, not index */ @@ -210,6 +219,83 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, init_timer(&ppd->symerr_clear_timer); ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; ppd->symerr_clear_timer.data = (unsigned long)ppd; + + ppd->qib_wq = NULL; + + spin_lock_init(&ppd->cc_shadow_lock); + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) + goto bail; + + ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, + IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); + + ppd->cc_max_table_entries = + ppd->cc_supported_table_entries/IB_CCT_ENTRIES; + + size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) + * IB_CCT_ENTRIES; + ppd->ccti_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries) { + qib_dev_err(dd, + "failed to allocate congestion control table for port %d!\n", + port); + goto bail; + } + + size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); + ppd->congestion_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries) { + qib_dev_err(dd, + "failed to allocate congestion setting list for port %d!\n", + port); + goto bail_1; + } + + size = sizeof(struct cc_table_shadow); + ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow ccti list for port %d!\n", + port); + goto bail_2; + } + + size = sizeof(struct ib_cc_congestion_setting_attr); + ppd->congestion_entries_shadow = 
kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow congestion setting list for port %d!\n", + port); + goto bail_3; + } + + return; + +bail_3: + kfree(ppd->ccti_entries_shadow); + ppd->ccti_entries_shadow = NULL; +bail_2: + kfree(ppd->congestion_entries); + ppd->congestion_entries = NULL; +bail_1: + kfree(ppd->ccti_entries); + ppd->ccti_entries = NULL; +bail: + /* User is intentionally disabling the congestion control agent */ + if (!qib_cc_table_size) + return; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { + qib_cc_table_size = 0; + qib_dev_err(dd, + "Congestion Control table size %d less than minimum %d for port %d\n", + qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); + } + + qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", + port); + return; } static int init_pioavailregs(struct qib_devdata *dd) @@ -221,8 +307,8 @@ static int init_pioavailregs(struct qib_devdata *dd) &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, GFP_KERNEL); if (!dd->pioavailregs_dma) { - qib_dev_err(dd, "failed to allocate PIOavail reg area " - "in memory\n"); + qib_dev_err(dd, + "failed to allocate PIOavail reg area in memory\n"); ret = -ENOMEM; goto done; } @@ -277,15 +363,15 @@ static void init_shadow_tids(struct qib_devdata *dd) pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); if (!pages) { - qib_dev_err(dd, "failed to allocate shadow page * " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow page * array, no expected sends!\n"); goto bail; } addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); if (!addrs) { - qib_dev_err(dd, "failed to allocate shadow dma handle " - "array, no expected sends!\n"); + qib_dev_err(dd, + "failed to allocate shadow dma handle array, no expected sends!\n"); goto bail_free; } @@ -309,13 +395,13 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & 
QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { - qib_dev_err(dd, "Driver only handles version %d, " - "chip swversion is %d (%llx), failng\n", - QIB_CHIP_SWVERSION, - (int)(dd->revision >> + qib_dev_err(dd, + "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & - QLOGIC_IB_R_SOFTWARE_MASK, - (unsigned long long) dd->revision); + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); ret = -ENOSYS; goto done; } @@ -419,8 +505,8 @@ static void verify_interrupt(unsigned long opaque) */ if (dd->int_counter == 0) { if (!dd->f_intr_fallback(dd)) - dev_err(&dd->pcidev->dev, "No interrupts detected, " - "not usable.\n"); + dev_err(&dd->pcidev->dev, + "No interrupts detected, not usable.\n"); else /* re-arm the timer to see if fallback works */ mod_timer(&dd->intrchk_timer, jiffies + HZ/2); } @@ -483,6 +569,41 @@ static void init_piobuf_state(struct qib_devdata *dd) } /** + * qib_create_workqueues - create per port workqueues + * @dd: the qlogic_ib device + */ +static int qib_create_workqueues(struct qib_devdata *dd) +{ + int pidx; + struct qib_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (!ppd->qib_wq) { + char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ + snprintf(wq_name, sizeof(wq_name), "qib%d_%d", + dd->unit, pidx); + ppd->qib_wq = + create_singlethread_workqueue(wq_name); + if (!ppd->qib_wq) + goto wq_error; + } + } + return 0; +wq_error: + pr_err("create_singlethread_workqueue failed for port %d\n", + pidx + 1); + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + } + return -ENOMEM; +} + +/** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device * @reinit: reinitializing, so don't allocate new memory @@ -547,8 +668,8 @@ int qib_init(struct qib_devdata *dd, int reinit) if 
(!lastfail) lastfail = qib_setup_eagerbufs(rcd); if (lastfail) { - qib_dev_err(dd, "failed to allocate kernel ctxt's " - "rcvhdrq and/or egr bufs\n"); + qib_dev_err(dd, + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); continue; } } @@ -764,6 +885,11 @@ static void qib_shutdown_device(struct qib_devdata *dd) * We can't count on interrupts since we are stopping. */ dd->f_quiet_serdes(ppd); + + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } } qib_update_eeprom_log(dd); @@ -893,8 +1019,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd) /* 1 GiB/sec, slightly over IB SDR line rate */ if (lcnt < (emsecs * 1024U)) qib_dev_err(dd, - "Performance problem: bandwidth to PIO buffers is " - "only %u MiB/sec\n", + "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", lcnt / (u32) emsecs); preempt_enable(); @@ -967,8 +1092,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (qib_cpulist) qib_cpulist_count = count; else - qib_early_err(&pdev->dev, "Could not alloc cpulist " - "info, cpu affinity might be wrong\n"); + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); } bail: @@ -1057,21 +1182,20 @@ static int __init qlogic_ib_init(void) */ idr_init(&qib_unit_table); if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n"); + pr_err("idr_pre_get() failed\n"); ret = -ENOMEM; goto bail_cq_wq; } ret = pci_register_driver(&qib_driver); if (ret < 0) { - printk(KERN_ERR QIB_DRV_NAME - ": Unable to register driver: error %d\n", -ret); + pr_err("Unable to register driver: error %d\n", -ret); goto bail_unit; } /* not fatal if it doesn't work */ if (qib_init_qibfs()) - printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n"); + pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ bail_unit: @@ -1095,9 +1219,9 @@ static void __exit qlogic_ib_cleanup(void) ret = qib_exit_qibfs(); if 
(ret) - printk(KERN_ERR QIB_DRV_NAME ": " - "Unable to cleanup counter filesystem: " - "error %d\n", -ret); + pr_err( + "Unable to cleanup counter filesystem: error %d\n", + -ret); pci_unregister_driver(&qib_driver); @@ -1121,10 +1245,24 @@ static void cleanup_device_data(struct qib_devdata *dd) unsigned long flags; /* users can't do anything more with chip */ - for (pidx = 0; pidx < dd->num_pports; ++pidx) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { if (dd->pport[pidx].statusp) *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + spin_lock(&dd->pport[pidx].cc_shadow_lock); + + kfree(dd->pport[pidx].congestion_entries); + dd->pport[pidx].congestion_entries = NULL; + kfree(dd->pport[pidx].ccti_entries); + dd->pport[pidx].ccti_entries = NULL; + kfree(dd->pport[pidx].ccti_entries_shadow); + dd->pport[pidx].ccti_entries_shadow = NULL; + kfree(dd->pport[pidx].congestion_entries_shadow); + dd->pport[pidx].congestion_entries_shadow = NULL; + + spin_unlock(&dd->pport[pidx].cc_shadow_lock); + } + if (!qib_wc_pat) qib_disable_wc(dd); @@ -1223,9 +1361,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, #ifdef CONFIG_PCI_MSI dd = qib_init_iba6120_funcs(pdev, ent); #else - qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " - "work if CONFIG_PCI_MSI is not enabled\n", - ent->device); + qib_early_err(&pdev->dev, + "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + ent->device); dd = ERR_PTR(-ENODEV); #endif break; @@ -1239,8 +1377,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, break; default: - qib_early_err(&pdev->dev, "Failing on unknown QLogic " - "deviceid 0x%x\n", ent->device); + qib_early_err(&pdev->dev, + "Failing on unknown QLogic deviceid 0x%x\n", + ent->device); ret = -ENODEV; } @@ -1249,6 +1388,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (ret) goto bail; /* error already printed */ + ret = qib_create_workqueues(dd); + if (ret) + goto bail; + /* do the generic initialization */ 
initfail = qib_init(dd, 0); @@ -1293,9 +1436,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (!qib_wc_pat) { ret = qib_enable_wc(dd); if (ret) { - qib_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); ret = 0; } } @@ -1361,9 +1504,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { - qib_dev_err(dd, "attempt to allocate %d bytes " - "for ctxt %u rcvhdrq failed\n", - amt, rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); goto bail; } @@ -1392,8 +1535,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) return 0; bail_free: - qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u " - "rcvhdrqtailaddr failed\n", rcd->ctxt); + qib_dev_err(dd, + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; bail_free_hdrq: diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 6ae57d23004a..f4918f2165ec 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -224,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd) * We print the message and disable interrupts, in hope of * having a better chance of debugging the problem. 
*/ - qib_dev_err(dd, "Read of chip interrupt status failed" - " disabling interrupts\n"); + qib_dev_err(dd, + "Read of chip interrupt status failed disabling interrupts\n"); if (allbits++) { /* disable interrupt delivery, something is very wrong */ if (allbits == 2) dd->f_set_intr_state(dd, 0); if (allbits == 3) { - qib_dev_err(dd, "2nd bad interrupt status, " - "unregistering interrupts\n"); + qib_dev_err(dd, + "2nd bad interrupt status, unregistering interrupts\n"); dd->flags |= QIB_BADINTR; dd->flags &= ~QIB_INITTED; dd->f_free_irq(dd); diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 8fd19a47df0c..e9486c74c226 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -35,21 +35,41 @@ /** * qib_alloc_lkey - allocate an lkey - * @rkt: lkey table in which to allocate the lkey * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. * - * Returns 1 if successful, otherwise returns 0. 
*/ -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) { unsigned long flags; u32 r; u32 n; - int ret; + int ret = 0; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; spin_lock_irqsave(&rkt->lock, flags); + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct qib_mregion *tmr; + + tmr = rcu_dereference(dev->dma_mr); + if (!tmr) { + qib_get_mr(mr); + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } + goto success; + } + /* Find the next available LKEY */ r = rkt->next; n = r; @@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) if (rkt->table[r] == NULL) break; r = (r + 1) & (rkt->max - 1); - if (r == n) { - spin_unlock_irqrestore(&rkt->lock, flags); - ret = 0; + if (r == n) goto bail; - } } rkt->next = (r + 1) & (rkt->max - 1); /* @@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) mr->lkey |= 1 << 8; rkt->gen++; } - rkt->table[r] = mr; + qib_get_mr(mr); + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: spin_unlock_irqrestore(&rkt->lock, flags); - - ret = 1; - -bail: +out: return ret; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; } /** * qib_free_lkey - free an lkey - * @rkt: table from which to free the lkey - * @lkey: lkey id to free + * @mr: mr to free from tables */ -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) +void qib_free_lkey(struct qib_mregion *mr) { unsigned long flags; u32 lkey = mr->lkey; u32 r; - int ret; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (lkey == 0) { - if (dev->dma_mr && dev->dma_mr == mr) { - ret = atomic_read(&dev->dma_mr->refcount); - if (!ret) - dev->dma_mr = NULL; - } else - ret = 0; - } else { + 
spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) + rcu_assign_pointer(dev->dma_mr, NULL); + else { r = lkey >> (32 - ib_qib_lkey_table_size); - ret = atomic_read(&dev->lk_table.table[r]->refcount); - if (!ret) - dev->lk_table.table[r] = NULL; + rcu_assign_pointer(rkt->table[r], NULL); } - spin_unlock_irqrestore(&dev->lk_table.lock, flags); - - if (ret) - ret = -EBUSY; - return ret; + qib_put_mr(mr); + mr->lkey_published = 0; +out: + spin_unlock_irqrestore(&rkt->lock, flags); } /** * qib_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against + * @pd: protection domain * @isge: outgoing internal SGE * @sge: SGE to check * @acc: access flags * * Return 1 if valid and successful, otherwise returns 0. * + * increments the reference count upon success + * * Check the IB SGE for validity and initialize our internal version * of it. */ @@ -136,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use LKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). 
*/ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (sge->lkey == 0) { struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); - isge->mr = dev->dma_mr; + isge->mr = mr; isge->vaddr = (void *) sge->addr; isge->length = sge->length; isge->sge_length = sge->length; @@ -161,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, isge->n = 0; goto ok; } - mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != sge->lkey || - mr->pd != &pd->ibpd)) + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; - if (unlikely(sge->addr < mr->user_base || - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) + if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length || + (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -208,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } /** * qib_rkey_ok - check the IB virtual address, length, and RKEY - * @dev: infiniband device - * @ss: SGE state + * @qp: qp for validation + * @sge: SGE state * @len: length of data * @vaddr: virtual address to place data * @rkey: rkey to check * @acc: access flags * * Return 1 if successful, otherwise 0. 
+ * + * increments the reference count upon success */ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc) @@ -230,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, struct qib_mregion *mr; unsigned n, m; size_t off; - unsigned long flags; /* * We use RKEY == zero for kernel virtual addresses * (see qib_get_dma_mr and qib_dma.c). */ - spin_lock_irqsave(&rkt->lock, flags); + rcu_read_lock(); if (rkey == 0) { struct qib_pd *pd = to_ipd(qp->ibqp.pd); struct qib_ibdev *dev = to_idev(pd->ibpd.device); if (pd->user) goto bail; - if (!dev->dma_mr) + mr = rcu_dereference(dev->dma_mr); + if (!mr) goto bail; - atomic_inc(&dev->dma_mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); - sge->mr = dev->dma_mr; + sge->mr = mr; sge->vaddr = (void *) vaddr; sge->length = len; sge->sge_length = len; @@ -257,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, goto ok; } - mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; - if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + mr = rcu_dereference( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); - spin_unlock_irqrestore(&rkt->lock, flags); + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); off += mr->offset; if (mr->page_shift) { @@ -302,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, ok: return 1; bail: - spin_unlock_irqrestore(&rkt->lock, flags); + rcu_read_unlock(); return 0; } @@ -325,7 +349,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) if (pd->user || rkey == 0) goto bail; - mr = rkt->table[(rkey >> (32 - 
ib_qib_lkey_table_size))]; + mr = rcu_dereference_protected( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 43390217a026..19f1e6c45fb6 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static int reply_failure(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. 
+ */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY; +} + static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (!ibp->sm_ah) { if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - struct ib_ah_attr attr; - memset(&attr, 0, sizeof attr); - attr.dlid = ibp->sm_lid; - attr.port_num = ppd_from_ibp(ibp)->port; - ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr); + ah = qib_create_qp0_ah(ibp, ibp->sm_lid); if (IS_ERR(ah)) - ret = -EINVAL; + ret = PTR_ERR(ah); else { send_buf->ah = ah; ibp->sm_ah = to_iah(ah); @@ -2051,6 +2059,298 @@ bail: return ret; } +static int cc_get_classportinfo(struct ib_cc_mad *ccp, + struct ib_device *ibdev) +{ + struct ib_cc_classportinfo_attr *p = + (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->base_version = 1; + p->class_version = 1; + p->cap_mask = 0; + + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. 
+ */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_info(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_info_attr *p = + (struct ib_cc_info_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->congestion_info = 0; + p->control_table_cap = ppd->cc_max_table_entries; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + int i; + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_cc_congestion_entry_shadow *entries; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + entries = ppd->congestion_entries_shadow->entries; + p->port_control = cpu_to_be16( + ppd->congestion_entries_shadow->port_control); + p->control_map = cpu_to_be16( + ppd->congestion_entries_shadow->control_map); + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + p->entries[i].ccti_increase = entries[i].ccti_increase; + p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer); + p->entries[i].trigger_threshold = entries[i].trigger_threshold; + p->entries[i].ccti_min = entries[i].ccti_min; + } + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 max_cct_block; + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* 
Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + max_cct_block = + (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES; + max_cct_block = max_cct_block ? max_cct_block - 1 : 0; + + if (cct_block_index > max_cct_block) { + spin_unlock(&ppd->cc_shadow_lock); + goto bail; + } + + ccp->attr_mod = cpu_to_be32(cct_block_index); + + cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1); + + cct_entry--; + + p->ccti_limit = cpu_to_be16(cct_entry); + + entries = &ppd->ccti_entries_shadow-> + entries[IB_CCT_ENTRIES * cct_block_index]; + cct_entry %= IB_CCT_ENTRIES; + + for (i = 0; i <= cct_entry; i++) + p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int cc_set_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int i; + + ppd->cc_sl_control_map = be16_to_cpu(p->control_map); + + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + ppd->congestion_entries[i].ccti_increase = + p->entries[i].ccti_increase; + + ppd->congestion_entries[i].ccti_timer = + be16_to_cpu(p->entries[i].ccti_timer); + + ppd->congestion_entries[i].trigger_threshold = + p->entries[i].trigger_threshold; + + ppd->congestion_entries[i].ccti_min = + p->entries[i].ccti_min; + } + + return reply((struct ib_smp *) ccp); +} + +static int cc_set_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct 
qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + /* If this packet is the first in the sequence then + * zero the total table entry count. + */ + if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES) + ppd->total_cct_entry = 0; + + cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES; + + /* ccti_limit is 0 to 63 */ + ppd->total_cct_entry += (cct_entry + 1); + + if (ppd->total_cct_entry > ppd->cc_supported_table_entries) + goto bail; + + ppd->ccti_limit = be16_to_cpu(p->ccti_limit); + + entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index); + + for (i = 0; i <= cct_entry; i++) + entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry); + + spin_lock(&ppd->cc_shadow_lock); + + ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1; + memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries, + (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry))); + + ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED; + ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map; + memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries, + IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry)); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int check_cc_key(struct qib_ibport *ibp, + struct ib_cc_mad *ccp, int mad_flags) +{ + return 0; +} + +static int process_cc(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + int ret; + + *out_mad = *in_mad; + + if (ccp->class_version != 2) { + ccp->status |= 
IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *)ccp); + goto bail; + } + + ret = check_cc_key(ibp, ccp, mad_flags); + if (ret) + goto bail; + + switch (ccp->method) { + case IB_MGMT_METHOD_GET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CLASSPORTINFO: + ret = cc_get_classportinfo(ccp, ibdev); + goto bail; + + case IB_CC_ATTR_CONGESTION_INFO: + ret = cc_get_congestion_info(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_get_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_get_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_set_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_set_congestion_control_table(ccp, ibdev, port); + goto bail; + + /* FALLTHROUGH */ + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. 
+ */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_TRAP: + default: + ccp->status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) ccp); + } + +bail: + return ret; +} + /** * qib_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on @@ -2075,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: @@ -2086,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = process_perf(ibdev, port, in_mad, out_mad); goto bail; + case IB_MGMT_CLASS_CONG_MGMT: + if (!ppd->congestion_entries_shadow || + !qib_cc_table_size) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + default: ret = IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index ecc416cdbaaa..57bd3fa016bc 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -31,6 +31,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#ifndef _QIB_MAD_H +#define _QIB_MAD_H #include <rdma/ib_pma.h> @@ -223,6 +225,198 @@ struct ib_pma_portcounters_cong { #define IB_PMA_SEL_CONG_ROUTING 0x08 /* + * Congestion control class attributes + */ +#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001) +#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011) +#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012) +#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013) +#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014) +#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015) +#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016) +#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017) +#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018) + +/* generalizations for threshold values */ +#define IB_CC_THRESHOLD_NONE 0x0 +#define IB_CC_THRESHOLD_MIN 0x1 +#define IB_CC_THRESHOLD_MAX 0xf + +/* CCA MAD header constants */ +#define IB_CC_MAD_LOGDATA_LEN 32 +#define IB_CC_MAD_MGMTDATA_LEN 192 + +struct ib_cc_mad { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 cckey; + + /* For CongestionLog attribute only */ + u8 log_data[IB_CC_MAD_LOGDATA_LEN]; + + u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN]; +} __packed; + +/* + * Congestion Control class portinfo capability mask bits + */ +#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0) +#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1) +#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2) +#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8) + +struct ib_cc_classportinfo_attr { + u8 base_version; + u8 class_version; + __be16 cap_mask; + u8 reserved[3]; + u8 resp_time_value; /* only lower 5 bits */ + union ib_gid redirect_gid; + __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; 
/* only lower 24 bits */ + __be32 redirect_qkey; + union ib_gid trap_gid; + __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hl_qp; /* 8, 24 bits respectively */ + __be32 trap_qkey; +} __packed; + +/* Congestion control traps */ +#define IB_CC_TRAP_KEY_VIOLATION 0x0000 + +struct ib_cc_trap_key_violation_attr { + __be16 source_lid; + u8 method; + u8 reserved1; + __be16 attrib_id; + __be32 attrib_mod; + __be32 qp; + __be64 cckey; + u8 sgid[16]; + u8 padding[24]; +} __packed; + +/* Congestion info flags */ +#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1 +#define IB_CC_TABLE_CAP_DEFAULT 31 + +struct ib_cc_info_attr { + __be16 congestion_info; + u8 control_table_cap; /* Multiple of 64 entry unit CCTs */ +} __packed; + +struct ib_cc_key_info_attr { + __be64 cckey; + u8 protect; + __be16 lease_period; + __be16 violations; +} __packed; + +#define IB_CC_CL_CA_LOGEVENTS_LEN 208 + +struct ib_cc_log_attr { + u8 log_type; + u8 congestion_flags; + __be16 threshold_event_counter; + __be16 threshold_congestion_event_map; + __be16 current_time_stamp; + u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN]; +} __packed; + +#define IB_CC_CLEC_SERVICETYPE_RC 0x0 +#define IB_CC_CLEC_SERVICETYPE_UC 0x1 +#define IB_CC_CLEC_SERVICETYPE_RD 0x2 +#define IB_CC_CLEC_SERVICETYPE_UD 0x3 + +struct ib_cc_log_event { + u8 local_qp_cn_entry; + u8 remote_qp_number_cn_entry[3]; + u8 sl_cn_entry:4; + u8 service_type_cn_entry:4; + __be32 remote_lid_cn_entry; + __be32 timestamp_cn_entry; +} __packed; + +/* Sixteen congestion entries */ +#define IB_CC_CCS_ENTRIES 16 + +/* Port control flags */ +#define IB_CC_CCS_PC_SL_BASED 0x01 + +struct ib_cc_congestion_entry { + u8 ccti_increase; + __be16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_entry_shadow { + u8 ccti_increase; + u16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct 
ib_cc_congestion_setting_attr { + __be16 port_control; + __be16 control_map; + struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES]; +} __packed; + +struct ib_cc_congestion_setting_attr_shadow { + u16 port_control; + u16 control_map; + struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES]; +} __packed; + +#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1 +#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1 + +/* 64 Congestion Control table entries in a single MAD */ +#define IB_CCT_ENTRIES 64 +#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2) + +struct ib_cc_table_entry { + __be16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_entry_shadow { + u16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_attr { + __be16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +struct ib_cc_table_attr_shadow { + u16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +#define CC_TABLE_SHADOW_MAX \ + (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES) + +struct cc_table_shadow { + u16 ccti_last_entry; + struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; +} __packed; + +#endif /* _QIB_MAD_H */ +/* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields * which specify the N'th counter's capabilities. See ch. 16.1.3.2. * We support 5 counters which only count the mandatory quantities. 
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 08944e2ee334..e6687ded8210 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) return container_of(ibfmr, struct qib_fmr, ibfmr); } +static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + int rval = 0; + + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); + if (!mr->map[i]) + goto bail; + } + mr->mapsz = m; + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; +out: + return rval; +bail: + while (i) + kfree(mr->map[--i]); + rval = -ENOMEM; + goto out; +} + +static void deinit_qib_mregion(struct qib_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + + /** * qib_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region @@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) */ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) { - struct qib_ibdev *dev = to_idev(pd->device); - struct qib_mr *mr; + struct qib_mr *mr = NULL; struct ib_mr *ret; - unsigned long flags; + int rval; if (to_ipd(pd)->user) { ret = ERR_PTR(-EPERM); @@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) goto bail; } - mr->mr.access_flags = acc; - atomic_set(&mr->mr.refcount, 0); + rval = init_qib_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } + - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (!dev->dma_mr) - dev->dma_mr = &mr->mr; - spin_unlock_irqrestore(&dev->lk_table.lock, flags); + rval = qib_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + mr->mr.access_flags = acc; ret = &mr->ibmr; +done: 
+ return ret; +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - return ret; + kfree(mr); + goto done; } -static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) +static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) { struct qib_mr *mr; - int m, i = 0; + int rval = -ENOMEM; + int m; /* Allocate struct plus pointers to first level page tables. */ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; - mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); if (!mr) - goto done; - - /* Allocate first level page tables. */ - for (; i < m; i++) { - mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); - if (!mr->mr.map[i]) - goto bail; - } - mr->mr.mapsz = m; - mr->mr.page_shift = 0; - mr->mr.max_segs = count; + goto bail; + rval = init_qib_mregion(&mr->mr, pd, count); + if (rval) + goto bail; /* * ib_reg_phys_mr() will initialize mr->ibmr except for * lkey and rkey. */ - if (!qib_alloc_lkey(lk_table, &mr->mr)) - goto bail; + rval = qib_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; mr->ibmr.lkey = mr->mr.lkey; mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; - atomic_set(&mr->mr.refcount, 0); - goto done; - +bail_mregion: + deinit_qib_mregion(&mr->mr); bail: - while (i) - kfree(mr->mr.map[--i]); kfree(mr); - mr = NULL; - -done: - return mr; + mr = ERR_PTR(rval); + goto done; } /** @@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, int n, m, i; struct ib_mr *ret; - mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); - if (mr == NULL) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(num_phys_buf, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; goto bail; } - mr->mr.pd = pd; mr->mr.user_base = *iova_start; mr->mr.iova = *iova_start; - mr->mr.length = 0; - mr->mr.offset = 0; mr->mr.access_flags = acc; - mr->umem = NULL; m = 0; n = 0; @@ -186,7 +221,6 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace 
address * @length: length of region to register - * @virt_addr: virtual address to use (from HCA's point of view) * @mr_access_flags: access flags for this memory region * @udata: unused by the QLogic_IB driver * @@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, &umem->chunk_list, list) n += chunk->nents; - mr = alloc_mr(n, &to_idev(pd->device)->lk_table); - if (!mr) { - ret = ERR_PTR(-ENOMEM); + mr = alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; ib_umem_release(umem); goto bail; } - mr->mr.pd = pd; mr->mr.user_base = start; mr->mr.iova = virt_addr; mr->mr.length = length; @@ -271,21 +304,25 @@ bail: int qib_dereg_mr(struct ib_mr *ibmr) { struct qib_mr *mr = to_imr(ibmr); - struct qib_ibdev *dev = to_idev(ibmr->device); - int ret; - int i; - - ret = qib_free_lkey(dev, &mr->mr); - if (ret) - return ret; - - i = mr->mr.mapsz; - while (i) - kfree(mr->mr.map[--i]); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&mr->mr); + + qib_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&mr->mr); if (mr->umem) ib_umem_release(mr->umem); kfree(mr); - return 0; +out: + return ret; } /* @@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct qib_mr *mr; - mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); - if (mr == NULL) - return ERR_PTR(-ENOMEM); - - mr->mr.pd = pd; - mr->mr.user_base = 0; - mr->mr.iova = 0; - mr->mr.length = 0; - mr->mr.offset = 0; - mr->mr.access_flags = 0; - mr->umem = NULL; + mr = alloc_mr(max_page_list_len, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; return &mr->ibmr; } @@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) if (size > PAGE_SIZE) return ERR_PTR(-EINVAL); - pl = 
kmalloc(sizeof *pl, GFP_KERNEL); + pl = kzalloc(sizeof *pl, GFP_KERNEL); if (!pl) return ERR_PTR(-ENOMEM); - pl->page_list = kmalloc(size, GFP_KERNEL); + pl->page_list = kzalloc(size, GFP_KERNEL); if (!pl->page_list) goto err_free; @@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct qib_fmr *fmr; - int m, i = 0; + int m; struct ib_fmr *ret; + int rval = -ENOMEM; /* Allocate struct plus pointers to first level page tables. */ m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; - fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); if (!fmr) goto bail; - /* Allocate first level page tables. */ - for (; i < m; i++) { - fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], - GFP_KERNEL); - if (!fmr->mr.map[i]) - goto bail; - } - fmr->mr.mapsz = m; + rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; /* * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & * rkey. */ - if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) - goto bail; + rval = qib_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; fmr->ibfmr.rkey = fmr->mr.lkey; fmr->ibfmr.lkey = fmr->mr.lkey; /* * Resources are allocated but no valid mapping (RKEY can't be * used). 
*/ - fmr->mr.pd = pd; - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - fmr->mr.offset = 0; fmr->mr.access_flags = mr_access_flags; fmr->mr.max_segs = fmr_attr->max_pages; fmr->mr.page_shift = fmr_attr->page_shift; - atomic_set(&fmr->mr.refcount, 0); ret = &fmr->ibfmr; - goto done; +done: + return ret; +bail_mregion: + deinit_qib_mregion(&fmr->mr); bail: - while (i) - kfree(fmr->mr.map[--i]); kfree(fmr); - ret = ERR_PTR(-ENOMEM); - -done: - return ret; + ret = ERR_PTR(rval); + goto done; } /** @@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, u32 ps; int ret; - if (atomic_read(&fmr->mr.refcount)) + i = atomic_read(&fmr->mr.refcount); + if (i > 2) return -EBUSY; if (list_len > fmr->mr.max_segs) { @@ -490,16 +510,27 @@ int qib_unmap_fmr(struct list_head *fmr_list) int qib_dealloc_fmr(struct ib_fmr *ibfmr) { struct qib_fmr *fmr = to_ifmr(ibfmr); - int ret; - int i; + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&fmr->mr); + qib_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} - ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); - if (ret) - return ret; +void mr_rcu_callback(struct rcu_head *list) +{ + struct qib_mregion *mr = container_of(list, struct qib_mregion, list); - i = fmr->mr.mapsz; - while (i) - kfree(fmr->mr.map[--i]); - kfree(fmr); - return 0; + complete(&mr->comp); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 790646ef5106..062c301ebf53 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -224,8 +224,9 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, } do_intx: if (ret) { - qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " - "falling back to INTx\n", 
tabsize, ret); + qib_dev_err(dd, + "pci_enable_msix %d vectors failed: %d, falling back to INTx\n", + tabsize, ret); tabsize = 0; } for (i = 0; i < tabsize; i++) @@ -251,8 +252,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) ret = pci_enable_msi(pdev); if (ret) - qib_dev_err(dd, "pci_enable_msi failed: %d, " - "interrupts may not work\n", ret); + qib_dev_err(dd, + "pci_enable_msi failed: %d, interrupts may not work\n", + ret); /* continue even if it fails, we may still be OK... */ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, @@ -358,8 +360,8 @@ int qib_reinit_intr(struct qib_devdata *dd) pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); if (!pos) { - qib_dev_err(dd, "Can't find MSI capability, " - "can't restore MSI settings\n"); + qib_dev_err(dd, + "Can't find MSI capability, can't restore MSI settings\n"); ret = 0; /* nothing special for MSIx, just MSI */ goto bail; @@ -471,8 +473,8 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); r = pci_enable_device(dd->pcidev); if (r) - qib_dev_err(dd, "pci_enable_device failed after " - "reset: %d\n", r); + qib_dev_err(dd, + "pci_enable_device failed after reset: %d\n", r); } /* code to adjust PCIe capabilities. 
*/ @@ -717,15 +719,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; } - qib_devinfo(pdev, "QIB mmio_enabled function called, " - "read wordscntr %Lx, returning %d\n", words, ret); + qib_devinfo(pdev, + "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", + words, ret); return ret; } static pci_ers_result_t qib_pci_slot_reset(struct pci_dev *pdev) { - qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); return PCI_ERS_RESULT_CAN_RECOVER; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 1ce56b51ab1a..4850d03870c2 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -250,23 +250,33 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) spin_lock_irqsave(&dev->qpt_lock, flags); - if (ibp->qp0 == qp) { + if (rcu_dereference_protected(ibp->qp0, + lockdep_is_held(&dev->qpt_lock)) == qp) { atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp0, NULL); - } else if (ibp->qp1 == qp) { + } else if (rcu_dereference_protected(ibp->qp1, + lockdep_is_held(&dev->qpt_lock)) == qp) { atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp1, NULL); } else { - struct qib_qp *q, **qpp; + struct qib_qp *q; + struct qib_qp __rcu **qpp; qpp = &dev->qp_table[n]; - for (; (q = *qpp) != NULL; qpp = &q->next) + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); + for (; q; qpp = &q->next) { if (q == qp) { atomic_dec(&qp->refcount); - rcu_assign_pointer(*qpp, qp->next); - qp->next = NULL; + *qpp = qp->next; + rcu_assign_pointer(qp->next, NULL); + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); break; } + q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock)); + } } spin_unlock_irqrestore(&dev->qpt_lock, flags); @@ -302,10 +312,12 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { - qp = dev->qp_table[n]; + qp = rcu_dereference_protected(dev->qp_table[n], + lockdep_is_held(&dev->qpt_lock)); rcu_assign_pointer(dev->qp_table[n], NULL); - for (; qp; qp = qp->next) + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); @@ -337,7 +349,8 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) unsigned n = qpn_hash(dev, qpn); rcu_read_lock(); - for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) + for (qp = rcu_dereference(dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) break; } @@ 
-406,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) unsigned n; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (clr_sends) { while (qp->s_last != qp->s_head) { @@ -427,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || @@ -437,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) qp->s_last = 0; } if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } } @@ -450,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } } @@ -495,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) if (!(qp->s_flags & QIB_S_BUSY)) { qp->s_hdrwords = 0; if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_tx) { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index b641416148eb..3ab341320ead 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; if 
(e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } /* FALLTHROUGH */ @@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, /* Copy SGE state in case we need to resend */ qp->s_rdma_mr = e->rdma_sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; qp->s_cur_sge = &qp->s_ack_rdma_sge; @@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, qp->s_cur_sge = &qp->s_ack_rdma_sge; qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; if (qp->s_rdma_mr) - atomic_inc(&qp->s_rdma_mr->refcount); + qib_get_mr(qp->s_rdma_mr); len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; @@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } /* Post a send completion queue entry if requested. 
*/ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || @@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; if (e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } if (len != 0) { @@ -2024,11 +2024,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) break; @@ -2116,7 +2112,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } reth = &ohdr->u.rc.reth; @@ -2188,7 +2184,7 @@ send_last: } e = &qp->s_ack_queue[qp->r_head_ack_queue]; if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { - atomic_dec(&e->rdma_sge.mr->refcount); + qib_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; @@ -2210,7 +2206,7 @@ send_last: (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, be64_to_cpu(ateth->compare_data), sdata); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; e->opcode = opcode; e->sent = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index c0ee7e095d81..357b6cfcd46c 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -110,7 +110,7 @@ bad_lkey: while (j) { struct qib_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } ss->num_sge = 0; memset(&wc, 0, sizeof(wc)); @@ -501,7 +501,7 @@ again: (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, sdata, wqe->wr.wr.atomic.swap); - atomic_dec(&qp->r_sge.sge.mr->refcount); + qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -525,7 +525,7 @@ again: sge->sge_length -= len; if (sge->sge_length == 0) { if (!release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--sqp->s_sge.num_sge) *sge = *sqp->s_sge.sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -542,11 +542,7 @@ again: sqp->s_len -= len; } if (release) - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -782,7 +778,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, for (i = 0; i < wqe->wr.num_sge; i++) { struct qib_sge *sge = &wqe->sg_list[i]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index ac065dd6b693..a322d5171a2c 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -342,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); if (ret < 0) - qib_dev_err(dd, "Failed checking TRIMDONE, chn %d" - " (%s)\n", chn, where); + qib_dev_err(dd, + "Failed checking TRIMDONE, chn %d (%s)\n", + chn, where); if (!(ret & 0x10)) { int probe; baduns |= (1 << chn); - qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." - " (%s)\n", chn, ret, where); + qib_dev_err(dd, + "TRIMDONE cleared on chn %d (%02X). (%s)\n", + chn, ret, where); probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_PGUDP(0), 0, 0); qib_dev_err(dd, "probe is %d (%02X)\n", @@ -375,8 +377,8 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd, ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - qib_dev_err(dd, "Failed re-setting " - "TRIMDONE, chn %d (%s)\n", + qib_dev_err(dd, + "Failed re-setting TRIMDONE, chn %d (%s)\n", chn, where); } } @@ -1144,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "pre-read failed: elt %d," - " addr 0x%X, chnl %d\n", - (sloc & 0xF), - (sloc >> 9) & 0x3f, chnl); + qib_dev_err(dd, + "pre-read failed: elt %d, addr 0x%X, chnl %d\n", + (sloc & 0xF), + (sloc >> 9) & 0x3f, chnl); return ret; } val = (ret & ~mask) | (val & mask); @@ -1157,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - qib_dev_err(dd, "Global WR failed: elt %d," - " addr 0x%X, val %02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, val); + qib_dev_err(dd, + "Global WR failed: elt %d, addr 0x%X, val %02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, val); } return ret; } @@ -1173,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - 
qib_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X," - " mask 0x%02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, - val & 0xFF, mask & 0xFF); + qib_dev_err(dd, + "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, + val & 0xFF, mask & 0xFF); break; } } diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 12a9604310d7..3fc514431212 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -276,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) GFP_KERNEL); if (!ppd->sdma_descq) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor " - "FIFO memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA descriptor FIFO memory\n"); goto bail; } @@ -285,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd) ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); if (!ppd->sdma_head_dma) { - qib_dev_err(ppd->dd, "failed to allocate SendDMA " - "head memory\n"); + qib_dev_err(ppd->dd, + "failed to allocate SendDMA head memory\n"); goto cleanup_descq; } ppd->sdma_head_dma[0] = 0; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index dd9cd49d0979..034cc821de5c 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. 
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,41 +34,7 @@ #include <linux/ctype.h> #include "qib.h" - -/** - * qib_parse_ushort - parse an unsigned short value in an arbitrary base - * @str: the string containing the number - * @valp: where to put the result - * - * Returns the number of bytes consumed, or negative value on error. - */ -static int qib_parse_ushort(const char *str, unsigned short *valp) -{ - unsigned long val; - char *end; - int ret; - - if (!isdigit(str[0])) { - ret = -EINVAL; - goto bail; - } - - val = simple_strtoul(str, &end, 0); - - if (val > 0xffff) { - ret = -EINVAL; - goto bail; - } - - *valp = val; - - ret = end + 1 - str; - if (ret == 0) - ret = -EINVAL; - -bail: - return ret; -} +#include "qib_mad.h" /* start of per-port functions */ /* @@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret; + } /* * Set the "intentional" heartbeat enable per either of @@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, * because entering loopback mode overrides it and automatically * disables heartbeat. */ - if (ret >= 0) - ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); - if (ret < 0) - qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); return ret < 0 ? 
ret : count; } @@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, int ret; u16 val; - ret = qib_parse_ushort(buf, &val); - if (ret > 0) - qib_set_led_override(ppd, val); - else + ret = kstrtou16(buf, 0, &val); + if (ret) { qib_dev_err(dd, "attempt to set invalid LED override\n"); - return ret < 0 ? ret : count; + return ret; + } + + qib_set_led_override(ppd, val); + return count; } static ssize_t show_status(struct qib_pportdata *ppd, char *buf) @@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = { NULL }; +/* + * Start of per-port congestion control structures and support code + */ + +/* + * Congestion control table size followed by table entries + */ +static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->ccti_entries_shadow) + return -EINVAL; + + ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow) + + sizeof(__be16); + + if (pos > ret) + return -EINVAL; + + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->ccti_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static struct kobj_type qib_port_cc_ktype = { + .release = qib_port_release, +}; + +static struct bin_attribute cc_table_bin_attr = { + .attr = {.name = "cc_table_bin", .mode = 0444}, + .read = read_cc_table_bin, + .size = PAGE_SIZE, +}; + +/* + * Congestion settings: port control, control map and an array of 16 + * entries for the congestion entries - increase, timer, event log + * trigger threshold and the minimum injection rate delay. 
+ */ +static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return -EINVAL; + + ret = sizeof(struct ib_cc_congestion_setting_attr_shadow); + + if (pos > ret) + return -EINVAL; + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->congestion_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static struct bin_attribute cc_setting_bin_attr = { + .attr = {.name = "cc_settings_bin", .mode = 0444}, + .read = read_cc_setting_bin, + .size = PAGE_SIZE, +}; + + static ssize_t qib_portattr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj, return pattr->store(ppd, buf, len); } -static void qib_port_release(struct kobject *kobj) -{ - /* nothing to do since memory is freed by qib_free_devdata() */ -} static const struct sysfs_ops qib_port_ops = { .show = qib_portattr_show, @@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - char *endp; - long val = simple_strtol(buf, &endp, 0); - - if (val < 0 || endp == buf) - return -EINVAL; + u32 val; + int ret; + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; *(u32 *)((char *) qibp + dattr->counter) = val; return size; } @@ -649,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, int ret; if (!port_num || port_num > dd->num_pports) { - qib_dev_err(dd, "Skipping infiniband class with " - "invalid port %u\n", port_num); + qib_dev_err(dd, + "Skipping infiniband class with 
invalid port %u\n", + port_num); ret = -ENODEV; goto bail; } @@ -659,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, "linkcontrol"); if (ret) { - qib_dev_err(dd, "Skipping linkcontrol sysfs info, " - "(err %d) port %u\n", ret, port_num); + qib_dev_err(dd, + "Skipping linkcontrol sysfs info, (err %d) port %u\n", + ret, port_num); goto bail; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -668,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, "sl2vl"); if (ret) { - qib_dev_err(dd, "Skipping sl2vl sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_sl; + qib_dev_err(dd, + "Skipping sl2vl sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_link; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, "diag_counters"); if (ret) { - qib_dev_err(dd, "Skipping diag_counters sysfs info, " - "(err %d) port %u\n", ret, port_num); - goto bail_diagc; + qib_dev_err(dd, + "Skipping diag_counters sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_sl; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return 0; + + ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype, + kobj, "CCMgtA"); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_diagc; + } + + kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc; + } + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control table 
sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc_entry_bin; + } + + qib_devinfo(dd->pcidev, + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); + return 0; +bail_cc_entry_bin: + sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); +bail_cc: + kobject_put(&ppd->pport_cc_kobj); bail_diagc: - kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->diagc_kobj); bail_sl: + kobject_put(&ppd->sl2vl_kobj); +bail_link: kobject_put(&ppd->pport_kobj); bail: return ret; @@ -720,7 +824,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd) for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; - kobject_put(&ppd->pport_kobj); + if (qib_cc_table_size && + ppd->congestion_entries_shadow) { + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + kobject_put(&ppd->pport_cc_kobj); + } kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->pport_kobj); } } diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index ddde72e11edb..647f7beb1b0a 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, goto failed_write; ret = qib_twsi_wr(dd, addr, 0); if (ret) { - qib_dev_err(dd, "Failed to write interface" - " write addr %02X\n", addr); + qib_dev_err(dd, + "Failed to write interface write addr %02X\n", + addr); goto failed_write; } } diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index ce7387ff5d91..aa3a8035bb68 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -281,11 +281,7 @@ inv: set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -403,14 +399,9 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto rewind; wc.opcode = IB_WC_RECV; -last_imm: qib_copy_sge(&qp->r_sge, data, tlen, 0); - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); +last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; @@ -493,13 +484,7 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) - while (qp->s_rdma_read_sge.num_sge) { - atomic_dec(&qp->s_rdma_read_sge.sge.mr-> - refcount); - if (--qp->s_rdma_read_sge.num_sge) - qp->s_rdma_read_sge.sge = - *qp->s_rdma_read_sge.sg_list++; - } + qib_put_ss(&qp->s_rdma_read_sge); else { ret = qib_get_rwqe(qp, 1); if (ret < 0) @@ -509,6 +494,8 @@ rdma_last_imm: } wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + 
qib_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -524,11 +511,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; qib_copy_sge(&qp->r_sge, data, tlen, 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); break; default: diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index a468bf2d4465..d6c7fe7f88d5 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -194,11 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } length -= len; } - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = qp->r_wr_id; @@ -556,11 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - while (qp->r_sge.num_sge) { - atomic_dec(&qp->r_sge.sge.mr->refcount); - if (--qp->r_sge.num_sge) - qp->r_sge.sge = *qp->r_sge.sg_list++; - } + qib_put_ss(&qp->r_sge); if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7b6c3bffa9d9..fc9b205c2412 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) sge->sge_length -= len; if (sge->sge_length == 0) { if (release) - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); if (--ss->num_sge) *sge = *ss->sg_list++; } else if (sge->length == 0 && sge->mr->lkey) { @@ -333,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) * @qp: the QP to post on * @wr: the work request to send */ -static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr) +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, + int *scheduled) { struct qib_swqe *wqe; u32 next; @@ -435,11 +436,17 @@ bail_inval_free: while (j) { struct qib_sge *sge = &wqe->sg_list[--j]; - atomic_dec(&sge->mr->refcount); + qib_put_mr(sge->mr); } bail_inval: ret = -EINVAL; bail: + if (!ret && !wr->next && + !qib_sdma_empty( + dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { + qib_schedule_send(qp); + *scheduled = 1; + } spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -457,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct qib_qp *qp = to_iqp(ibqp); int err = 0; + int scheduled = 0; for (; wr; wr = wr->next) { - err = qib_post_one_send(qp, wr); + err = qib_post_one_send(qp, wr, &scheduled); if (err) { *bad_wr = wr; goto bail; @@ -467,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } /* Try to do the send work in the caller's context. 
*/ - qib_do_send(&qp->s_work); + if (!scheduled) + qib_do_send(&qp->s_work); bail: return err; @@ -978,7 +987,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); if (tx->mr) { - atomic_dec(&tx->mr->refcount); + qib_put_mr(tx->mr); tx->mr = NULL; } if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { @@ -1336,7 +1345,7 @@ done: } qib_sendbuf_done(dd, pbufn); if (qp->s_rdma_mr) { - atomic_dec(&qp->s_rdma_mr->refcount); + qib_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; } if (qp->s_wqe) { @@ -1845,6 +1854,23 @@ bail: return ret; } +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) +{ + struct ib_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct qib_qp *qp0; + + memset(&attr, 0, sizeof attr); + attr.dlid = dlid; + attr.port_num = ppd_from_ibp(ibp)->port; + rcu_read_lock(); + qp0 = rcu_dereference(ibp->qp0); + if (qp0) + ah = ib_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + /** * qib_destroy_ah - destroy an address handle * @ibah: the AH to destroy @@ -2060,13 +2086,15 @@ int qib_register_ib_device(struct qib_devdata *dd) spin_lock_init(&dev->lk_table.lock); dev->lk_table.max = 1 << ib_qib_lkey_table_size; lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); - dev->lk_table.table = (struct qib_mregion **) + dev->lk_table.table = (struct qib_mregion __rcu **) __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); if (dev->lk_table.table == NULL) { ret = -ENOMEM; goto err_lk; } - memset(dev->lk_table.table, 0, lk_tab_size); + RCU_INIT_POINTER(dev->dma_mr, NULL); + for (i = 0; i < dev->lk_table.max; i++) + RCU_INIT_POINTER(dev->lk_table.table[i], NULL); INIT_LIST_HEAD(&dev->pending_mmaps); spin_lock_init(&dev->pending_lock); dev->mmap_offset = PAGE_SIZE; @@ -2289,3 +2317,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd) get_order(lk_tab_size)); kfree(dev->qp_table); } + +/* + * This must be called with s_lock held. 
+ */ +void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + queue_work(ppd->qib_wq, &qp->s_work); + } +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 487606024659..aff8b2c17886 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. - * All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -302,6 +303,9 @@ struct qib_mregion { u32 max_segs; /* number of qib_segs in all the arrays */ u32 mapsz; /* size of the map array */ u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + struct rcu_head list; atomic_t refcount; struct qib_segarray *map[0]; /* the segments */ }; @@ -416,7 +420,7 @@ struct qib_qp { /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; - struct qib_qp *next; /* link list for QPN hash table */ + struct qib_qp __rcu *next; /* link list for QPN hash table */ struct qib_swqe *s_wq; /* send work queue */ struct qib_mmap_info *ip; struct qib_ib_header *s_hdr; /* next packet header to send */ @@ -646,7 +650,7 @@ struct qib_lkey_table { u32 next; /* next unused index (speeds search) */ u32 gen; /* generation count */ u32 max; /* size of the table */ - struct 
qib_mregion **table; + struct qib_mregion __rcu **table; }; struct qib_opcode_stats { @@ -655,8 +659,8 @@ struct qib_opcode_stats { }; struct qib_ibport { - struct qib_qp *qp0; - struct qib_qp *qp1; + struct qib_qp __rcu *qp0; + struct qib_qp __rcu *qp1; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct qib_ah *sm_ah; struct qib_ah *smi_ah; @@ -723,12 +727,13 @@ struct qib_ibport { struct qib_opcode_stats opstats[128]; }; + struct qib_ibdev { struct ib_device ibdev; struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; - struct qib_mregion *dma_mr; + struct qib_mregion __rcu *dma_mr; /* QP numbers are shared by all IB ports */ struct qib_qpn_table qpn_table; @@ -739,7 +744,7 @@ struct qib_ibdev { struct list_head memwait; /* list for wait kernel memory */ struct list_head txreq_free; struct timer_list mem_timer; - struct qib_qp **qp_table; + struct qib_qp __rcu **qp_table; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ @@ -832,11 +837,7 @@ extern struct workqueue_struct *qib_cq_wq; /* * This must be called with s_lock held. 
*/ -static inline void qib_schedule_send(struct qib_qp *qp) -{ - if (qib_send_ok(qp)) - queue_work(ib_wq, &qp->s_work); -} +void qib_schedule_send(struct qib_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) { @@ -933,6 +934,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); + void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); @@ -944,9 +947,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp); -int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr); +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); -int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); +void qib_free_lkey(struct qib_mregion *mr); int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, struct qib_sge *isge, struct ib_sge *sge, int acc); @@ -1014,6 +1017,29 @@ int qib_unmap_fmr(struct list_head *fmr_list); int qib_dealloc_fmr(struct ib_fmr *ibfmr); +static inline void qib_get_mr(struct qib_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +void mr_rcu_callback(struct rcu_head *list); + +static inline void qib_put_mr(struct qib_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + call_rcu(&mr->list, mr_rcu_callback); +} + +static inline void qib_put_ss(struct qib_sge_state *ss) +{ + while (ss->num_sge) { + qib_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + + void qib_release_mmap_info(struct kref *ref); struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 561b8bca4060..1d7281c5a02e 100644 --- 
a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd) u64 atmp; atmp = pioaddr & ~(piolen - 1); if (atmp < addr || (atmp + piolen) > (addr + len)) { - qib_dev_err(dd, "No way to align address/size " - "(%llx/%llx), no WC mtrr\n", - (unsigned long long) atmp, - (unsigned long long) piolen << 1); + qib_dev_err(dd, + "No way to align address/size (%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); ret = -ENODEV; } else { pioaddr = atmp; @@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd) if (cookie < 0) { { qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", + "mtrr_add() WC for PIO bufs failed (%d)\n", cookie); ret = -EINVAL; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 86df632ea612..ca43901ed861 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -92,6 +92,8 @@ enum { IPOIB_STOP_REAPER = 7, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, + IPOIB_STOP_NEIGH_GC = 11, + IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -260,6 +262,20 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_neigh_hash { + struct ipoib_neigh __rcu **buckets; + struct rcu_head rcu; + u32 mask; + u32 size; +}; + +struct ipoib_neigh_table { + struct ipoib_neigh_hash __rcu *htbl; + rwlock_t rwlock; + atomic_t entries; + struct completion flushed; +}; + /* * Device private locking: network stack tx_lock protects members used 
* in TX fast path, lock protects everything else. lock nests inside @@ -279,6 +295,8 @@ struct ipoib_dev_priv { struct rb_root path_tree; struct list_head path_list; + struct ipoib_neigh_table ntbl; + struct ipoib_mcast *broadcast; struct list_head multicast_list; struct rb_root multicast_tree; @@ -291,7 +309,7 @@ struct ipoib_dev_priv { struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - + struct delayed_work neigh_reap_task; struct ib_device *ca; u8 port; u16 pkey; @@ -377,13 +395,16 @@ struct ipoib_neigh { #ifdef CONFIG_INFINIBAND_IPOIB_CM struct ipoib_cm_tx *cm; #endif - union ib_gid dgid; + u8 daddr[INFINIBAND_ALEN]; struct sk_buff_head queue; - struct neighbour *neighbour; struct net_device *dev; struct list_head list; + struct ipoib_neigh __rcu *hnext; + struct rcu_head rcu; + atomic_t refcnt; + unsigned long alive; }; #define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) @@ -394,21 +415,17 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu) return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; } -/* - * We stash a pointer to our private neighbour information after our - * hardware address in neigh->ha. The ALIGN() expression here makes - * sure that this pointer is stored aligned so that an unaligned - * load is not needed to dereference it. 
- */ -static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) +void ipoib_neigh_dtor(struct ipoib_neigh *neigh); +static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) { - return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) + - INFINIBAND_ALEN, sizeof(void *)); + if (atomic_dec_and_test(&neigh->refcnt)) + ipoib_neigh_dtor(neigh); } - -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, +struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr); +struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct net_device *dev); -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); +void ipoib_neigh_free(struct ipoib_neigh *neigh); +void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid); extern struct workqueue_struct *ipoib_workqueue; @@ -425,7 +442,6 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) { kref_put(&ah->ref, ipoib_free_ah); } - int ipoib_open(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); @@ -455,7 +471,7 @@ void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); +void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); int ipoib_mcast_start_thread(struct net_device *dev); @@ -517,10 +533,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } -static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) { struct ipoib_dev_priv *priv = netdev_priv(dev); - return IPOIB_CM_SUPPORTED(n->ha) && + return IPOIB_CM_SUPPORTED(hwaddr) && test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); } @@ 
-575,7 +591,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev) { return 0; } -static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n) +static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) { return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43c..95ecf4eadf5f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -811,9 +811,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -1230,9 +1228,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -1279,7 +1275,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); ipoib_dbg(priv, "Reap connection for gid %pI6\n", - tx->neigh->dgid.raw); + tx->neigh->daddr + 4); tx->neigh = NULL; } } @@ -1304,7 +1300,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; - qpn = IPOIB_QPN(neigh->neighbour->ha); + qpn = IPOIB_QPN(neigh->daddr); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1320,9 +1316,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) if (neigh) { neigh->cm = NULL; list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); } list_del(&p->list); kfree(p); @@ -1376,7 +1370,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) if 
(skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); #endif @@ -1397,7 +1391,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5c1bc995e560..f10221f40803 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, skb_frag_size_set(frag, size); skb->data_len += size; - skb->truesize += size; + skb->truesize += PAGE_SIZE; } else skb_put(skb, length); @@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; int buf_size; + int tailroom; u64 *mapping; - if (ipoib_ud_need_sg(priv->max_ib_mtu)) + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { buf_size = IPOIB_UD_HEAD_SIZE; - else + tailroom = 128; /* reserve some tailroom for IP/TCP headers */ + } else { buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + tailroom = 0; + } - skb = dev_alloc_skb(buf_size + 4); + skb = dev_alloc_skb(buf_size + tailroom + 4); if (unlikely(!skb)) return NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3974c290b667..97920b77a5d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -46,7 +46,8 @@ #include <linux/ip.h> #include <linux/in.h> -#include <net/dst.h> +#include <linux/jhash.h> 
+#include <net/arp.h> MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); @@ -84,6 +85,7 @@ struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); +static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct ib_client ipoib_client = { .name = "ipoib", @@ -264,30 +266,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path) static void path_free(struct net_device *dev, struct ipoib_path *path) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_neigh *neigh, *tn; struct sk_buff *skb; - unsigned long flags; while ((skb = __skb_dequeue(&path->queue))) dev_kfree_skb_irq(skb); - spin_lock_irqsave(&priv->lock, flags); - - list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { - /* - * It's safe to call ipoib_put_ah() inside priv->lock - * here, because we know that path->ah will always - * hold one more reference, so ipoib_put_ah() will - * never do more than decrement the ref count. 
- */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - - ipoib_neigh_free(dev, neigh); - } + ipoib_dbg(netdev_priv(dev), "path_free\n"); - spin_unlock_irqrestore(&priv->lock, flags); + /* remove all neigh connected to this path */ + ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); if (path->ah) ipoib_put_ah(path->ah); @@ -458,19 +445,15 @@ static void path_rec_completion(int status, } kref_get(&path->ah->ref); neigh->ah = path->ah; - memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, - sizeof(union ib_gid)); - if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (ipoib_cm_enabled(dev, neigh->daddr)) { if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); continue; } } @@ -555,15 +538,15 @@ static int path_rec_start(struct net_device *dev, return 0; } -/* called with rcu_read_lock */ -static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) +static void neigh_add_path(struct sk_buff *skb, u8 *daddr, + struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; unsigned long flags; - neigh = ipoib_neigh_alloc(n, skb->dev); + neigh = ipoib_neigh_alloc(daddr, dev); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -572,9 +555,9 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, n->ha + 4); + path = __path_find(dev, daddr + 4); if (!path) { - path = path_rec_create(dev, n->ha + 4); + path = path_rec_create(dev, daddr + 4); if (!path) goto err_path; @@ -586,17 +569,13 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ if (path->ah) { kref_get(&path->ah->ref); neigh->ah = path->ah; - memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, - 
sizeof(union ib_gid)); - if (ipoib_cm_enabled(dev, neigh->neighbour)) { + if (ipoib_cm_enabled(dev, neigh->daddr)) { if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); goto err_drop; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) @@ -608,7 +587,8 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ } } else { spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); + ipoib_neigh_put(neigh); return; } } else { @@ -621,35 +601,20 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_ } spin_unlock_irqrestore(&priv->lock, flags); + ipoib_neigh_put(neigh); return; err_list: list_del(&neigh->list); err_path: - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); err_drop: ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); spin_unlock_irqrestore(&priv->lock, flags); -} - -/* called with rcu_read_lock */ -static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(skb->dev); - - /* Look up path record for unicasts */ - if (n->ha[4] != 0xff) { - neigh_add_path(skb, n, dev); - return; - } - - /* Add in the P_Key for multicasts */ - n->ha[8] = (priv->pkey >> 8) & 0xff; - n->ha[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, n->ha + 4, skb); + ipoib_neigh_put(neigh); } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -710,94 +675,80 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct neighbour *n = NULL; + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + struct ipoib_header *header; unsigned long flags; 
- rcu_read_lock(); - if (likely(skb_dst(skb))) { - n = dst_get_neighbour_noref(skb_dst(skb)); - if (!n) { + header = (struct ipoib_header *) skb->data; + + if (unlikely(cb->hwaddr[4] == 0xff)) { + /* multicast, arrange "if" according to probability */ + if ((header->proto != htons(ETH_P_IP)) && + (header->proto != htons(ETH_P_IPV6)) && + (header->proto != htons(ETH_P_ARP)) && + (header->proto != htons(ETH_P_RARP))) { + /* ethertype not supported by IPoIB */ ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - goto unlock; + return NETDEV_TX_OK; } + /* Add in the P_Key for multicast*/ + cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; + cb->hwaddr[9] = priv->pkey & 0xff; + + neigh = ipoib_neigh_get(dev, cb->hwaddr); + if (likely(neigh)) + goto send_using_neigh; + ipoib_mcast_send(dev, cb->hwaddr, skb); + return NETDEV_TX_OK; } - if (likely(n)) { - if (unlikely(!*to_ipoib_neigh(n))) { - ipoib_path_lookup(skb, n, dev); - goto unlock; - } - - neigh = *to_ipoib_neigh(n); - if (unlikely((memcmp(&neigh->dgid.raw, - n->ha + 4, - sizeof(union ib_gid))) || - (neigh->dev != dev))) { - spin_lock_irqsave(&priv->lock, flags); - /* - * It's safe to call ipoib_put_ah() inside - * priv->lock here, because we know that - * path->ah will always hold one more reference, - * so ipoib_put_ah() will never do more than - * decrement the ref count. 
- */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - list_del(&neigh->list); - ipoib_neigh_free(dev, neigh); - spin_unlock_irqrestore(&priv->lock, flags); - ipoib_path_lookup(skb, n, dev); - goto unlock; + /* unicast, arrange "switch" according to probability */ + switch (header->proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + neigh = ipoib_neigh_get(dev, cb->hwaddr); + if (unlikely(!neigh)) { + neigh_add_path(skb, cb->hwaddr, dev); + return NETDEV_TX_OK; } + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + /* for unicast ARP and RARP should always perform path find */ + unicast_arp_send(skb, dev, cb); + return NETDEV_TX_OK; + default: + /* ethertype not supported by IPoIB */ + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } - if (ipoib_cm_get(neigh)) { - if (ipoib_cm_up(neigh)) { - ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto unlock; - } - } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); - goto unlock; +send_using_neigh: + /* note we now hold a ref to neigh */ + if (ipoib_cm_get(neigh)) { + if (ipoib_cm_up(neigh)) { + ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); + goto unref; } + } else if (neigh->ah) { + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); + goto unref; + } - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - spin_lock_irqsave(&priv->lock, flags); - __skb_queue_tail(&neigh->queue, skb); - spin_unlock_irqrestore(&priv->lock, flags); - } else { - ++dev->stats.tx_dropped; - dev_kfree_skb_any(skb); - } + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + spin_lock_irqsave(&priv->lock, flags); + __skb_queue_tail(&neigh->queue, skb); + spin_unlock_irqrestore(&priv->lock, flags); } else { - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - - if (cb->hwaddr[4] == 0xff) { - /* Add in the P_Key for multicast*/ - cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; - cb->hwaddr[9] = priv->pkey & 0xff; + ++dev->stats.tx_dropped; + 
dev_kfree_skb_any(skb); + } - ipoib_mcast_send(dev, cb->hwaddr + 4, skb); - } else { - /* unicast GID -- should be ARP or RARP reply */ - - if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && - (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { - ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", - skb_dst(skb) ? "neigh" : "dst", - be16_to_cpup((__be16 *) skb->data), - IPOIB_QPN(cb->hwaddr), - cb->hwaddr + 4); - dev_kfree_skb_any(skb); - ++dev->stats.tx_dropped; - goto unlock; - } +unref: + ipoib_neigh_put(neigh); - unicast_arp_send(skb, dev, cb); - } - } -unlock: - rcu_read_unlock(); return NETDEV_TX_OK; } @@ -819,6 +770,7 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -826,14 +778,11 @@ static int ipoib_hard_header(struct sk_buff *skb, header->reserved = 0; /* - * If we don't have a dst_entry structure, stuff the + * we don't rely on dst_entry structure, always stuff the * destination address into skb->cb so we can figure out where * to send the packet later. */ - if (!skb_dst(skb)) { - struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); - } + memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); return 0; } @@ -850,86 +799,438 @@ static void ipoib_set_mcast_list(struct net_device *dev) queue_work(ipoib_workqueue, &priv->restart_task); } -static void ipoib_neigh_cleanup(struct neighbour *n) +static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) { - struct ipoib_neigh *neigh; - struct ipoib_dev_priv *priv = netdev_priv(n->dev); + /* + * Use only the address parts that contributes to spreading + * The subnet prefix is not used as one can not connect to + * same remote port (GUID) using the same remote QPN via two + * different subnets. 
+ */ + /* qpn octets[1:4) & port GUID octets[12:20) */ + u32 *daddr_32 = (u32 *) daddr; + u32 hv; + + hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); + return hv & htbl->mask; +} + +struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh *neigh = NULL; + u32 hash_val; + + rcu_read_lock_bh(); + + htbl = rcu_dereference_bh(ntbl->htbl); + + if (!htbl) + goto out_unlock; + + hash_val = ipoib_addr_hash(htbl, daddr); + for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]); + neigh != NULL; + neigh = rcu_dereference_bh(neigh->hnext)) { + if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { + /* found, take one ref on behalf of the caller */ + if (!atomic_inc_not_zero(&neigh->refcnt)) { + /* deleted */ + neigh = NULL; + goto out_unlock; + } + neigh->alive = jiffies; + goto out_unlock; + } + } + +out_unlock: + rcu_read_unlock_bh(); + return neigh; +} + +static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) +{ + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long neigh_obsolete; + unsigned long dt; unsigned long flags; - struct ipoib_ah *ah = NULL; + int i; - neigh = *to_ipoib_neigh(n); - if (neigh) - priv = netdev_priv(neigh->dev); - else + if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; - ipoib_dbg(priv, - "neigh_cleanup for %06x %pI6\n", - IPOIB_QPN(n->ha), - n->ha + 4); - spin_lock_irqsave(&priv->lock, flags); + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + + if (!htbl) + goto out_unlock; + + /* neigh is obsolete if it was idle for two GC periods */ + dt = 2 * arp_tbl.gc_interval; + neigh_obsolete = jiffies - dt; + /* handle possible race condition */ + if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) + goto out_unlock; + + for (i = 0; i < 
htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + /* was the neigh idle for two GC periods */ + if (time_after(neigh_obsolete, neigh->alive)) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from path/mc list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } else { + np = &neigh->hnext; + } - if (neigh->ah) - ah = neigh->ah; - list_del(&neigh->list); - ipoib_neigh_free(n->dev, neigh); + } + } - spin_unlock_irqrestore(&priv->lock, flags); +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} - if (ah) - ipoib_put_ah(ah); +static void ipoib_reap_neigh(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, neigh_reap_task.work); + + __ipoib_reap_neigh(priv); + + if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) + queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + arp_tbl.gc_interval); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, + +static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, struct net_device *dev) { struct ipoib_neigh *neigh; - neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); if (!neigh) return NULL; - neigh->neighbour = neighbour; neigh->dev = dev; - memset(&neigh->dgid.raw, 0, sizeof (union ib_gid)); - *to_ipoib_neigh(neighbour) = neigh; + memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); skb_queue_head_init(&neigh->queue); + INIT_LIST_HEAD(&neigh->list); ipoib_cm_set(neigh, NULL); + /* one ref on behalf of the caller */ + atomic_set(&neigh->refcnt, 1); + + return neigh; +} + +struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, + struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); 
+ struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh *neigh; + u32 hash_val; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) { + neigh = NULL; + goto out_unlock; + } + + /* need to add a new neigh, but maybe some other thread succeeded? + * recalc hash, maybe hash resize took place so we do a search + */ + hash_val = ipoib_addr_hash(htbl, daddr); + for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], + lockdep_is_held(&ntbl->rwlock)); + neigh != NULL; + neigh = rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))) { + if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { + /* found, take one ref on behalf of the caller */ + if (!atomic_inc_not_zero(&neigh->refcnt)) { + /* deleted */ + neigh = NULL; + break; + } + neigh->alive = jiffies; + goto out_unlock; + } + } + + neigh = ipoib_neigh_ctor(daddr, dev); + if (!neigh) + goto out_unlock; + + /* one ref on behalf of the hash table */ + atomic_inc(&neigh->refcnt); + neigh->alive = jiffies; + /* put in hash */ + rcu_assign_pointer(neigh->hnext, + rcu_dereference_protected(htbl->buckets[hash_val], + lockdep_is_held(&ntbl->rwlock))); + rcu_assign_pointer(htbl->buckets[hash_val], neigh); + atomic_inc(&ntbl->entries); + +out_unlock: + write_unlock_bh(&ntbl->rwlock); return neigh; } -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) +void ipoib_neigh_dtor(struct ipoib_neigh *neigh) { + /* neigh reference count was dropprd to zero */ + struct net_device *dev = neigh->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - *to_ipoib_neigh(neigh->neighbour) = NULL; + if (neigh->ah) + ipoib_put_ah(neigh->ah); while ((skb = __skb_dequeue(&neigh->queue))) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } if (ipoib_cm_get(neigh)) ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); + ipoib_dbg(netdev_priv(dev), + "neigh free for 
%06x %pI6\n", + IPOIB_QPN(neigh->daddr), + neigh->daddr + 4); kfree(neigh); + if (atomic_dec_and_test(&priv->ntbl.entries)) { + if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags)) + complete(&priv->ntbl.flushed); + } +} + +static void ipoib_neigh_reclaim(struct rcu_head *rp) +{ + /* Called as a result of removal from hash table */ + struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu); + /* note TX context may hold another ref */ + ipoib_neigh_put(neigh); } -static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) +void ipoib_neigh_free(struct ipoib_neigh *neigh) { - parms->neigh_cleanup = ipoib_neigh_cleanup; + struct net_device *dev = neigh->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh __rcu **np; + struct ipoib_neigh *n; + u32 hash_val; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) + goto out_unlock; + + hash_val = ipoib_addr_hash(htbl, neigh->daddr); + np = &htbl->buckets[hash_val]; + for (n = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock)); + n != NULL; + n = rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))) { + if (n == neigh) { + /* found */ + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + goto out_unlock; + } else { + np = &n->hnext; + } + } + +out_unlock: + write_unlock_bh(&ntbl->rwlock); + +} + +static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) +{ + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + struct ipoib_neigh **buckets; + u32 size; + + clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); + ntbl->htbl = NULL; + rwlock_init(&ntbl->rwlock); + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return -ENOMEM; + 
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + size = roundup_pow_of_two(arp_tbl.gc_thresh3); + buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL); + if (!buckets) { + kfree(htbl); + return -ENOMEM; + } + htbl->size = size; + htbl->mask = (size - 1); + htbl->buckets = buckets; + ntbl->htbl = htbl; + atomic_set(&ntbl->entries, 0); + + /* start garbage collection */ + clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + arp_tbl.gc_interval); return 0; } +static void neigh_hash_free_rcu(struct rcu_head *head) +{ + struct ipoib_neigh_hash *htbl = container_of(head, + struct ipoib_neigh_hash, + rcu); + struct ipoib_neigh __rcu **buckets = htbl->buckets; + + kfree(buckets); + kfree(htbl); +} + +void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long flags; + int i; + + /* remove all neigh connected to a given path or mcast */ + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + + if (!htbl) + goto out_unlock; + + for (i = 0; i < htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + /* delete neighs belong to this parent */ + if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from parent list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } else { + np = &neigh->hnext; + } + + } + } +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} + +static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) +{ + struct 
ipoib_neigh_table *ntbl = &priv->ntbl; + struct ipoib_neigh_hash *htbl; + unsigned long flags; + int i; + + write_lock_bh(&ntbl->rwlock); + + htbl = rcu_dereference_protected(ntbl->htbl, + lockdep_is_held(&ntbl->rwlock)); + if (!htbl) + goto out_unlock; + + for (i = 0; i < htbl->size; i++) { + struct ipoib_neigh *neigh; + struct ipoib_neigh __rcu **np = &htbl->buckets[i]; + + while ((neigh = rcu_dereference_protected(*np, + lockdep_is_held(&ntbl->rwlock))) != NULL) { + rcu_assign_pointer(*np, + rcu_dereference_protected(neigh->hnext, + lockdep_is_held(&ntbl->rwlock))); + /* remove from path/mc list */ + spin_lock_irqsave(&priv->lock, flags); + list_del(&neigh->list); + spin_unlock_irqrestore(&priv->lock, flags); + call_rcu(&neigh->rcu, ipoib_neigh_reclaim); + } + } + + rcu_assign_pointer(ntbl->htbl, NULL); + call_rcu(&htbl->rcu, neigh_hash_free_rcu); + +out_unlock: + write_unlock_bh(&ntbl->rwlock); +} + +static void ipoib_neigh_hash_uninit(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int stopped; + + ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); + init_completion(&priv->ntbl.flushed); + set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); + + /* Stop GC if called at init fail need to cancel work */ + stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + if (!stopped) + cancel_delayed_work(&priv->neigh_reap_task); + + if (atomic_read(&priv->ntbl.entries)) { + ipoib_flush_neighs(priv); + wait_for_completion(&priv->ntbl.flushed); + } +} + + int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) { struct ipoib_dev_priv *priv = netdev_priv(dev); + if (ipoib_neigh_hash_init(priv) < 0) + goto out; /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); - goto out; + goto out_neigh_hash_cleanup; } priv->tx_ring = 
vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); @@ -952,6 +1253,8 @@ out_tx_ring_cleanup: out_rx_ring_cleanup: kfree(priv->rx_ring); +out_neigh_hash_cleanup: + ipoib_neigh_hash_uninit(dev); out: return -ENOMEM; } @@ -964,6 +1267,9 @@ void ipoib_dev_cleanup(struct net_device *dev) /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { + /* Stop GC on child */ + set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); + cancel_delayed_work(&cpriv->neigh_reap_task); unregister_netdev(cpriv->dev); ipoib_dev_cleanup(cpriv->dev); free_netdev(cpriv->dev); @@ -976,6 +1282,8 @@ void ipoib_dev_cleanup(struct net_device *dev) priv->rx_ring = NULL; priv->tx_ring = NULL; + + ipoib_neigh_hash_uninit(dev); } static const struct header_ops ipoib_header_ops = { @@ -990,7 +1298,6 @@ static const struct net_device_ops ipoib_netdev_ops = { .ndo_start_xmit = ipoib_start_xmit, .ndo_tx_timeout = ipoib_timeout, .ndo_set_rx_mode = ipoib_set_mcast_list, - .ndo_neigh_setup = ipoib_neigh_setup_dev, }; static void ipoib_setup(struct net_device *dev) @@ -1039,6 +1346,7 @@ static void ipoib_setup(struct net_device *dev) INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); + INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) @@ -1279,6 +1587,9 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); + /* Stop GC if started before flush */ + set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + cancel_delayed_work(&priv->neigh_reap_task); flush_workqueue(ipoib_workqueue); event_failed: @@ -1345,6 +1656,9 @@ static void ipoib_remove_one(struct ib_device *device) dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); + /* Stop GC */ + set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); + cancel_delayed_work(&priv->neigh_reap_task); 
flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 20ebc6fd1bb9..13f4aa7593c8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -69,28 +69,13 @@ struct ipoib_mcast_iter { static void ipoib_mcast_free(struct ipoib_mcast *mcast) { struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_neigh *neigh, *tmp; int tx_dropped = 0; ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); - spin_lock_irq(&priv->lock); - - list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { - /* - * It's safe to call ipoib_put_ah() inside priv->lock - * here, because we know that mcast->ah will always - * hold one more reference, so ipoib_put_ah() will - * never do more than decrement the ref count. - */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); - } - - spin_unlock_irq(&priv->lock); + /* remove all neigh connected to this mcast */ + ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -655,11 +640,12 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) return 0; } -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) +void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; unsigned long flags; + void *mgid = daddr + 4; spin_lock_irqsave(&priv->lock, flags); @@ -715,25 +701,25 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = NULL; - - rcu_read_lock(); - if (dst) - n = dst_get_neighbour_noref(dst); - if (n && !*to_ipoib_neigh(n)) { - struct ipoib_neigh 
*neigh = ipoib_neigh_alloc(n, - skb->dev); + struct ipoib_neigh *neigh; + spin_unlock_irqrestore(&priv->lock, flags); + neigh = ipoib_neigh_get(dev, daddr); + spin_lock_irqsave(&priv->lock, flags); + if (!neigh) { + spin_unlock_irqrestore(&priv->lock, flags); + neigh = ipoib_neigh_alloc(daddr, dev); + spin_lock_irqsave(&priv->lock, flags); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); } } - rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + if (neigh) + ipoib_neigh_put(neigh); return; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5f6b7f63cdef..7a0ce8d42887 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1377,10 +1377,14 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) break; case SRPT_STATE_NEED_DATA: /* DMA_TO_DEVICE (write) - RDMA read error. */ + + /* XXX(hch): this is a horrible layering violation.. */ spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); ioctx->cmd.transport_state |= CMD_T_LUN_STOP; + ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); - transport_generic_handle_data(&ioctx->cmd); + + complete(&ioctx->cmd.transport_lun_stop_comp); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1463,9 +1467,10 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, /** * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. * - * Note: transport_generic_handle_data() is asynchronous so unmapping the - * data that has been transferred via IB RDMA must be postponed until the - * check_stop_free() callback. + * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping + * the data that has been transferred via IB RDMA had to be postponed until the + * check_stop_free() callback. 
None of this is necessary anymore and needs to + * be cleaned up. */ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, @@ -1477,7 +1482,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, if (opcode == SRPT_RDMA_READ_LAST) { if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, SRPT_STATE_DATA_IN)) - transport_generic_handle_data(&ioctx->cmd); + target_execute_cmd(&ioctx->cmd); else printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, __LINE__, srpt_get_cmd_state(ioctx)); |