summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/addr.c4
-rw-r--r--drivers/infiniband/core/cm.c16
-rw-r--r--drivers/infiniband/core/cm_msgs.h12
-rw-r--r--drivers/infiniband/core/cma.c82
-rw-r--r--drivers/infiniband/core/netlink.c17
-rw-r--r--drivers/infiniband/core/sa_query.c133
-rw-r--r--drivers/infiniband/core/ucma.c26
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c12
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c141
-rw-r--r--drivers/infiniband/hw/mlx4/main.c98
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h20
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c28
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c6
-rw-r--r--drivers/infiniband/hw/qib/qib.h45
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c63
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c21
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c91
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c92
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c160
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c238
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c152
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c327
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h198
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c247
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c25
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c56
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c24
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c14
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c246
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c31
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c66
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h56
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_x86_64.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h56
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c20
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c644
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c42
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c15
50 files changed, 2669 insertions, 1046 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 6ef660c1332f..28058ae33d38 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -129,7 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
dev_put(dev);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
@@ -243,7 +243,7 @@ out:
return ret;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c889aaef3416..d67999f6e34a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3848,24 +3848,28 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.timewait_list);
ret = class_register(&cm_class);
- if (ret)
- return -ENOMEM;
+ if (ret) {
+ ret = -ENOMEM;
+ goto error1;
+ }
cm.wq = create_workqueue("ib_cm");
if (!cm.wq) {
ret = -ENOMEM;
- goto error1;
+ goto error2;
}
ret = ib_register_client(&cm_client);
if (ret)
- goto error2;
+ goto error3;
return 0;
-error2:
+error3:
destroy_workqueue(cm.wq);
-error1:
+error2:
class_unregister(&cm_class);
+error1:
+ idr_destroy(&cm.local_id_table);
return ret;
}
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 7da9b2102341..be068f47e47e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -44,18 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
-#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
-#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
-#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
-#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
-#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
-#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
-#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
-#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
-#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
-#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
-
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2e826f9702c6..7172559ce0c1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -99,6 +99,10 @@ struct rdma_bind_list {
unsigned short port;
};
+enum {
+ CMA_OPTION_AFONLY,
+};
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -137,9 +141,11 @@ struct rdma_id_private {
u32 qkey;
u32 qp_num;
pid_t owner;
+ u32 options;
u8 srq;
u8 tos;
u8 reuseaddr;
+ u8 afonly;
};
struct cma_multicast {
@@ -1297,8 +1303,10 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
} else {
cma_set_ip_ver(cma_data, 4);
cma_set_ip_ver(cma_mask, 0xF);
- cma_data->dst_addr.ip4.addr = ip4_addr;
- cma_mask->dst_addr.ip4.addr = htonl(~0);
+ if (!cma_any_addr(addr)) {
+ cma_data->dst_addr.ip4.addr = ip4_addr;
+ cma_mask->dst_addr.ip4.addr = htonl(~0);
+ }
}
break;
case AF_INET6:
@@ -1312,9 +1320,11 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
} else {
cma_set_ip_ver(cma_data, 6);
cma_set_ip_ver(cma_mask, 0xF);
- cma_data->dst_addr.ip6 = ip6_addr;
- memset(&cma_mask->dst_addr.ip6, 0xFF,
- sizeof cma_mask->dst_addr.ip6);
+ if (!cma_any_addr(addr)) {
+ cma_data->dst_addr.ip6 = ip6_addr;
+ memset(&cma_mask->dst_addr.ip6, 0xFF,
+ sizeof cma_mask->dst_addr.ip6);
+ }
}
break;
default:
@@ -1499,7 +1509,7 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
svc_id = cma_get_service_id(id_priv->id.ps, addr);
- if (cma_any_addr(addr))
+ if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
@@ -1573,6 +1583,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
+ dev_id_priv->afonly = id_priv->afonly;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
@@ -2098,6 +2109,26 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
}
EXPORT_SYMBOL(rdma_set_reuseaddr);
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
+{
+ struct rdma_id_private *id_priv;
+ unsigned long flags;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
+ id_priv->options |= (1 << CMA_OPTION_AFONLY);
+ id_priv->afonly = afonly;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_afonly);
+
static void cma_bind_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv)
{
@@ -2187,22 +2218,24 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct hlist_node *node;
addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
- if (cma_any_addr(addr) && !reuseaddr)
- return -EADDRNOTAVAIL;
-
hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
- if ((cur_id->state == RDMA_CM_LISTEN) ||
- !reuseaddr || !cur_id->reuseaddr) {
- cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
- if (cma_any_addr(cur_addr))
- return -EADDRNOTAVAIL;
+ if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
+ cur_id->reuseaddr)
+ continue;
- if (!cma_addr_cmp(addr, cur_addr))
- return -EADDRINUSE;
- }
+ cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+ if (id_priv->afonly && cur_id->afonly &&
+ (addr->sa_family != cur_addr->sa_family))
+ continue;
+
+ if (cma_any_addr(addr) || cma_any_addr(cur_addr))
+ return -EADDRNOTAVAIL;
+
+ if (!cma_addr_cmp(addr, cur_addr))
+ return -EADDRINUSE;
}
return 0;
}
@@ -2278,7 +2311,7 @@ static int cma_get_port(struct rdma_id_private *id_priv)
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct sockaddr_in6 *sin6;
if (addr->sa_family != AF_INET6)
@@ -2371,6 +2404,14 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
+ if (addr->sa_family == AF_INET)
+ id_priv->afonly = 1;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (addr->sa_family == AF_INET6)
+ id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+#endif
+ }
ret = cma_get_port(id_priv);
if (ret)
goto err2;
@@ -3023,10 +3064,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
id_priv->id.port_num, &rec,
comp_mask, GFP_KERNEL,
cma_ib_mc_handler, mc);
- if (IS_ERR(mc->multicast.ib))
- return PTR_ERR(mc->multicast.ib);
-
- return 0;
+ return PTR_RET(mc->multicast.ib);
}
static void iboe_mcast_work_handler(struct work_struct *work)
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index e497dfbee435..3ae2bfd31015 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -108,12 +108,14 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
- *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
- len, NLM_F_MULTI);
+ *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
+ len, NLM_F_MULTI);
+ if (!*nlh)
+ goto out_nlmsg_trim;
(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
- return NLMSG_DATA(*nlh);
+ return nlmsg_data(*nlh);
-nlmsg_failure:
+out_nlmsg_trim:
nlmsg_trim(skb, prev_tail);
return NULL;
}
@@ -171,8 +173,11 @@ static void ibnl_rcv(struct sk_buff *skb)
int __init ibnl_init(void)
{
- nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv,
- NULL, THIS_MODULE);
+ struct netlink_kernel_cfg cfg = {
+ .input = ibnl_rcv,
+ };
+
+ nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg);
if (!nls) {
pr_warn("Failed to create netlink socket\n");
return -ENOMEM;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index fbbfa24cf572..a8905abc56e4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -94,6 +94,12 @@ struct ib_sa_path_query {
struct ib_sa_query sa_query;
};
+struct ib_sa_guidinfo_query {
+ void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
@@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
+#define GUIDINFO_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
+ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .field_name = "sa_guidinfo_rec:" #field
+
+static const struct ib_field guidinfo_rec_table[] = {
+ { GUIDINFO_REC_FIELD(lid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { GUIDINFO_REC_FIELD(block_num),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { GUIDINFO_REC_FIELD(res1),
+ .offset_words = 0,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { GUIDINFO_REC_FIELD(res2),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { GUIDINFO_REC_FIELD(guid_info_list),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 512 },
+};
+
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -945,6 +979,105 @@ err1:
return ret;
}
+/* Support GuidInfoRecord */
+static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_guidinfo_query *query =
+ container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
+
+ if (mad) {
+ struct ib_sa_guidinfo_rec rec;
+
+ ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else
+ query->callback(status, NULL, query->context);
+}
+
+static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
+}
+
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_guidinfo_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_SA_METHOD_DELETE) {
+ return -EINVAL;
+ }
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kmalloc(sizeof *query, gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
+ query->sa_query.release = ib_sa_guidinfo_rec_release;
+
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
+ mad->data);
+
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 8002ae642cfe..6bf850422895 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -909,6 +909,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
+ case RDMA_OPTION_ID_AFONLY:
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
default:
ret = -ENOSYS;
}
@@ -995,23 +1002,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- optval = kmalloc(cmd.optlen, GFP_KERNEL);
- if (!optval) {
- ret = -ENOMEM;
- goto out1;
- }
-
- if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
- cmd.optlen)) {
- ret = -EFAULT;
- goto out2;
+ optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ cmd.optlen);
+ if (IS_ERR(optval)) {
+ ret = PTR_ERR(optval);
+ goto out;
}
ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
cmd.optlen);
-out2:
kfree(optval);
-out1:
+
+out:
ucma_put_ctx(ctx);
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 740dcc065cf2..77b6b182778a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1374,7 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
goto reject;
}
dst = &rt->dst;
- l2t = t3_l2t_get(tdev, dst, NULL);
+ l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -1942,7 +1942,8 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto fail3;
}
ep->dst = &rt->dst;
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL);
+ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
+ &cm_id->remote_addr.sin_addr.s_addr);
if (!ep->l2t) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
err = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b18870c455ad..51f42061dae9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -548,8 +548,8 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
if (mpa_rev_to_use == 2) {
- mpa->private_data_size +=
- htons(sizeof(struct mpa_v2_conn_params));
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof (struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -635,8 +635,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size +=
- htons(sizeof(struct mpa_v2_conn_params));
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof (struct mpa_v2_conn_params));
mpa_v2_params.ird = htons(((u16)ep->ird) |
(peer2peer ? MPA_V2_PEER2PEER_MODEL :
0));
@@ -715,8 +715,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size +=
- htons(sizeof(struct mpa_v2_conn_params));
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof (struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
if (peer2peer && (ep->mpa_attr.p2p_type !=
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 259b0670b51c..c27141fef1ab 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -147,47 +147,51 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
}
/*
- * Snoop SM MADs for port info and P_Key table sets, so we can
- * synthesize LID change and P_Key change events.
+ * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
+ * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
- u16 prev_lid)
+ u16 prev_lid)
{
- struct ib_event event;
+ struct ib_port_info *pinfo;
+ u16 lid;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
- struct ib_port_info *pinfo =
- (struct ib_port_info *) ((struct ib_smp *) mad)->data;
- u16 lid = be16_to_cpu(pinfo->lid);
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET)
+ switch (mad->mad_hdr.attr_id) {
+ case IB_SMP_ATTR_PORT_INFO:
+ pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ lid = be16_to_cpu(pinfo->lid);
- update_sm_ah(to_mdev(ibdev), port_num,
+ update_sm_ah(dev, port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
- event.device = ibdev;
- event.element.port_num = port_num;
+ if (pinfo->clientrereg_resv_subnetto & 0x80)
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_CLIENT_REREGISTER);
- if (pinfo->clientrereg_resv_subnetto & 0x80) {
- event.event = IB_EVENT_CLIENT_REREGISTER;
- ib_dispatch_event(&event);
- }
+ if (prev_lid != lid)
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_LID_CHANGE);
+ break;
- if (prev_lid != lid) {
- event.event = IB_EVENT_LID_CHANGE;
- ib_dispatch_event(&event);
- }
- }
+ case IB_SMP_ATTR_PKEY_TABLE:
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ break;
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
- event.device = ibdev;
- event.event = IB_EVENT_PKEY_CHANGE;
- event.element.port_num = port_num;
- ib_dispatch_event(&event);
+ case IB_SMP_ATTR_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_GID_CHANGE);
+ break;
+ default:
+ break;
}
- }
}
static void node_desc_override(struct ib_device *dev,
@@ -242,6 +246,25 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
+ if (in_wc && in_wc->qp->qp_num) {
+ pr_debug("received MAD: slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc->wc_flags & IB_WC_GRH) {
+ pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
+ be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+ be64_to_cpu(in_grh->sgid.global.interface_id));
+ pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
+ be64_to_cpu(in_grh->dgid.global.subnet_prefix),
+ be64_to_cpu(in_grh->dgid.global.interface_id));
+ }
+ }
+
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -286,7 +309,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_FAILURE;
if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad, prev_lid);
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
node_desc_override(ibdev, out_mad);
}
@@ -427,3 +451,64 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
ib_destroy_ah(dev->sm_ah[p]);
}
}
+
+void handle_port_mgmt_change_event(struct work_struct *work)
+{
+ struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *dev = ew->ib_dev;
+ struct mlx4_eqe *eqe = &(ew->ib_eqe);
+ u8 port = eqe->event.port_mgmt_change.port;
+ u32 changed_attr;
+
+ switch (eqe->subtype) {
+ case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
+ changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
+
+ /* Update the SM ah - This should be done before handling
+ the other changed attributes so that MADs can be sent to the SM */
+ if (changed_attr & MSTR_SM_CHANGE_MASK) {
+ u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
+ u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
+ update_sm_ah(dev, port, lid, sl);
+ }
+
+ /* Check if it is a lid change event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE);
+
+ /* Generate GUID changed event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK)
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+
+ if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
+ mlx4_ib_dispatch_event(dev, port,
+ IB_EVENT_CLIENT_REREGISTER);
+ break;
+
+ case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
+ break;
+ case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ break;
+ default:
+ pr_warn("Unsupported subtype 0x%x for "
+ "Port Management Change event\n", eqe->subtype);
+ }
+
+ kfree(ew);
+}
+
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type)
+{
+ struct ib_event event;
+
+ event.device = &dev->ib_dev;
+ event.element.port_num = port_num;
+ event.event = type;
+
+ ib_dispatch_event(&event);
+}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3530c41fcd1f..fe2088cfa6ee 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -50,7 +50,7 @@
#include "mlx4_ib.h"
#include "user.h"
-#define DRV_NAME "mlx4_ib"
+#define DRV_NAME MLX4_IB_DRV_NAME
#define DRV_VERSION "1.0"
#define DRV_RELDATE "April 4, 2008"
@@ -157,7 +157,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
- props->masked_atomic_cap = IB_ATOMIC_HCA;
+ props->masked_atomic_cap = props->atomic_cap;
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
@@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
return ret;
}
+struct mlx4_ib_steering {
+ struct list_head list;
+ u64 reg_id;
+ union ib_gid gid;
+};
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ return -ENOMEM;
+ }
- err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
- !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6);
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ MLX4_PROT_IB_IPV6, &reg_id);
if (err)
- return err;
+ goto err_malloc;
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
return 0;
err_add:
- mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ MLX4_PROT_IB_IPV6, reg_id);
+err_malloc:
+ kfree(ib_steering);
+
return err;
}
@@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
+ u64 reg_id = 0;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
+ if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
+ list_del(&ib_steering->list);
+ break;
+ }
+ }
+ mutex_unlock(&mqp->mutex);
+ if (&ib_steering->list == &mqp->steering_rules) {
+ pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+ reg_id = ib_steering->reg_id;
+ kfree(ib_steering);
+ }
- err = mlx4_multicast_detach(mdev->dev,
- &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ MLX4_PROT_IB_IPV6, reg_id);
if (err)
return err;
@@ -898,7 +946,6 @@ static void update_gids_task(struct work_struct *work)
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
- struct ib_event event;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@@ -916,10 +963,7 @@ static void update_gids_task(struct work_struct *work)
pr_warn("set port command failed\n");
else {
memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
- event.device = &gw->dev->ib_dev;
- event.element.port_num = gw->port;
- event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
}
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -1111,7 +1155,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
sprintf(name, "mlx4-ib-%d-%d@%s",
i, j, dev->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
- if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) {
+ if (mlx4_assign_eq(dev, name, NULL,
+ &ibdev->eq_table[eq])) {
/* Use legacy (same as mlx4_en driver) */
pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
ibdev->eq_table[eq] =
@@ -1383,10 +1428,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
}
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+ struct mlx4_eqe *eqe = NULL;
+ struct ib_event_work *ew;
+ int port = 0;
+
+ if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ eqe = (struct mlx4_eqe *)param;
+ else
+ port = (u8)param;
if (port > ibdev->num_ports)
return;
@@ -1405,6 +1458,19 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+ ew = kmalloc(sizeof *ew, GFP_ATOMIC);
+ if (!ew) {
+ pr_err("failed to allocate memory for events work\n");
+ break;
+ }
+
+ INIT_WORK(&ew->work, handle_port_mgmt_change_event);
+ memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+ ew->ib_dev = ibdev;
+ handle_port_mgmt_change_event(&ew->work);
+ return;
+
default:
return;
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index ff36655d23d3..c136bb618e29 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -44,6 +44,16 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#define MLX4_IB_DRV_NAME "mlx4_ib"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
+
+#define mlx4_ib_warn(ibdev, format, arg...) \
+ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
MLX4_IB_MAX_HEADROOM = 2048
@@ -163,6 +173,7 @@ struct mlx4_ib_qp {
u8 state;
int mlx_type;
struct list_head gid_list;
+ struct list_head steering_rules;
};
struct mlx4_ib_srq {
@@ -214,6 +225,12 @@ struct mlx4_ib_dev {
int eq_added;
};
+struct ib_event_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *ib_dev;
+ struct mlx4_eqe ib_eqe;
+};
+
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -371,4 +388,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8d4ed24aef93..a6d8ea060ea8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -1335,11 +1336,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ pr_debug("qpn 0x%x: invalid attribute mask specified "
+ "for transition %d to %d. qp_type %d,"
+ " attr_mask 0x%x\n",
+ ibqp->qp_num, cur_state, new_state,
+ ibqp->qp_type, attr_mask);
goto out;
+ }
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+ pr_debug("qpn 0x%x: invalid port number (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->port_num, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
@@ -1350,17 +1361,30 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
+ if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
+ pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->pkey_index, cur_state,
+ new_state, ibqp->qp_type);
goto out;
+ }
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+ pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
+ pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 9601049e14d0..26a684536109 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -247,7 +247,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
spin_unlock(&dev->qp_table.lock);
if (!qp) {
- mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ mthca_warn(dev, "Async event %d for bogus QP %08x\n",
+ event_type, qpn);
return;
}
@@ -501,6 +502,7 @@ done:
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = qp->sq_policy;
out_mailbox:
mthca_free_mailbox(dev, mailbox);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index b050e629e9c3..5a044526e4f4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -202,8 +202,7 @@ static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
return 0;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) || \
-defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_VLAN_8021Q)
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
@@ -549,7 +548,7 @@ static struct ocrdma_driver ocrdma_drv = {
static void ocrdma_unregister_inet6addr_notifier(void)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
#endif
}
@@ -558,7 +557,7 @@ static int __init ocrdma_init_module(void)
{
int status;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
if (status)
return status;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 2e2e7aecc990..cb5b7f7d4d38 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -97,7 +97,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
attr->max_srq = (dev->attr.max_qp - 1);
- attr->max_srq_sge = attr->max_srq_sge;
+ attr->max_srq_sge = dev->attr.max_srq_sge;
attr->max_srq_wr = dev->attr.max_rqe;
attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
attr->max_fast_reg_page_list_len = 0;
@@ -893,7 +893,9 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
/* verify consumer QPs are not trying to use GSI QP's CQ */
if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
- (dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq))) {
+ (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
+ (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
+ (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
__func__, dev->id);
return -EINVAL;
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 7e62f4137148..7b1b86690024 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,8 +1,8 @@
#ifndef _QIB_KERNEL_H
#define _QIB_KERNEL_H
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -519,6 +519,7 @@ struct qib_pportdata {
struct qib_devdata *dd;
struct qib_chippport_specific *cpspec; /* chip-specific per-port */
struct kobject pport_kobj;
+ struct kobject pport_cc_kobj;
struct kobject sl2vl_kobj;
struct kobject diagc_kobj;
@@ -544,6 +545,7 @@ struct qib_pportdata {
/* read mostly */
struct qib_sdma_desc *sdma_descq;
+ struct workqueue_struct *qib_wq;
struct qib_sdma_state sdma_state;
dma_addr_t sdma_descq_phys;
volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
@@ -637,6 +639,39 @@ struct qib_pportdata {
struct timer_list led_override_timer;
struct xmit_wait cong_stats;
struct timer_list symerr_clear_timer;
+
+ /* Synchronize access between driver writes and sysfs reads */
+ spinlock_t cc_shadow_lock
+ ____cacheline_aligned_in_smp;
+
+ /* Shadow copy of the congestion control table */
+ struct cc_table_shadow *ccti_entries_shadow;
+
+ /* Shadow copy of the congestion control entries */
+ struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+ /* List of congestion control table entries */
+ struct ib_cc_table_entry_shadow *ccti_entries;
+
+ /* 16 congestion entries with each entry corresponding to a SL */
+ struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+ /* Maximum number of congestion control entries that the agent expects
+ * the manager to send.
+ */
+ u16 cc_supported_table_entries;
+
+ /* Total number of congestion control table entries */
+ u16 total_cct_entry;
+
+ /* Bit map identifying service level */
+ u16 cc_sl_control_map;
+
+ /* maximum congestion control table index */
+ u16 ccti_limit;
+
+ /* CA's max number of 64 entry units in the congestion control table */
+ u8 cc_max_table_entries;
};
/* Observers. Not to be taken lightly, possibly not to ship. */
@@ -1077,6 +1112,7 @@ extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
int init_chip_wc_pat(struct qib_devdata *dd, u32);
int qib_enable_wc(struct qib_devdata *dd);
@@ -1267,6 +1303,11 @@ int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
/* ppd->sdma_lock should be locked before calling this. */
int qib_sdma_make_progress(struct qib_pportdata *dd);
+static inline int qib_sdma_empty(const struct qib_pportdata *ppd)
+{
+ return ppd->sdma_descq_added == ppd->sdma_descq_removed;
+}
+
/* must be called under qib_sdma_lock */
static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd)
{
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 9892456a4348..1686fd4bda87 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -53,6 +53,9 @@
#include "qib.h"
#include "qib_common.h"
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
/*
* Each client that opens the diag device must read then write
* offset 0, to prevent lossage from random cat or od. diag_state
@@ -598,8 +601,8 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
- qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, "
- "failing\n");
+ qib_devinfo(dd->pcidev,
+ "Unable to allocate tmp buffer, failing\n");
ret = -ENOMEM;
goto bail;
}
@@ -693,7 +696,7 @@ int qib_register_observer(struct qib_devdata *dd,
ret = -ENOMEM;
olp = vmalloc(sizeof *olp);
if (!olp) {
- printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n");
+ pr_err("vmalloc for observer failed\n");
goto bail;
}
if (olp) {
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 8895cfec5019..e41e7f7fc763 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -764,8 +764,9 @@ int qib_reset_device(int unit)
qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
- qib_devinfo(dd->pcidev, "Invalid unit number %u or "
- "not initialized or not present\n", unit);
+ qib_devinfo(dd->pcidev,
+ "Invalid unit number %u or not initialized or not present\n",
+ unit);
ret = -ENXIO;
goto bail;
}
@@ -802,11 +803,13 @@ int qib_reset_device(int unit)
else
ret = -EAGAIN;
if (ret)
- qib_dev_err(dd, "Reinitialize unit %u after "
- "reset failed with %d\n", unit, ret);
+ qib_dev_err(dd,
+ "Reinitialize unit %u after reset failed with %d\n",
+ unit, ret);
else
- qib_devinfo(dd->pcidev, "Reinitialized unit %u after "
- "resetting\n", unit);
+ qib_devinfo(dd->pcidev,
+ "Reinitialized unit %u after resetting\n",
+ unit);
bail:
return ret;
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 92d9cfe98a68..4d5d71aaa2b4 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -160,10 +161,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
if (oguid > bguid[7]) {
if (bguid[6] == 0xff) {
if (bguid[5] == 0xff) {
- qib_dev_err(dd, "Can't set %s GUID"
- " from base, wraps to"
- " OUI!\n",
- qib_get_unit_name(t));
+ qib_dev_err(dd,
+ "Can't set %s GUID from base, wraps to OUI!\n",
+ qib_get_unit_name(t));
dd->base_guid = 0;
goto bail;
}
@@ -182,8 +182,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
len = sizeof(struct qib_flash);
buf = vmalloc(len);
if (!buf) {
- qib_dev_err(dd, "Couldn't allocate memory to read %u "
- "bytes from eeprom for GUID\n", len);
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
+ len);
goto bail;
}
@@ -201,23 +202,25 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
csum = flash_csum(ifp, 0);
if (csum != ifp->if_csum) {
- qib_devinfo(dd->pcidev, "Bad I2C flash checksum: "
- "0x%x, not 0x%x\n", csum, ifp->if_csum);
+ qib_devinfo(dd->pcidev,
+ "Bad I2C flash checksum: 0x%x, not 0x%x\n",
+ csum, ifp->if_csum);
goto done;
}
if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
*(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
- qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n",
- *(unsigned long long *) ifp->if_guid);
+ qib_dev_err(dd,
+ "Invalid GUID %llx from flash; ignoring\n",
+ *(unsigned long long *) ifp->if_guid);
/* don't allow GUID if all 0 or all 1's */
goto done;
}
/* complain, but allow it */
if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
- qib_devinfo(dd->pcidev, "Warning, GUID %llx is "
- "default, probably not correct!\n",
- *(unsigned long long *) ifp->if_guid);
+ qib_devinfo(dd->pcidev,
+ "Warning, GUID %llx is default, probably not correct!\n",
+ *(unsigned long long *) ifp->if_guid);
bguid = ifp->if_guid;
if (!bguid[0] && !bguid[1] && !bguid[2]) {
@@ -260,8 +263,9 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
memcpy(dd->serial, ifp->if_serial,
sizeof ifp->if_serial);
if (!strstr(ifp->if_comment, "Tested successfully"))
- qib_dev_err(dd, "Board SN %s did not pass functional "
- "test: %s\n", dd->serial, ifp->if_comment);
+ qib_dev_err(dd,
+ "Board SN %s did not pass functional test: %s\n",
+ dd->serial, ifp->if_comment);
memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT);
/*
@@ -323,8 +327,9 @@ int qib_update_eeprom_log(struct qib_devdata *dd)
buf = vmalloc(len);
ret = 1;
if (!buf) {
- qib_dev_err(dd, "Couldn't allocate memory to read %u "
- "bytes from eeprom for logging\n", len);
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for logging\n",
+ len);
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index a7403248d83d..faa44cb08071 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,6 +49,9 @@
#include "qib_common.h"
#include "qib_user_sdma.h"
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
static int qib_open(struct inode *, struct file *);
static int qib_close(struct inode *, struct file *);
static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
@@ -315,8 +318,9 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
}
if (cnt > tidcnt) {
/* make sure it all fits in tid_pg_list */
- qib_devinfo(dd->pcidev, "Process tried to allocate %u "
- "TIDs, only trying max (%u)\n", cnt, tidcnt);
+ qib_devinfo(dd->pcidev,
+ "Process tried to allocate %u TIDs, only trying max (%u)\n",
+ cnt, tidcnt);
cnt = tidcnt;
}
pagep = (struct page **) rcd->tid_pg_list;
@@ -750,9 +754,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd,
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
- qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x "
- "bytes failed: %d\n", what, rcd->ctxt,
- pfn, len, ret);
+ qib_devinfo(dd->pcidev,
+ "%s ctxt%u mmap of %lx, %x bytes failed: %d\n",
+ what, rcd->ctxt, pfn, len, ret);
bail:
return ret;
}
@@ -771,8 +775,9 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd,
*/
sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE;
if ((vma->vm_end - vma->vm_start) > sz) {
- qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen "
- "%lx > PAGE\n", vma->vm_end - vma->vm_start);
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap userreg: reqlen %lx > PAGE\n",
+ vma->vm_end - vma->vm_start);
ret = -EFAULT;
} else {
phys = dd->physaddr + ureg;
@@ -802,8 +807,8 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* for it.
*/
if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) {
- qib_devinfo(dd->pcidev, "FAIL mmap piobufs: "
- "reqlen %lx > PAGE\n",
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap piobufs: reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
ret = -EINVAL;
goto bail;
@@ -847,8 +852,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
size = rcd->rcvegrbuf_size;
total_size = rcd->rcvegrbuf_chunks * size;
if ((vma->vm_end - vma->vm_start) > total_size) {
- qib_devinfo(dd->pcidev, "FAIL on egr bufs: "
- "reqlen %lx > actual %lx\n",
+ qib_devinfo(dd->pcidev,
+ "FAIL on egr bufs: reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
ret = -EINVAL;
@@ -856,8 +861,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
}
if (vma->vm_flags & VM_WRITE) {
- qib_devinfo(dd->pcidev, "Can't map eager buffers as "
- "writable (flags=%lx)\n", vma->vm_flags);
+ qib_devinfo(dd->pcidev,
+ "Can't map eager buffers as writable (flags=%lx)\n",
+ vma->vm_flags);
ret = -EPERM;
goto bail;
}
@@ -1270,8 +1276,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
GFP_KERNEL);
if (!rcd || !ptmp) {
- qib_dev_err(dd, "Unable to allocate ctxtdata "
- "memory, failing open\n");
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata memory, failing open\n");
ret = -ENOMEM;
goto bailerr;
}
@@ -1560,10 +1566,10 @@ done_chk_sdma:
} else if (weight == 1 &&
test_bit(cpumask_first(tsk_cpus_allowed(current)),
qib_cpulist))
- qib_devinfo(dd->pcidev, "%s PID %u affinity "
- "set to cpu %d; already allocated\n",
- current->comm, current->pid,
- cpumask_first(tsk_cpus_allowed(current)));
+ qib_devinfo(dd->pcidev,
+ "%s PID %u affinity set to cpu %d; already allocated\n",
+ current->comm, current->pid,
+ cpumask_first(tsk_cpus_allowed(current)));
}
mutex_unlock(&qib_mutex);
@@ -2185,8 +2191,7 @@ int qib_cdev_init(int minor, const char *name,
cdev = cdev_alloc();
if (!cdev) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Could not allocate cdev for minor %d, %s\n",
+ pr_err("Could not allocate cdev for minor %d, %s\n",
minor, name);
ret = -ENOMEM;
goto done;
@@ -2198,8 +2203,7 @@ int qib_cdev_init(int minor, const char *name,
ret = cdev_add(cdev, dev, 1);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Could not add cdev for minor %d, %s (err %d)\n",
+ pr_err("Could not add cdev for minor %d, %s (err %d)\n",
minor, name, -ret);
goto err_cdev;
}
@@ -2209,8 +2213,7 @@ int qib_cdev_init(int minor, const char *name,
goto done;
ret = PTR_ERR(device);
device = NULL;
- printk(KERN_ERR QIB_DRV_NAME ": Could not create "
- "device for minor %d, %s (err %d)\n",
+ pr_err("Could not create device for minor %d, %s (err %d)\n",
minor, name, -ret);
err_cdev:
cdev_del(cdev);
@@ -2245,16 +2248,14 @@ int __init qib_dev_init(void)
ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME ": Could not allocate "
- "chrdev region (err %d)\n", -ret);
+ pr_err("Could not allocate chrdev region (err %d)\n", -ret);
goto done;
}
qib_class = class_create(THIS_MODULE, "ipath");
if (IS_ERR(qib_class)) {
ret = PTR_ERR(qib_class);
- printk(KERN_ERR QIB_DRV_NAME ": Could not create "
- "device class (err %d)\n", -ret);
+ pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(qib_dev, QIB_NMINORS);
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 05e0f17c5b44..cff8a6c32161 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -382,7 +383,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
&simple_dir_operations, dd);
if (ret) {
- printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
+ pr_err("create_file(%s) failed: %d\n", unit, ret);
goto bail;
}
@@ -390,21 +391,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp,
&cntr_ops[0], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/counters) failed: %d\n",
+ pr_err("create_file(%s/counters) failed: %d\n",
unit, ret);
goto bail;
}
ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp,
&cntr_ops[1], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n",
+ pr_err("create_file(%s/counter_names) failed: %d\n",
unit, ret);
goto bail;
}
ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp,
&portcntr_ops[0], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, "portcounter_names", ret);
goto bail;
}
@@ -416,7 +417,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
&portcntr_ops[i], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, fname, ret);
goto bail;
}
@@ -426,7 +427,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
&qsfp_ops[i - 1], dd);
if (ret) {
- printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
+ pr_err("create_file(%s/%s) failed: %d\n",
unit, fname, ret);
goto bail;
}
@@ -435,7 +436,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret)
- printk(KERN_ERR "create_file(%s/flash) failed: %d\n",
+ pr_err("create_file(%s/flash) failed: %d\n",
unit, ret);
bail:
return ret;
@@ -486,7 +487,7 @@ static int remove_device_files(struct super_block *sb,
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
- printk(KERN_ERR "Lookup of %s failed\n", unit);
+ pr_err("Lookup of %s failed\n", unit);
goto bail;
}
@@ -532,7 +533,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
ret = simple_fill_super(sb, QIBFS_MAGIC, files);
if (ret) {
- printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
+ pr_err("simple_fill_super failed: %d\n", ret);
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 4d352b90750a..a099ac171e22 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -753,8 +753,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
return;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
return;
}
qib_stats.sps_hwerrs++;
@@ -779,13 +779,14 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
* or it's occurred within the last 5 seconds.
*/
if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID))
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
if (hwerrs & ~IB_HWE_BITSEXTANT)
- qib_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~IB_HWE_BITSEXTANT));
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT));
ctrl = qib_read_kreg32(dd, kr_control);
if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) {
@@ -815,8 +816,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcat(msg, "[Memory BIST test failed, InfiniPath hardware"
- " unusable]", msgl);
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -868,8 +870,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
*msg = 0; /* recovered from all of them */
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* for /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -1017,9 +1020,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
qib_inc_eeprom_err(dd, log_idx, 1);
if (errs & ~IB_E_BITSEXTANT)
- qib_dev_err(dd, "error interrupt with unknown errors "
- "%llx set\n",
- (unsigned long long) (errs & ~IB_E_BITSEXTANT));
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
if (errs & E_SUM_ERRS) {
qib_disarm_6120_senderrbufs(ppd);
@@ -1089,8 +1092,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
}
if (errs & ERR_MASK(ResetNegated)) {
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1541,8 +1544,9 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat)
qib_stats.sps_errints++;
estat = qib_read_kreg64(dd, kr_errstatus);
if (!estat)
- qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
- "but no error bits set!\n", istat);
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
handle_6120_errors(dd, estat);
}
@@ -1715,16 +1719,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd)
}
if (!dd->cspec->irq)
- qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
- "work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
else {
int ret;
ret = request_irq(dd->cspec->irq, qib_6120intr, 0,
QIB_DRV_NAME, dd);
if (ret)
- qib_dev_err(dd, "Couldn't setup interrupt "
- "(irq=%d): %d\n", dd->cspec->irq,
- ret);
+ qib_dev_err(dd,
+ "Couldn't setup interrupt (irq=%d): %d\n",
+ dd->cspec->irq, ret);
}
}
@@ -1759,8 +1763,9 @@ static void pe_boardname(struct qib_devdata *dd)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
- qib_dev_err(dd, "Unsupported InfiniPath hardware revision "
- "%u.%u!\n", dd->majrev, dd->minrev);
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
snprintf(dd->boardversion, sizeof(dd->boardversion),
"ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -1833,8 +1838,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
bail:
if (ret) {
if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
- qib_dev_err(dd, "Reset failed to setup PCIe or "
- "interrupts; continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
/* clear the reset error, init error/hwerror mask */
qib_6120_init_hwerrors(dd);
/* for Rev2 error interrupts; nop for rev 1 */
@@ -1876,8 +1881,9 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
}
pa >>= 11;
if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -1941,8 +1947,9 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr,
}
pa >>= 11;
if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -2928,8 +2935,9 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what)
ppd->dd->unit, ppd->port);
} else if (!strncmp(what, "off", 3)) {
ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -3186,11 +3194,10 @@ static int qib_late_6120_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
return ret;
@@ -3218,8 +3225,8 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -3551,8 +3558,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
goto bail;
if (qib_pcie_params(dd, 8, NULL, NULL))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
dd->cspec->irq = pdev->irq; /* save IRQ */
/* clear diagctrl register, in case diags were running and crashed */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 86a0ba7ca0c2..64d0ecb90cdc 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1111,9 +1111,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
sdma_7220_errors(ppd, errs);
if (errs & ~IB_E_BITSEXTANT)
- qib_dev_err(dd, "error interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (errs & ~IB_E_BITSEXTANT));
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
if (errs & E_SUM_ERRS) {
qib_disarm_7220_senderrbufs(ppd);
@@ -1192,8 +1192,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
}
if (errs & ERR_MASK(ResetNegated)) {
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1305,8 +1305,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
goto bail;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
goto bail;
}
qib_stats.sps_hwerrs++;
@@ -1329,13 +1329,14 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_inc_eeprom_err(dd, log_idx, 1);
if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC |
RXE_PARITY))
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
if (hwerrs & ~IB_HWE_BITSEXTANT)
- qib_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~IB_HWE_BITSEXTANT));
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT));
if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR)
qib_sd7220_clr_ibpar(dd);
@@ -1362,8 +1363,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcat(msg, "[Memory BIST test failed, "
- "InfiniPath hardware unusable]", msgl);
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -1409,8 +1411,9 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* For /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -1918,8 +1921,9 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat)
qib_stats.sps_errints++;
estat = qib_read_kreg64(dd, kr_errstatus);
if (!estat)
- qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
- "but no error bits set!\n", istat);
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
else
handle_7220_errors(dd, estat);
}
@@ -2023,17 +2027,18 @@ bail:
static void qib_setup_7220_interrupt(struct qib_devdata *dd)
{
if (!dd->cspec->irq)
- qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
- "work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
else {
int ret = request_irq(dd->cspec->irq, qib_7220intr,
dd->msi_lo ? 0 : IRQF_SHARED,
QIB_DRV_NAME, dd);
if (ret)
- qib_dev_err(dd, "Couldn't setup %s interrupt "
- "(irq=%d): %d\n", dd->msi_lo ?
- "MSI" : "INTx", dd->cspec->irq, ret);
+ qib_dev_err(dd,
+ "Couldn't setup %s interrupt (irq=%d): %d\n",
+ dd->msi_lo ? "MSI" : "INTx",
+ dd->cspec->irq, ret);
}
}
@@ -2072,9 +2077,9 @@ static void qib_7220_boardname(struct qib_devdata *dd)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
- qib_dev_err(dd, "Unsupported InfiniPath hardware "
- "revision %u.%u!\n",
- dd->majrev, dd->minrev);
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
snprintf(dd->boardversion, sizeof(dd->boardversion),
"ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -2146,8 +2151,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
bail:
if (ret) {
if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
- qib_dev_err(dd, "Reset failed to setup PCIe or "
- "interrupts; continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
/* hold IBC in reset, no sends, etc till later */
qib_write_kreg(dd, kr_control, 0ULL);
@@ -2187,8 +2192,9 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
return;
}
if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -2706,8 +2712,9 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
/* enable heart beat again */
val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -3307,8 +3314,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
if (!dd->msi_lo)
return 0;
- qib_devinfo(dd->pcidev, "MSI interrupt not detected,"
- " trying INTx interrupts\n");
+ qib_devinfo(dd->pcidev,
+ "MSI interrupt not detected, trying INTx interrupts\n");
qib_7220_free_irq(dd);
qib_enable_intx(dd->pcidev);
/*
@@ -3980,11 +3987,10 @@ static int qib_late_7220_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
qib_register_observer(dd, &sendctrl_observer);
@@ -4014,8 +4020,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -4613,8 +4619,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
break;
}
if (qib_pcie_params(dd, minwidth, NULL, NULL))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
/* save IRQ for possible later use */
dd->cspec->irq = pdev->irq;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index c881e744c091..0d7280af99bc 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -49,6 +50,10 @@
#include "qib_qsfp.h"
#include "qib_mad.h"
+#include "qib_verbs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
static void qib_setup_7322_setextled(struct qib_pportdata *, u32);
static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t);
@@ -1575,8 +1580,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
qib_stats.sps_errints++;
errs = qib_read_kreg64(dd, kr_errstatus);
if (!errs) {
- qib_devinfo(dd->pcidev, "device error interrupt, "
- "but no error bits set!\n");
+ qib_devinfo(dd->pcidev,
+ "device error interrupt, but no error bits set!\n");
goto done;
}
@@ -1622,8 +1627,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
if (errs & QIB_E_RESET) {
int pidx;
- qib_dev_err(dd, "Got reset, requires re-init "
- "(unload and reload driver)\n");
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
dd->flags &= ~QIB_INITTED; /* needs re-init */
/* mark as having had error */
*dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1760,9 +1765,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
ppd->dd->cspec->r1 ?
QDR_STATIC_ADAPT_DOWN_R1 :
QDR_STATIC_ADAPT_DOWN);
- printk(KERN_INFO QIB_DRV_NAME
- " IB%u:%u re-enabled QDR adaptation "
- "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt);
+ pr_info(
+ "IB%u:%u re-enabled QDR adaptation ibclt %x\n",
+ ppd->dd->unit, ppd->port, ibclt);
}
}
}
@@ -1804,9 +1809,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
if (!*msg)
snprintf(msg, sizeof ppd->cpspec->epmsgbuf,
"no others");
- qib_dev_porterr(dd, ppd->port, "error interrupt with unknown"
- " errors 0x%016Lx set (and %s)\n",
- (errs & ~QIB_E_P_BITSEXTANT), msg);
+ qib_dev_porterr(dd, ppd->port,
+ "error interrupt with unknown errors 0x%016Lx set (and %s)\n",
+ (errs & ~QIB_E_P_BITSEXTANT), msg);
*msg = '\0';
}
@@ -2024,8 +2029,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (!hwerrs)
goto bail;
if (hwerrs == ~0ULL) {
- qib_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
goto bail;
}
qib_stats.sps_hwerrs++;
@@ -2039,8 +2044,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
/* no EEPROM logging, yet */
if (hwerrs)
- qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
ctrl = qib_read_kreg32(dd, kr_control);
if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) {
@@ -2064,8 +2070,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
isfatal = 1;
- strlcpy(msg, "[Memory BIST test failed, "
- "InfiniPath hardware unusable]", msgl);
+ strlcpy(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
/* ignore from now on, so disable until driver reloaded */
dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -2078,8 +2085,9 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
qib_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !dd->diag_client) {
- qib_dev_err(dd, "Fatal Hardware Error, no longer"
- " usable, SN %.16s\n", dd->serial);
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
/*
* for /sys status file and user programs to print; if no
* trailing brace is copied, we'll know it was truncated.
@@ -2667,8 +2675,9 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat)
char msg[128];
kills = istat & ~QIB_I_BITSEXTANT;
- qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:"
- " %s\n", (unsigned long long) kills, msg);
+ qib_dev_err(dd,
+ "Clearing reserved interrupt(s) 0x%016llx: %s\n",
+ (unsigned long long) kills, msg);
qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills));
}
@@ -3101,16 +3110,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
/* Try to get INTx interrupt */
try_intx:
if (!dd->pcidev->irq) {
- qib_dev_err(dd, "irq is 0, BIOS error? "
- "Interrupts won't work\n");
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
goto bail;
}
ret = request_irq(dd->pcidev->irq, qib_7322intr,
IRQF_SHARED, QIB_DRV_NAME, dd);
if (ret) {
- qib_dev_err(dd, "Couldn't setup INTx "
- "interrupt (irq=%d): %d\n",
- dd->pcidev->irq, ret);
+ qib_dev_err(dd,
+ "Couldn't setup INTx interrupt (irq=%d): %d\n",
+ dd->pcidev->irq, ret);
goto bail;
}
dd->cspec->irq = dd->pcidev->irq;
@@ -3185,8 +3194,9 @@ try_intx:
* Shouldn't happen since the enable said we could
* have as many as we are trying to setup here.
*/
- qib_dev_err(dd, "Couldn't setup MSIx "
- "interrupt (vec=%d, irq=%d): %d\n", msixnum,
+ qib_dev_err(dd,
+ "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n",
+ msixnum,
dd->cspec->msix_entries[msixnum].msix.vector,
ret);
qib_7322_nomsix(dd);
@@ -3305,8 +3315,9 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
(unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
- qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode"
- " by module parameter\n", dd->unit);
+ qib_devinfo(dd->pcidev,
+ "IB%u: Forced to single port mode by module parameter\n",
+ dd->unit);
features &= PORT_SPD_CAP;
}
@@ -3400,8 +3411,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (val == dd->revision)
break;
if (i == 5) {
- qib_dev_err(dd, "Failed to initialize after reset, "
- "unusable\n");
+ qib_dev_err(dd,
+ "Failed to initialize after reset, unusable\n");
ret = 0;
goto bail;
}
@@ -3432,8 +3443,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (qib_pcie_params(dd, dd->lbus_width,
&dd->cspec->num_msix_entries,
dd->cspec->msix_entries))
- qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
qib_setup_7322_interrupt(dd, 1);
@@ -3474,8 +3485,9 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
return;
}
if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) {
- qib_dev_err(dd, "Physical page address 0x%lx "
- "larger than supported\n", pa);
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
return;
}
@@ -4029,8 +4041,9 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what)
Loopback);
/* enable heart beat again */
val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB;
- qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
- "(normal)\n", ppd->dd->unit, ppd->port);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
} else
ret = -EINVAL;
if (!ret) {
@@ -4714,8 +4727,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd)
dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
if (!dd->pport[i].cpspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for"
- " portcounters\n");
+ qib_dev_err(dd,
+ "Failed allocation for portcounters\n");
}
}
@@ -4865,8 +4878,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
if (!dd->cspec->num_msix_entries)
return 0; /* already using INTx */
- qib_devinfo(dd->pcidev, "MSIx interrupt not detected,"
- " trying INTx interrupts\n");
+ qib_devinfo(dd->pcidev,
+ "MSIx interrupt not detected, trying INTx interrupts\n");
qib_7322_nomsix(dd);
qib_enable_intx(dd->pcidev);
qib_setup_7322_interrupt(dd, 0);
@@ -5151,15 +5164,11 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
goto retry;
if (!ibp->smi_ah) {
- struct ib_ah_attr attr;
struct ib_ah *ah;
- memset(&attr, 0, sizeof attr);
- attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE);
- attr.port_num = ppd->port;
- ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
+ ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE));
if (IS_ERR(ah))
- ret = -EINVAL;
+ ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
ibp->smi_ah = to_iah(ah);
@@ -5844,22 +5853,21 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
- char *n;
+ int ret;
+
if (strlen(str) >= MAX_ATTEN_LEN) {
- printk(KERN_INFO QIB_DRV_NAME " txselect_values string "
- "too long\n");
+ pr_info("txselect_values string too long\n");
return -ENOSPC;
}
- val = simple_strtoul(str, &n, 0);
- if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ ret = kstrtoul(str, 0, &val);
+ if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
- printk(KERN_INFO QIB_DRV_NAME
- "txselect_values must start with a number < %d\n",
+ pr_info("txselect_values must start with a number < %d\n",
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
- return -EINVAL;
+ return ret ? ret : -EINVAL;
}
- strcpy(txselect_list, str);
+ strcpy(txselect_list, str);
list_for_each_entry(dd, &qib_dev_list, list)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
set_no_qsfp_atten(dd, 1);
@@ -5882,11 +5890,10 @@ static int qib_late_7322_initreg(struct qib_devdata *dd)
qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
val = qib_read_kreg64(dd, kr_sendpioavailaddr);
if (val != dd->pioavailregs_phys) {
- qib_dev_err(dd, "Catastrophic software error, "
- "SendPIOAvailAddr written as %lx, "
- "read back as %llx\n",
- (unsigned long) dd->pioavailregs_phys,
- (unsigned long long) val);
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
ret = -EINVAL;
}
@@ -6098,8 +6105,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
dd->revision = readq(&dd->kregbase[kr_revision]);
if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
- qib_dev_err(dd, "Revision register read failure, "
- "giving up initialization\n");
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
ret = -ENODEV;
goto bail;
}
@@ -6265,9 +6272,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
*/
if (!(dd->flags & QIB_HAS_QSFP)) {
if (!IS_QMH(dd) && !IS_QME(dd))
- qib_devinfo(dd->pcidev, "IB%u:%u: "
- "Unknown mezzanine card type\n",
- dd->unit, ppd->port);
+ qib_devinfo(dd->pcidev,
+ "IB%u:%u: Unknown mezzanine card type\n",
+ dd->unit, ppd->port);
cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME;
/*
* Choose center value as default tx serdes setting
@@ -6922,8 +6929,8 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
dd->cspec->msix_entries[i].msix.entry = i;
if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
- qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
- "continuing anyway\n");
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
/* may be less than we wanted, if not enough available */
dd->cspec->num_msix_entries = tabsize;
@@ -7276,8 +7283,7 @@ static void find_best_ent(struct qib_pportdata *ppd,
ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
- printk(KERN_INFO QIB_DRV_NAME
- " IB%u:%u use idx %u into txdds_mfg\n",
+ pr_info("IB%u:%u use idx %u into txdds_mfg\n",
ppd->dd->unit, ppd->port, idx);
*sdr_dds = &txdds_extra_mfg[idx];
*ddr_dds = &txdds_extra_mfg[idx];
@@ -7432,11 +7438,11 @@ static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN);
if (enable && !state) {
- printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n",
+ pr_info("IB%u:%u Turning LOS on\n",
ppd->dd->unit, ppd->port);
data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
} else if (!enable && state) {
- printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n",
+ pr_info("IB%u:%u Turning LOS off\n",
ppd->dd->unit, ppd->port);
data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
}
@@ -7672,8 +7678,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
}
}
if (chan_done) {
- printk(KERN_INFO QIB_DRV_NAME
- " Serdes %d calibration not done after .5 sec: 0x%x\n",
+ pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n",
IBSD(ppd->hw_pidx), chan_done);
} else {
for (chan = 0; chan < SERDES_CHANS; ++chan) {
@@ -7681,9 +7686,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
(chan + (chan >> 1)),
25, 0, 0);
if ((~rxcaldone & (u32)BMASK(10, 10)) == 0)
- printk(KERN_INFO QIB_DRV_NAME
- " Serdes %d chan %d calibration "
- "failed\n", IBSD(ppd->hw_pidx), chan);
+ pr_info("Serdes %d chan %d calibration failed\n",
+ IBSD(ppd->hw_pidx), chan);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index dc14e100a7f1..4443adfcd9ee 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,9 +38,14 @@
#include <linux/delay.h>
#include <linux/idr.h>
#include <linux/module.h>
+#include <linux/printk.h>
#include "qib.h"
#include "qib_common.h"
+#include "qib_mad.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
/*
* min buffers we want to have per context, after driver
@@ -71,6 +76,9 @@ unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
+unsigned qib_cc_table_size;
+module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
/*
* qib_wc_pat parameter:
* 0 is WC via MTRR
@@ -120,8 +128,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
*/
dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
if (!dd->rcd) {
- qib_dev_err(dd, "Unable to allocate ctxtdata array, "
- "failing\n");
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata array, failing\n");
ret = -ENOMEM;
goto done;
}
@@ -137,8 +145,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
ppd = dd->pport + (i % dd->num_pports);
rcd = qib_create_ctxtdata(ppd, i);
if (!rcd) {
- qib_dev_err(dd, "Unable to allocate ctxtdata"
- " for Kernel ctxt, failing\n");
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
ret = -ENOMEM;
goto done;
}
@@ -199,6 +207,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
+ int size;
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
ppd->port = port; /* IB port number, not index */
@@ -210,6 +219,83 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
init_timer(&ppd->symerr_clear_timer);
ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
ppd->symerr_clear_timer.data = (unsigned long)ppd;
+
+ ppd->qib_wq = NULL;
+
+ spin_lock_init(&ppd->cc_shadow_lock);
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
+ goto bail;
+
+ ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
+ IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
+
+ ppd->cc_max_table_entries =
+ ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
+
+ size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
+ * IB_CCT_ENTRIES;
+ ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion control table for port %d!\n",
+ port);
+ goto bail;
+ }
+
+ size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
+ ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion setting list for port %d!\n",
+ port);
+ goto bail_1;
+ }
+
+ size = sizeof(struct cc_table_shadow);
+ ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow ccti list for port %d!\n",
+ port);
+ goto bail_2;
+ }
+
+ size = sizeof(struct ib_cc_congestion_setting_attr);
+ ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow congestion setting list for port %d!\n",
+ port);
+ goto bail_3;
+ }
+
+ return;
+
+bail_3:
+ kfree(ppd->ccti_entries_shadow);
+ ppd->ccti_entries_shadow = NULL;
+bail_2:
+ kfree(ppd->congestion_entries);
+ ppd->congestion_entries = NULL;
+bail_1:
+ kfree(ppd->ccti_entries);
+ ppd->ccti_entries = NULL;
+bail:
+ /* User is intentionally disabling the congestion control agent */
+ if (!qib_cc_table_size)
+ return;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
+ qib_cc_table_size = 0;
+ qib_dev_err(dd,
+ "Congestion Control table size %d less than minimum %d for port %d\n",
+ qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
+ }
+
+ qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
+ port);
+ return;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -221,8 +307,8 @@ static int init_pioavailregs(struct qib_devdata *dd)
&dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
GFP_KERNEL);
if (!dd->pioavailregs_dma) {
- qib_dev_err(dd, "failed to allocate PIOavail reg area "
- "in memory\n");
+ qib_dev_err(dd,
+ "failed to allocate PIOavail reg area in memory\n");
ret = -ENOMEM;
goto done;
}
@@ -277,15 +363,15 @@ static void init_shadow_tids(struct qib_devdata *dd)
pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
if (!pages) {
- qib_dev_err(dd, "failed to allocate shadow page * "
- "array, no expected sends!\n");
+ qib_dev_err(dd,
+ "failed to allocate shadow page * array, no expected sends!\n");
goto bail;
}
addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
if (!addrs) {
- qib_dev_err(dd, "failed to allocate shadow dma handle "
- "array, no expected sends!\n");
+ qib_dev_err(dd,
+ "failed to allocate shadow dma handle array, no expected sends!\n");
goto bail_free;
}
@@ -309,13 +395,13 @@ static int loadtime_init(struct qib_devdata *dd)
if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
- qib_dev_err(dd, "Driver only handles version %d, "
- "chip swversion is %d (%llx), failng\n",
- QIB_CHIP_SWVERSION,
- (int)(dd->revision >>
+ qib_dev_err(dd,
+ "Driver only handles version %d, chip swversion is %d (%llx), failng\n",
+ QIB_CHIP_SWVERSION,
+ (int)(dd->revision >>
QLOGIC_IB_R_SOFTWARE_SHIFT) &
- QLOGIC_IB_R_SOFTWARE_MASK,
- (unsigned long long) dd->revision);
+ QLOGIC_IB_R_SOFTWARE_MASK,
+ (unsigned long long) dd->revision);
ret = -ENOSYS;
goto done;
}
@@ -419,8 +505,8 @@ static void verify_interrupt(unsigned long opaque)
*/
if (dd->int_counter == 0) {
if (!dd->f_intr_fallback(dd))
- dev_err(&dd->pcidev->dev, "No interrupts detected, "
- "not usable.\n");
+ dev_err(&dd->pcidev->dev,
+ "No interrupts detected, not usable.\n");
else /* re-arm the timer to see if fallback works */
mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
}
@@ -483,6 +569,41 @@ static void init_piobuf_state(struct qib_devdata *dd)
}
/**
+ * qib_create_workqueues - create per port workqueues
+ * @dd: the qlogic_ib device
+ */
+static int qib_create_workqueues(struct qib_devdata *dd)
+{
+ int pidx;
+ struct qib_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (!ppd->qib_wq) {
+ char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */
+ snprintf(wq_name, sizeof(wq_name), "qib%d_%d",
+ dd->unit, pidx);
+ ppd->qib_wq =
+ create_singlethread_workqueue(wq_name);
+ if (!ppd->qib_wq)
+ goto wq_error;
+ }
+ }
+ return 0;
+wq_error:
+ pr_err("create_singlethread_workqueue failed for port %d\n",
+ pidx + 1);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
+ }
+ return -ENOMEM;
+}
+
+/**
* qib_init - do the actual initialization sequence on the chip
* @dd: the qlogic_ib device
* @reinit: reinitializing, so don't allocate new memory
@@ -547,8 +668,8 @@ int qib_init(struct qib_devdata *dd, int reinit)
if (!lastfail)
lastfail = qib_setup_eagerbufs(rcd);
if (lastfail) {
- qib_dev_err(dd, "failed to allocate kernel ctxt's "
- "rcvhdrq and/or egr bufs\n");
+ qib_dev_err(dd,
+ "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
continue;
}
}
@@ -764,6 +885,11 @@ static void qib_shutdown_device(struct qib_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
dd->f_quiet_serdes(ppd);
+
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
}
qib_update_eeprom_log(dd);
@@ -893,8 +1019,7 @@ static void qib_verify_pioperf(struct qib_devdata *dd)
/* 1 GiB/sec, slightly over IB SDR line rate */
if (lcnt < (emsecs * 1024U))
qib_dev_err(dd,
- "Performance problem: bandwidth to PIO buffers is "
- "only %u MiB/sec\n",
+ "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n",
lcnt / (u32) emsecs);
preempt_enable();
@@ -967,8 +1092,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (qib_cpulist)
qib_cpulist_count = count;
else
- qib_early_err(&pdev->dev, "Could not alloc cpulist "
- "info, cpu affinity might be wrong\n");
+ qib_early_err(&pdev->dev,
+ "Could not alloc cpulist info, cpu affinity might be wrong\n");
}
bail:
@@ -1057,21 +1182,20 @@ static int __init qlogic_ib_init(void)
*/
idr_init(&qib_unit_table);
if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
- printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
+ pr_err("idr_pre_get() failed\n");
ret = -ENOMEM;
goto bail_cq_wq;
}
ret = pci_register_driver(&qib_driver);
if (ret < 0) {
- printk(KERN_ERR QIB_DRV_NAME
- ": Unable to register driver: error %d\n", -ret);
+ pr_err("Unable to register driver: error %d\n", -ret);
goto bail_unit;
}
/* not fatal if it doesn't work */
if (qib_init_qibfs())
- printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n");
+ pr_err("Unable to register ipathfs\n");
goto bail; /* all OK */
bail_unit:
@@ -1095,9 +1219,9 @@ static void __exit qlogic_ib_cleanup(void)
ret = qib_exit_qibfs();
if (ret)
- printk(KERN_ERR QIB_DRV_NAME ": "
- "Unable to cleanup counter filesystem: "
- "error %d\n", -ret);
+ pr_err(
+ "Unable to cleanup counter filesystem: error %d\n",
+ -ret);
pci_unregister_driver(&qib_driver);
@@ -1121,10 +1245,24 @@ static void cleanup_device_data(struct qib_devdata *dd)
unsigned long flags;
/* users can't do anything more with chip */
- for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
if (dd->pport[pidx].statusp)
*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
+ spin_lock(&dd->pport[pidx].cc_shadow_lock);
+
+ kfree(dd->pport[pidx].congestion_entries);
+ dd->pport[pidx].congestion_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries);
+ dd->pport[pidx].ccti_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries_shadow);
+ dd->pport[pidx].ccti_entries_shadow = NULL;
+ kfree(dd->pport[pidx].congestion_entries_shadow);
+ dd->pport[pidx].congestion_entries_shadow = NULL;
+
+ spin_unlock(&dd->pport[pidx].cc_shadow_lock);
+ }
+
if (!qib_wc_pat)
qib_disable_wc(dd);
@@ -1223,9 +1361,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
#ifdef CONFIG_PCI_MSI
dd = qib_init_iba6120_funcs(pdev, ent);
#else
- qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
- "work if CONFIG_PCI_MSI is not enabled\n",
- ent->device);
+ qib_early_err(&pdev->dev,
+ "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
+ ent->device);
dd = ERR_PTR(-ENODEV);
#endif
break;
@@ -1239,8 +1377,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
break;
default:
- qib_early_err(&pdev->dev, "Failing on unknown QLogic "
- "deviceid 0x%x\n", ent->device);
+ qib_early_err(&pdev->dev,
+ "Failing on unknown QLogic deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
}
@@ -1249,6 +1388,10 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
if (ret)
goto bail; /* error already printed */
+ ret = qib_create_workqueues(dd);
+ if (ret)
+ goto bail;
+
/* do the generic initialization */
initfail = qib_init(dd, 0);
@@ -1293,9 +1436,9 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
if (!qib_wc_pat) {
ret = qib_enable_wc(dd);
if (ret) {
- qib_dev_err(dd, "Write combining not enabled "
- "(err %d): performance may be poor\n",
- -ret);
+ qib_dev_err(dd,
+ "Write combining not enabled (err %d): performance may be poor\n",
+ -ret);
ret = 0;
}
}
@@ -1361,9 +1504,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
- qib_dev_err(dd, "attempt to allocate %d bytes "
- "for ctxt %u rcvhdrq failed\n",
- amt, rcd->ctxt);
+ qib_dev_err(dd,
+ "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+ amt, rcd->ctxt);
goto bail;
}
@@ -1392,8 +1535,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
return 0;
bail_free:
- qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
- "rcvhdrqtailaddr failed\n", rcd->ctxt);
+ qib_dev_err(dd,
+ "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+ rcd->ctxt);
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
bail_free_hdrq:
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 6ae57d23004a..f4918f2165ec 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -224,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd)
* We print the message and disable interrupts, in hope of
* having a better chance of debugging the problem.
*/
- qib_dev_err(dd, "Read of chip interrupt status failed"
- " disabling interrupts\n");
+ qib_dev_err(dd,
+ "Read of chip interrupt status failed disabling interrupts\n");
if (allbits++) {
/* disable interrupt delivery, something is very wrong */
if (allbits == 2)
dd->f_set_intr_state(dd, 0);
if (allbits == 3) {
- qib_dev_err(dd, "2nd bad interrupt status, "
- "unregistering interrupts\n");
+ qib_dev_err(dd,
+ "2nd bad interrupt status, unregistering interrupts\n");
dd->flags |= QIB_BADINTR;
dd->flags &= ~QIB_INITTED;
dd->f_free_irq(dd);
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 8fd19a47df0c..e9486c74c226 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -35,21 +35,41 @@
/**
* qib_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
* @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field mr for non-dma regions.
*
- * Returns 1 if successful, otherwise returns 0.
*/
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
{
unsigned long flags;
u32 r;
u32 n;
- int ret;
+ int ret = 0;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
+ /* special case for dma_mr lkey == 0 */
+ if (dma_region) {
+ struct qib_mregion *tmr;
+
+ tmr = rcu_dereference(dev->dma_mr);
+ if (!tmr) {
+ qib_get_mr(mr);
+ rcu_assign_pointer(dev->dma_mr, mr);
+ mr->lkey_published = 1;
+ }
+ goto success;
+ }
+
/* Find the next available LKEY */
r = rkt->next;
n = r;
@@ -57,11 +77,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
if (rkt->table[r] == NULL)
break;
r = (r + 1) & (rkt->max - 1);
- if (r == n) {
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = 0;
+ if (r == n)
goto bail;
- }
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
@@ -76,57 +93,58 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
mr->lkey |= 1 << 8;
rkt->gen++;
}
- rkt->table[r] = mr;
+ qib_get_mr(mr);
+ rcu_assign_pointer(rkt->table[r], mr);
+ mr->lkey_published = 1;
+success:
spin_unlock_irqrestore(&rkt->lock, flags);
-
- ret = 1;
-
-bail:
+out:
return ret;
+bail:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = -ENOMEM;
+ goto out;
}
/**
* qib_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
+ * @mr: mr to free from tables
*/
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr)
+void qib_free_lkey(struct qib_mregion *mr)
{
unsigned long flags;
u32 lkey = mr->lkey;
u32 r;
- int ret;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
- spin_lock_irqsave(&dev->lk_table.lock, flags);
- if (lkey == 0) {
- if (dev->dma_mr && dev->dma_mr == mr) {
- ret = atomic_read(&dev->dma_mr->refcount);
- if (!ret)
- dev->dma_mr = NULL;
- } else
- ret = 0;
- } else {
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (!mr->lkey_published)
+ goto out;
+ if (lkey == 0)
+ rcu_assign_pointer(dev->dma_mr, NULL);
+ else {
r = lkey >> (32 - ib_qib_lkey_table_size);
- ret = atomic_read(&dev->lk_table.table[r]->refcount);
- if (!ret)
- dev->lk_table.table[r] = NULL;
+ rcu_assign_pointer(rkt->table[r], NULL);
}
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
-
- if (ret)
- ret = -EBUSY;
- return ret;
+ qib_put_mr(mr);
+ mr->lkey_published = 0;
+out:
+ spin_unlock_irqrestore(&rkt->lock, flags);
}
/**
* qib_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
* @isge: outgoing internal SGE
* @sge: SGE to check
* @acc: access flags
*
* Return 1 if valid and successful, otherwise returns 0.
*
+ * increments the reference count upon success
+ *
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
@@ -136,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- unsigned long flags;
/*
* We use LKEY == zero for kernel virtual addresses
* (see qib_get_dma_mr and qib_dma.c).
*/
- spin_lock_irqsave(&rkt->lock, flags);
+ rcu_read_lock();
if (sge->lkey == 0) {
struct qib_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
- if (!dev->dma_mr)
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
- spin_unlock_irqrestore(&rkt->lock, flags);
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
- isge->mr = dev->dma_mr;
+ isge->mr = mr;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
isge->sge_length = sge->length;
@@ -161,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->n = 0;
goto ok;
}
- mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
- mr->pd != &pd->ibpd))
+ mr = rcu_dereference(
+ rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
off = sge->addr - mr->user_base;
- if (unlikely(sge->addr < mr->user_base ||
- off + sge->length > mr->length ||
- (mr->access_flags & acc) != acc))
+ if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+ (mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
- spin_unlock_irqrestore(&rkt->lock, flags);
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
off += mr->offset;
if (mr->page_shift) {
@@ -208,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
ok:
return 1;
bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
+ rcu_read_unlock();
return 0;
}
/**
* qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
* @len: length of data
* @vaddr: virtual address to place data
* @rkey: rkey to check
* @acc: access flags
*
* Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
*/
int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc)
@@ -230,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
struct qib_mregion *mr;
unsigned n, m;
size_t off;
- unsigned long flags;
/*
* We use RKEY == zero for kernel virtual addresses
* (see qib_get_dma_mr and qib_dma.c).
*/
- spin_lock_irqsave(&rkt->lock, flags);
+ rcu_read_lock();
if (rkey == 0) {
struct qib_pd *pd = to_ipd(qp->ibqp.pd);
struct qib_ibdev *dev = to_idev(pd->ibpd.device);
if (pd->user)
goto bail;
- if (!dev->dma_mr)
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
- spin_unlock_irqrestore(&rkt->lock, flags);
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
- sge->mr = dev->dma_mr;
+ sge->mr = mr;
sge->vaddr = (void *) vaddr;
sge->length = len;
sge->sge_length = len;
@@ -257,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
goto ok;
}
- mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ mr = rcu_dereference(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
- spin_unlock_irqrestore(&rkt->lock, flags);
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
off += mr->offset;
if (mr->page_shift) {
@@ -302,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
ok:
return 1;
bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
+ rcu_read_unlock();
return 0;
}
@@ -325,7 +349,9 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
if (pd->user || rkey == 0)
goto bail;
- mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
+ mr = rcu_dereference_protected(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+ lockdep_is_held(&rkt->lock));
if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
goto bail;
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 43390217a026..19f1e6c45fb6 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+static int reply_failure(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
+}
+
static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
{
struct ib_mad_send_buf *send_buf;
@@ -90,14 +102,10 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
if (!ibp->sm_ah) {
if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
struct ib_ah *ah;
- struct ib_ah_attr attr;
- memset(&attr, 0, sizeof attr);
- attr.dlid = ibp->sm_lid;
- attr.port_num = ppd_from_ibp(ibp)->port;
- ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
+ ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
if (IS_ERR(ah))
- ret = -EINVAL;
+ ret = PTR_ERR(ah);
else {
send_buf->ah = ah;
ibp->sm_ah = to_iah(ah);
@@ -2051,6 +2059,298 @@ bail:
return ret;
}
+static int cc_get_classportinfo(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev)
+{
+ struct ib_cc_classportinfo_attr *p =
+ (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->base_version = 1;
+ p->class_version = 1;
+ p->cap_mask = 0;
+
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_info(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_info_attr *p =
+ (struct ib_cc_info_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->congestion_info = 0;
+ p->control_table_cap = ppd->cc_max_table_entries;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ int i;
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct ib_cc_congestion_entry_shadow *entries;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ entries = ppd->congestion_entries_shadow->entries;
+ p->port_control = cpu_to_be16(
+ ppd->congestion_entries_shadow->port_control);
+ p->control_map = cpu_to_be16(
+ ppd->congestion_entries_shadow->control_map);
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ p->entries[i].ccti_increase = entries[i].ccti_increase;
+ p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
+ p->entries[i].trigger_threshold = entries[i].trigger_threshold;
+ p->entries[i].ccti_min = entries[i].ccti_min;
+ }
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 max_cct_block;
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ max_cct_block =
+ (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
+ max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
+
+ if (cct_block_index > max_cct_block) {
+ spin_unlock(&ppd->cc_shadow_lock);
+ goto bail;
+ }
+
+ ccp->attr_mod = cpu_to_be32(cct_block_index);
+
+ cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
+
+ cct_entry--;
+
+ p->ccti_limit = cpu_to_be16(cct_entry);
+
+ entries = &ppd->ccti_entries_shadow->
+ entries[IB_CCT_ENTRIES * cct_block_index];
+ cct_entry %= IB_CCT_ENTRIES;
+
+ for (i = 0; i <= cct_entry; i++)
+ p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int i;
+
+ ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
+
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ ppd->congestion_entries[i].ccti_increase =
+ p->entries[i].ccti_increase;
+
+ ppd->congestion_entries[i].ccti_timer =
+ be16_to_cpu(p->entries[i].ccti_timer);
+
+ ppd->congestion_entries[i].trigger_threshold =
+ p->entries[i].trigger_threshold;
+
+ ppd->congestion_entries[i].ccti_min =
+ p->entries[i].ccti_min;
+ }
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ /* If this packet is the first in the sequence then
+ * zero the total table entry count.
+ */
+ if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
+ ppd->total_cct_entry = 0;
+
+ cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
+
+ /* ccti_limit is 0 to 63 */
+ ppd->total_cct_entry += (cct_entry + 1);
+
+ if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
+ goto bail;
+
+ ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
+
+ entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
+
+ for (i = 0; i <= cct_entry; i++)
+ entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
+ memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
+ (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
+
+ ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
+ ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
+ memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
+ IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int check_cc_key(struct qib_ibport *ibp,
+ struct ib_cc_mad *ccp, int mad_flags)
+{
+ return 0;
+}
+
+static int process_cc(struct ib_device *ibdev, int mad_flags,
+ u8 port, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ int ret;
+
+ *out_mad = *in_mad;
+
+ if (ccp->class_version != 2) {
+ ccp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *)ccp);
+ goto bail;
+ }
+
+ ret = check_cc_key(ibp, ccp, mad_flags);
+ if (ret)
+ goto bail;
+
+ switch (ccp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CLASSPORTINFO:
+ ret = cc_get_classportinfo(ccp, ibdev);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_INFO:
+ ret = cc_get_congestion_info(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_get_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_get_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_set_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_set_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ /* FALLTHROUGH */
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ case IB_MGMT_METHOD_TRAP:
+ default:
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) ccp);
+ }
+
+bail:
+ return ret;
+}
+
/**
* qib_process_mad - process an incoming MAD packet
* @ibdev: the infiniband device this packet came in on
@@ -2075,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
int ret;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2086,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
ret = process_perf(ibdev, port, in_mad, out_mad);
goto bail;
+ case IB_MGMT_CLASS_CONG_MGMT:
+ if (!ppd->congestion_entries_shadow ||
+ !qib_cc_table_size) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+ goto bail;
+
default:
ret = IB_MAD_RESULT_SUCCESS;
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index ecc416cdbaaa..57bd3fa016bc 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +31,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef _QIB_MAD_H
+#define _QIB_MAD_H
#include <rdma/ib_pma.h>
@@ -223,6 +225,198 @@ struct ib_pma_portcounters_cong {
#define IB_PMA_SEL_CONG_ROUTING 0x08
/*
+ * Congestion control class attributes
+ */
+#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
+#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
+#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
+#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
+#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
+#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
+#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
+#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
+#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
+#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
+
+/* generalizations for threshold values */
+#define IB_CC_THRESHOLD_NONE 0x0
+#define IB_CC_THRESHOLD_MIN 0x1
+#define IB_CC_THRESHOLD_MAX 0xf
+
+/* CCA MAD header constants */
+#define IB_CC_MAD_LOGDATA_LEN 32
+#define IB_CC_MAD_MGMTDATA_LEN 192
+
+struct ib_cc_mad {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 cckey;
+
+ /* For CongestionLog attribute only */
+ u8 log_data[IB_CC_MAD_LOGDATA_LEN];
+
+ u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
+} __packed;
+
+/*
+ * Congestion Control class portinfo capability mask bits
+ */
+#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
+#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
+#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
+#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
+
+struct ib_cc_classportinfo_attr {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ u8 reserved[3];
+ u8 resp_time_value; /* only lower 5 bits */
+ union ib_gid redirect_gid;
+ __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp; /* only lower 24 bits */
+ __be32 redirect_qkey;
+ union ib_gid trap_gid;
+ __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hl_qp; /* 8, 24 bits respectively */
+ __be32 trap_qkey;
+} __packed;
+
+/* Congestion control traps */
+#define IB_CC_TRAP_KEY_VIOLATION 0x0000
+
+struct ib_cc_trap_key_violation_attr {
+ __be16 source_lid;
+ u8 method;
+ u8 reserved1;
+ __be16 attrib_id;
+ __be32 attrib_mod;
+ __be32 qp;
+ __be64 cckey;
+ u8 sgid[16];
+ u8 padding[24];
+} __packed;
+
+/* Congestion info flags */
+#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
+#define IB_CC_TABLE_CAP_DEFAULT 31
+
+struct ib_cc_info_attr {
+ __be16 congestion_info;
+ u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
+} __packed;
+
+struct ib_cc_key_info_attr {
+ __be64 cckey;
+ u8 protect;
+ __be16 lease_period;
+ __be16 violations;
+} __packed;
+
+#define IB_CC_CL_CA_LOGEVENTS_LEN 208
+
+struct ib_cc_log_attr {
+ u8 log_type;
+ u8 congestion_flags;
+ __be16 threshold_event_counter;
+ __be16 threshold_congestion_event_map;
+ __be16 current_time_stamp;
+ u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
+} __packed;
+
+#define IB_CC_CLEC_SERVICETYPE_RC 0x0
+#define IB_CC_CLEC_SERVICETYPE_UC 0x1
+#define IB_CC_CLEC_SERVICETYPE_RD 0x2
+#define IB_CC_CLEC_SERVICETYPE_UD 0x3
+
+struct ib_cc_log_event {
+ u8 local_qp_cn_entry;
+ u8 remote_qp_number_cn_entry[3];
+ u8 sl_cn_entry:4;
+ u8 service_type_cn_entry:4;
+ __be32 remote_lid_cn_entry;
+ __be32 timestamp_cn_entry;
+} __packed;
+
+/* Sixteen congestion entries */
+#define IB_CC_CCS_ENTRIES 16
+
+/* Port control flags */
+#define IB_CC_CCS_PC_SL_BASED 0x01
+
+struct ib_cc_congestion_entry {
+ u8 ccti_increase;
+ __be16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_entry_shadow {
+ u8 ccti_increase;
+ u16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_setting_attr {
+ __be16 port_control;
+ __be16 control_map;
+ struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+struct ib_cc_congestion_setting_attr_shadow {
+ u16 port_control;
+ u16 control_map;
+ struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
+#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
+
+/* 64 Congestion Control table entries in a single MAD */
+#define IB_CCT_ENTRIES 64
+#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
+
+struct ib_cc_table_entry {
+ __be16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_entry_shadow {
+ u16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_attr {
+ __be16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+struct ib_cc_table_attr_shadow {
+ u16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+#define CC_TABLE_SHADOW_MAX \
+ (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
+
+struct cc_table_shadow {
+ u16 ccti_last_entry;
+ struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
+} __packed;
+
+#endif /* _QIB_MAD_H */
+/*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2.
* We support 5 counters which only count the mandatory quantities.
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 08944e2ee334..e6687ded8210 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
return container_of(ibfmr, struct qib_fmr, ibfmr);
}
+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
+ int count)
+{
+ int m, i = 0;
+ int rval = 0;
+
+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ for (; i < m; i++) {
+ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
+ if (!mr->map[i])
+ goto bail;
+ }
+ mr->mapsz = m;
+ init_completion(&mr->comp);
+ /* count returning the ptr to user */
+ atomic_set(&mr->refcount, 1);
+ mr->pd = pd;
+ mr->max_segs = count;
+out:
+ return rval;
+bail:
+ while (i)
+ kfree(mr->map[--i]);
+ rval = -ENOMEM;
+ goto out;
+}
+
+static void deinit_qib_mregion(struct qib_mregion *mr)
+{
+ int i = mr->mapsz;
+
+ mr->mapsz = 0;
+ while (i)
+ kfree(mr->map[--i]);
+}
+
+
/**
* qib_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
@@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
*/
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct qib_ibdev *dev = to_idev(pd->device);
- struct qib_mr *mr;
+ struct qib_mr *mr = NULL;
struct ib_mr *ret;
- unsigned long flags;
+ int rval;
if (to_ipd(pd)->user) {
ret = ERR_PTR(-EPERM);
@@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
goto bail;
}
- mr->mr.access_flags = acc;
- atomic_set(&mr->mr.refcount, 0);
+ rval = init_qib_mregion(&mr->mr, pd, 0);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail;
+ }
+
- spin_lock_irqsave(&dev->lk_table.lock, flags);
- if (!dev->dma_mr)
- dev->dma_mr = &mr->mr;
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+ rval = qib_alloc_lkey(&mr->mr, 1);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail_mregion;
+ }
+ mr->mr.access_flags = acc;
ret = &mr->ibmr;
+done:
+ return ret;
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
bail:
- return ret;
+ kfree(mr);
+ goto done;
}
-static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
{
struct qib_mr *mr;
- int m, i = 0;
+ int rval = -ENOMEM;
+ int m;
/* Allocate struct plus pointers to first level page tables. */
m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
- mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
if (!mr)
- goto done;
-
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
- if (!mr->mr.map[i])
- goto bail;
- }
- mr->mr.mapsz = m;
- mr->mr.page_shift = 0;
- mr->mr.max_segs = count;
+ goto bail;
+ rval = init_qib_mregion(&mr->mr, pd, count);
+ if (rval)
+ goto bail;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
- if (!qib_alloc_lkey(lk_table, &mr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&mr->mr, 0);
+ if (rval)
+ goto bail_mregion;
mr->ibmr.lkey = mr->mr.lkey;
mr->ibmr.rkey = mr->mr.lkey;
+done:
+ return mr;
- atomic_set(&mr->mr.refcount, 0);
- goto done;
-
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
bail:
- while (i)
- kfree(mr->mr.map[--i]);
kfree(mr);
- mr = NULL;
-
-done:
- return mr;
+ mr = ERR_PTR(rval);
+ goto done;
}
/**
@@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
int n, m, i;
struct ib_mr *ret;
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(num_phys_buf, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
goto bail;
}
- mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
mr->mr.access_flags = acc;
- mr->umem = NULL;
m = 0;
n = 0;
@@ -186,7 +221,6 @@ bail:
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the QLogic_IB driver
*
@@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
- mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
- if (!mr) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(n, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
ib_umem_release(umem);
goto bail;
}
- mr->mr.pd = pd;
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
@@ -271,21 +304,25 @@ bail:
int qib_dereg_mr(struct ib_mr *ibmr)
{
struct qib_mr *mr = to_imr(ibmr);
- struct qib_ibdev *dev = to_idev(ibmr->device);
- int ret;
- int i;
-
- ret = qib_free_lkey(dev, &mr->mr);
- if (ret)
- return ret;
-
- i = mr->mr.mapsz;
- while (i)
- kfree(mr->mr.map[--i]);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&mr->mr);
+
+ qib_put_mr(&mr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&mr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&mr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&mr->mr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
- return 0;
+out:
+ return ret;
}
/*
@@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct qib_mr *mr;
- mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table);
- if (mr == NULL)
- return ERR_PTR(-ENOMEM);
-
- mr->mr.pd = pd;
- mr->mr.user_base = 0;
- mr->mr.iova = 0;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = 0;
- mr->umem = NULL;
+ mr = alloc_mr(max_page_list_len, pd);
+ if (IS_ERR(mr))
+ return (struct ib_mr *)mr;
return &mr->ibmr;
}
@@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
if (size > PAGE_SIZE)
return ERR_PTR(-EINVAL);
- pl = kmalloc(sizeof *pl, GFP_KERNEL);
+ pl = kzalloc(sizeof *pl, GFP_KERNEL);
if (!pl)
return ERR_PTR(-ENOMEM);
- pl->page_list = kmalloc(size, GFP_KERNEL);
+ pl->page_list = kzalloc(size, GFP_KERNEL);
if (!pl->page_list)
goto err_free;
@@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct qib_fmr *fmr;
- int m, i = 0;
+ int m;
struct ib_fmr *ret;
+ int rval = -ENOMEM;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
- fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
if (!fmr)
goto bail;
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
- GFP_KERNEL);
- if (!fmr->mr.map[i])
- goto bail;
- }
- fmr->mr.mapsz = m;
+ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ if (rval)
+ goto bail;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
- if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&fmr->mr, 0);
+ if (rval)
+ goto bail_mregion;
fmr->ibfmr.rkey = fmr->mr.lkey;
fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
- fmr->mr.pd = pd;
- fmr->mr.user_base = 0;
- fmr->mr.iova = 0;
- fmr->mr.length = 0;
- fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
fmr->mr.page_shift = fmr_attr->page_shift;
- atomic_set(&fmr->mr.refcount, 0);
ret = &fmr->ibfmr;
- goto done;
+done:
+ return ret;
+bail_mregion:
+ deinit_qib_mregion(&fmr->mr);
bail:
- while (i)
- kfree(fmr->mr.map[--i]);
kfree(fmr);
- ret = ERR_PTR(-ENOMEM);
-
-done:
- return ret;
+ ret = ERR_PTR(rval);
+ goto done;
}
/**
@@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
u32 ps;
int ret;
- if (atomic_read(&fmr->mr.refcount))
+ i = atomic_read(&fmr->mr.refcount);
+ if (i > 2)
return -EBUSY;
if (list_len > fmr->mr.max_segs) {
@@ -490,16 +510,27 @@ int qib_unmap_fmr(struct list_head *fmr_list)
int qib_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct qib_fmr *fmr = to_ifmr(ibfmr);
- int ret;
- int i;
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&fmr->mr);
+ qib_put_mr(&fmr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&fmr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&fmr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&fmr->mr);
+ kfree(fmr);
+out:
+ return ret;
+}
- ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr);
- if (ret)
- return ret;
+void mr_rcu_callback(struct rcu_head *list)
+{
+ struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
- i = fmr->mr.mapsz;
- while (i)
- kfree(fmr->mr.map[--i]);
- kfree(fmr);
- return 0;
+ complete(&mr->comp);
}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 790646ef5106..062c301ebf53 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -224,8 +224,9 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
}
do_intx:
if (ret) {
- qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, "
- "falling back to INTx\n", tabsize, ret);
+ qib_dev_err(dd,
+ "pci_enable_msix %d vectors failed: %d, falling back to INTx\n",
+ tabsize, ret);
tabsize = 0;
}
for (i = 0; i < tabsize; i++)
@@ -251,8 +252,9 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
ret = pci_enable_msi(pdev);
if (ret)
- qib_dev_err(dd, "pci_enable_msi failed: %d, "
- "interrupts may not work\n", ret);
+ qib_dev_err(dd,
+ "pci_enable_msi failed: %d, interrupts may not work\n",
+ ret);
/* continue even if it fails, we may still be OK... */
pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
@@ -358,8 +360,8 @@ int qib_reinit_intr(struct qib_devdata *dd)
pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
if (!pos) {
- qib_dev_err(dd, "Can't find MSI capability, "
- "can't restore MSI settings\n");
+ qib_dev_err(dd,
+ "Can't find MSI capability, can't restore MSI settings\n");
ret = 0;
/* nothing special for MSIx, just MSI */
goto bail;
@@ -471,8 +473,8 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
r = pci_enable_device(dd->pcidev);
if (r)
- qib_dev_err(dd, "pci_enable_device failed after "
- "reset: %d\n", r);
+ qib_dev_err(dd,
+ "pci_enable_device failed after reset: %d\n", r);
}
/* code to adjust PCIe capabilities. */
@@ -717,15 +719,16 @@ qib_pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
}
- qib_devinfo(pdev, "QIB mmio_enabled function called, "
- "read wordscntr %Lx, returning %d\n", words, ret);
+ qib_devinfo(pdev,
+ "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ words, ret);
return ret;
}
static pci_ers_result_t
qib_pci_slot_reset(struct pci_dev *pdev)
{
- qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
+ qib_devinfo(pdev, "QIB slot_reset function called, ignored\n");
return PCI_ERS_RESULT_CAN_RECOVER;
}
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 1ce56b51ab1a..4850d03870c2 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -250,23 +250,33 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
spin_lock_irqsave(&dev->qpt_lock, flags);
- if (ibp->qp0 == qp) {
+ if (rcu_dereference_protected(ibp->qp0,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
atomic_dec(&qp->refcount);
rcu_assign_pointer(ibp->qp0, NULL);
- } else if (ibp->qp1 == qp) {
+ } else if (rcu_dereference_protected(ibp->qp1,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
atomic_dec(&qp->refcount);
rcu_assign_pointer(ibp->qp1, NULL);
} else {
- struct qib_qp *q, **qpp;
+ struct qib_qp *q;
+ struct qib_qp __rcu **qpp;
qpp = &dev->qp_table[n];
- for (; (q = *qpp) != NULL; qpp = &q->next)
+ q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&dev->qpt_lock));
+ for (; q; qpp = &q->next) {
if (q == qp) {
atomic_dec(&qp->refcount);
- rcu_assign_pointer(*qpp, qp->next);
- qp->next = NULL;
+ *qpp = qp->next;
+ rcu_assign_pointer(qp->next, NULL);
+ q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&dev->qpt_lock));
break;
}
+ q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&dev->qpt_lock));
+ }
}
spin_unlock_irqrestore(&dev->qpt_lock, flags);
@@ -302,10 +312,12 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
spin_lock_irqsave(&dev->qpt_lock, flags);
for (n = 0; n < dev->qp_table_size; n++) {
- qp = dev->qp_table[n];
+ qp = rcu_dereference_protected(dev->qp_table[n],
+ lockdep_is_held(&dev->qpt_lock));
rcu_assign_pointer(dev->qp_table[n], NULL);
- for (; qp; qp = qp->next)
+ for (; qp; qp = rcu_dereference_protected(qp->next,
+ lockdep_is_held(&dev->qpt_lock)))
qp_inuse++;
}
spin_unlock_irqrestore(&dev->qpt_lock, flags);
@@ -337,7 +349,8 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
unsigned n = qpn_hash(dev, qpn);
rcu_read_lock();
- for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next)
+ for (qp = rcu_dereference(dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
if (qp->ibqp.qp_num == qpn)
break;
}
@@ -406,18 +419,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
unsigned n;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (clr_sends) {
while (qp->s_last != qp->s_head) {
@@ -427,7 +431,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
@@ -437,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
qp->s_last = 0;
}
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
}
@@ -450,7 +454,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
}
@@ -495,7 +499,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
if (!(qp->s_flags & QIB_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_tx) {
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index b641416148eb..3ab341320ead 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
/* FALLTHROUGH */
@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
/* Copy SGE state in case we need to resend */
qp->s_rdma_mr = e->rdma_sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
qp->s_cur_sge = &qp->s_ack_rdma_sge;
qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
@@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
if (len != 0) {
@@ -2024,11 +2024,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
break;
@@ -2116,7 +2112,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
@@ -2188,7 +2184,7 @@ send_last:
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
@@ -2210,7 +2206,7 @@ send_last:
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
be64_to_cpu(ateth->compare_data),
sdata);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index c0ee7e095d81..357b6cfcd46c 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -110,7 +110,7 @@ bad_lkey:
while (j) {
struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
@@ -501,7 +501,7 @@ again:
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->wr.wr.atomic.swap);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
@@ -525,7 +525,7 @@ again:
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -542,11 +542,7 @@ again:
sqp->s_len -= len;
}
if (release)
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
@@ -782,7 +778,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index ac065dd6b693..a322d5171a2c 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -342,15 +342,17 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0, 0);
if (ret < 0)
- qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
- " (%s)\n", chn, where);
+ qib_dev_err(dd,
+ "Failed checking TRIMDONE, chn %d (%s)\n",
+ chn, where);
if (!(ret & 0x10)) {
int probe;
baduns |= (1 << chn);
- qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
- " (%s)\n", chn, ret, where);
+ qib_dev_err(dd,
+ "TRIMDONE cleared on chn %d (%02X). (%s)\n",
+ chn, ret, where);
probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_PGUDP(0), 0, 0);
qib_dev_err(dd, "probe is %d (%02X)\n",
@@ -375,8 +377,8 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0x10, 0x10);
if (ret < 0)
- qib_dev_err(dd, "Failed re-setting "
- "TRIMDONE, chn %d (%s)\n",
+ qib_dev_err(dd,
+ "Failed re-setting TRIMDONE, chn %d (%s)\n",
chn, where);
}
}
@@ -1144,10 +1146,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "pre-read failed: elt %d,"
- " addr 0x%X, chnl %d\n",
- (sloc & 0xF),
- (sloc >> 9) & 0x3f, chnl);
+ qib_dev_err(dd,
+ "pre-read failed: elt %d, addr 0x%X, chnl %d\n",
+ (sloc & 0xF),
+ (sloc >> 9) & 0x3f, chnl);
return ret;
}
val = (ret & ~mask) | (val & mask);
@@ -1157,9 +1159,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "Global WR failed: elt %d,"
- " addr 0x%X, val %02X\n",
- (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+ qib_dev_err(dd,
+ "Global WR failed: elt %d, addr 0x%X, val %02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, val);
}
return ret;
}
@@ -1173,11 +1175,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- qib_dev_err(dd, "Write failed: elt %d,"
- " addr 0x%X, chnl %d, val 0x%02X,"
- " mask 0x%02X\n",
- (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
- val & 0xFF, mask & 0xFF);
+ qib_dev_err(dd,
+ "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+ val & 0xFF, mask & 0xFF);
break;
}
}
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 12a9604310d7..3fc514431212 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -276,8 +277,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
GFP_KERNEL);
if (!ppd->sdma_descq) {
- qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor "
- "FIFO memory\n");
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA descriptor FIFO memory\n");
goto bail;
}
@@ -285,8 +286,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
if (!ppd->sdma_head_dma) {
- qib_dev_err(ppd->dd, "failed to allocate SendDMA "
- "head memory\n");
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA head memory\n");
goto cleanup_descq;
}
ppd->sdma_head_dma[0] = 0;
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index dd9cd49d0979..034cc821de5c 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,41 +34,7 @@
#include <linux/ctype.h>
#include "qib.h"
-
-/**
- * qib_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * Returns the number of bytes consumed, or negative value on error.
- */
-static int qib_parse_ushort(const char *str, unsigned short *valp)
-{
- unsigned long val;
- char *end;
- int ret;
-
- if (!isdigit(str[0])) {
- ret = -EINVAL;
- goto bail;
- }
-
- val = simple_strtoul(str, &end, 0);
-
- if (val > 0xffff) {
- ret = -EINVAL;
- goto bail;
- }
-
- *valp = val;
-
- ret = end + 1 - str;
- if (ret == 0)
- ret = -EINVAL;
-
-bail:
- return ret;
-}
+#include "qib_mad.h"
/* start of per-port functions */
/*
@@ -90,7 +57,11 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
int ret;
u16 val;
- ret = qib_parse_ushort(buf, &val);
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
+ qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ return ret;
+ }
/*
* Set the "intentional" heartbeat enable per either of
@@ -99,10 +70,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
* because entering loopback mode overrides it and automatically
* disables heartbeat.
*/
- if (ret >= 0)
- ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
- if (ret < 0)
- qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
return ret < 0 ? ret : count;
}
@@ -126,12 +94,14 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
int ret;
u16 val;
- ret = qib_parse_ushort(buf, &val);
- if (ret > 0)
- qib_set_led_override(ppd, val);
- else
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
qib_dev_err(dd, "attempt to set invalid LED override\n");
- return ret < 0 ? ret : count;
+ return ret;
+ }
+
+ qib_set_led_override(ppd, val);
+ return count;
}
static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
@@ -231,6 +201,98 @@ static struct attribute *port_default_attributes[] = {
NULL
};
+/*
+ * Start of per-port congestion control structures and support code
+ */
+
+/*
+ * Congestion control table size followed by table entries
+ */
+static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
+ return -EINVAL;
+
+ ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+ + sizeof(__be16);
+
+ if (pos > ret)
+ return -EINVAL;
+
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->ccti_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+ /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static struct kobj_type qib_port_cc_ktype = {
+ .release = qib_port_release,
+};
+
+static struct bin_attribute cc_table_bin_attr = {
+ .attr = {.name = "cc_table_bin", .mode = 0444},
+ .read = read_cc_table_bin,
+ .size = PAGE_SIZE,
+};
+
+/*
+ * Congestion settings: port control, control map and an array of 16
+ * entries for the congestion entries - increase, timer, event log
+ * trigger threshold and the minimum injection rate delay.
+ */
+static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return -EINVAL;
+
+ ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
+
+ if (pos > ret)
+ return -EINVAL;
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->congestion_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static struct bin_attribute cc_setting_bin_attr = {
+ .attr = {.name = "cc_settings_bin", .mode = 0444},
+ .read = read_cc_setting_bin,
+ .size = PAGE_SIZE,
+};
+
+
static ssize_t qib_portattr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -253,10 +315,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
return pattr->store(ppd, buf, len);
}
-static void qib_port_release(struct kobject *kobj)
-{
- /* nothing to do since memory is freed by qib_free_devdata() */
-}
static const struct sysfs_ops qib_port_ops = {
.show = qib_portattr_show,
@@ -411,12 +469,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, diagc_kobj);
struct qib_ibport *qibp = &ppd->ibport_data;
- char *endp;
- long val = simple_strtol(buf, &endp, 0);
-
- if (val < 0 || endp == buf)
- return -EINVAL;
+ u32 val;
+ int ret;
+ ret = kstrtou32(buf, 0, &val);
+ if (ret)
+ return ret;
*(u32 *)((char *) qibp + dattr->counter) = val;
return size;
}
@@ -649,8 +707,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
int ret;
if (!port_num || port_num > dd->num_pports) {
- qib_dev_err(dd, "Skipping infiniband class with "
- "invalid port %u\n", port_num);
+ qib_dev_err(dd,
+ "Skipping infiniband class with invalid port %u\n",
+ port_num);
ret = -ENODEV;
goto bail;
}
@@ -659,8 +718,9 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
"linkcontrol");
if (ret) {
- qib_dev_err(dd, "Skipping linkcontrol sysfs info, "
- "(err %d) port %u\n", ret, port_num);
+ qib_dev_err(dd,
+ "Skipping linkcontrol sysfs info, (err %d) port %u\n",
+ ret, port_num);
goto bail;
}
kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
@@ -668,26 +728,70 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
"sl2vl");
if (ret) {
- qib_dev_err(dd, "Skipping sl2vl sysfs info, "
- "(err %d) port %u\n", ret, port_num);
- goto bail_sl;
+ qib_dev_err(dd,
+ "Skipping sl2vl sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_link;
}
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
"diag_counters");
if (ret) {
- qib_dev_err(dd, "Skipping diag_counters sysfs info, "
- "(err %d) port %u\n", ret, port_num);
- goto bail_diagc;
+ qib_dev_err(dd,
+ "Skipping diag_counters sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_sl;
}
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return 0;
+
+ ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
+ kobj, "CCMgtA");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_diagc;
+ }
+
+ kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc;
+ }
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc_entry_bin;
+ }
+
+ qib_devinfo(dd->pcidev,
+ "IB%u: Congestion Control Agent enabled for port %d\n",
+ dd->unit, port_num);
+
return 0;
+bail_cc_entry_bin:
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
+bail_cc:
+ kobject_put(&ppd->pport_cc_kobj);
bail_diagc:
- kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->diagc_kobj);
bail_sl:
+ kobject_put(&ppd->sl2vl_kobj);
+bail_link:
kobject_put(&ppd->pport_kobj);
bail:
return ret;
@@ -720,7 +824,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i];
- kobject_put(&ppd->pport_kobj);
+ if (qib_cc_table_size &&
+ ppd->congestion_entries_shadow) {
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ kobject_put(&ppd->pport_cc_kobj);
+ }
kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->pport_kobj);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
index ddde72e11edb..647f7beb1b0a 100644
--- a/drivers/infiniband/hw/qib/qib_twsi.c
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -449,8 +450,9 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
goto failed_write;
ret = qib_twsi_wr(dd, addr, 0);
if (ret) {
- qib_dev_err(dd, "Failed to write interface"
- " write addr %02X\n", addr);
+ qib_dev_err(dd,
+ "Failed to write interface write addr %02X\n",
+ addr);
goto failed_write;
}
}
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index ce7387ff5d91..aa3a8035bb68 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -281,11 +281,7 @@ inv:
set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
} else
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
@@ -403,14 +399,9 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
-last_imm:
qib_copy_sge(&qp->r_sge, data, tlen, 0);
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
+last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
@@ -493,13 +484,7 @@ rdma_last_imm:
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->
- refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
else {
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
@@ -509,6 +494,8 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ qib_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
@@ -524,11 +511,7 @@ rdma_last:
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
break;
default:
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index a468bf2d4465..d6c7fe7f88d5 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -194,11 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
}
length -= len;
}
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
@@ -556,11 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 7b6c3bffa9d9..fc9b205c2412 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
@@ -333,7 +333,8 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
* @qp: the QP to post on
* @wr: the work request to send
*/
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
+static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
+ int *scheduled)
{
struct qib_swqe *wqe;
u32 next;
@@ -435,11 +436,17 @@ bail_inval_free:
while (j) {
struct qib_sge *sge = &wqe->sg_list[--j];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
bail_inval:
ret = -EINVAL;
bail:
+ if (!ret && !wr->next &&
+ !qib_sdma_empty(
+ dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
+ qib_schedule_send(qp);
+ *scheduled = 1;
+ }
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -457,9 +464,10 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct qib_qp *qp = to_iqp(ibqp);
int err = 0;
+ int scheduled = 0;
for (; wr; wr = wr->next) {
- err = qib_post_one_send(qp, wr);
+ err = qib_post_one_send(qp, wr, &scheduled);
if (err) {
*bad_wr = wr;
goto bail;
@@ -467,7 +475,8 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
/* Try to do the send work in the caller's context. */
- qib_do_send(&qp->s_work);
+ if (!scheduled)
+ qib_do_send(&qp->s_work);
bail:
return err;
@@ -978,7 +987,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
if (tx->mr) {
- atomic_dec(&tx->mr->refcount);
+ qib_put_mr(tx->mr);
tx->mr = NULL;
}
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1336,7 +1345,7 @@ done:
}
qib_sendbuf_done(dd, pbufn);
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_wqe) {
@@ -1845,6 +1854,23 @@ bail:
return ret;
}
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
+{
+ struct ib_ah_attr attr;
+ struct ib_ah *ah = ERR_PTR(-EINVAL);
+ struct qib_qp *qp0;
+
+ memset(&attr, 0, sizeof attr);
+ attr.dlid = dlid;
+ attr.port_num = ppd_from_ibp(ibp)->port;
+ rcu_read_lock();
+ qp0 = rcu_dereference(ibp->qp0);
+ if (qp0)
+ ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ rcu_read_unlock();
+ return ah;
+}
+
/**
* qib_destroy_ah - destroy an address handle
* @ibah: the AH to destroy
@@ -2060,13 +2086,15 @@ int qib_register_ib_device(struct qib_devdata *dd)
spin_lock_init(&dev->lk_table.lock);
dev->lk_table.max = 1 << ib_qib_lkey_table_size;
lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
- dev->lk_table.table = (struct qib_mregion **)
+ dev->lk_table.table = (struct qib_mregion __rcu **)
__get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
if (dev->lk_table.table == NULL) {
ret = -ENOMEM;
goto err_lk;
}
- memset(dev->lk_table.table, 0, lk_tab_size);
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ for (i = 0; i < dev->lk_table.max; i++)
+ RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
INIT_LIST_HEAD(&dev->pending_mmaps);
spin_lock_init(&dev->pending_lock);
dev->mmap_offset = PAGE_SIZE;
@@ -2289,3 +2317,17 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
get_order(lk_tab_size));
kfree(dev->qp_table);
}
+
+/*
+ * This must be called with s_lock held.
+ */
+void qib_schedule_send(struct qib_qp *qp)
+{
+ if (qib_send_ok(qp)) {
+ struct qib_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ queue_work(ppd->qib_wq, &qp->s_work);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 487606024659..aff8b2c17886 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,6 +41,7 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
@@ -302,6 +303,9 @@ struct qib_mregion {
u32 max_segs; /* number of qib_segs in all the arrays */
u32 mapsz; /* size of the map array */
u8 page_shift; /* 0 - non unform/non powerof2 sizes */
+ u8 lkey_published; /* in global table */
+ struct completion comp; /* complete when refcount goes to zero */
+ struct rcu_head list;
atomic_t refcount;
struct qib_segarray *map[0]; /* the segments */
};
@@ -416,7 +420,7 @@ struct qib_qp {
/* read mostly fields above and below */
struct ib_ah_attr remote_ah_attr;
struct ib_ah_attr alt_ah_attr;
- struct qib_qp *next; /* link list for QPN hash table */
+ struct qib_qp __rcu *next; /* link list for QPN hash table */
struct qib_swqe *s_wq; /* send work queue */
struct qib_mmap_info *ip;
struct qib_ib_header *s_hdr; /* next packet header to send */
@@ -646,7 +650,7 @@ struct qib_lkey_table {
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */
- struct qib_mregion **table;
+ struct qib_mregion __rcu **table;
};
struct qib_opcode_stats {
@@ -655,8 +659,8 @@ struct qib_opcode_stats {
};
struct qib_ibport {
- struct qib_qp *qp0;
- struct qib_qp *qp1;
+ struct qib_qp __rcu *qp0;
+ struct qib_qp __rcu *qp1;
struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
struct qib_ah *sm_ah;
struct qib_ah *smi_ah;
@@ -723,12 +727,13 @@ struct qib_ibport {
struct qib_opcode_stats opstats[128];
};
+
struct qib_ibdev {
struct ib_device ibdev;
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock; /* protect mmap_offset */
u32 mmap_offset;
- struct qib_mregion *dma_mr;
+ struct qib_mregion __rcu *dma_mr;
/* QP numbers are shared by all IB ports */
struct qib_qpn_table qpn_table;
@@ -739,7 +744,7 @@ struct qib_ibdev {
struct list_head memwait; /* list for wait kernel memory */
struct list_head txreq_free;
struct timer_list mem_timer;
- struct qib_qp **qp_table;
+ struct qib_qp __rcu **qp_table;
struct qib_pio_header *pio_hdrs;
dma_addr_t pio_hdrs_phys;
/* list of QPs waiting for RNR timer */
@@ -832,11 +837,7 @@ extern struct workqueue_struct *qib_cq_wq;
/*
* This must be called with s_lock held.
*/
-static inline void qib_schedule_send(struct qib_qp *qp)
-{
- if (qib_send_ok(qp))
- queue_work(ib_wq, &qp->s_work);
-}
+void qib_schedule_send(struct qib_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
{
@@ -933,6 +934,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
+
void qib_rc_rnr_retry(unsigned long arg);
void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
@@ -944,9 +947,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+void qib_free_lkey(struct qib_mregion *mr);
int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_sge *isge, struct ib_sge *sge, int acc);
@@ -1014,6 +1017,29 @@ int qib_unmap_fmr(struct list_head *fmr_list);
int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+static inline void qib_get_mr(struct qib_mregion *mr)
+{
+ atomic_inc(&mr->refcount);
+}
+
+void mr_rcu_callback(struct rcu_head *list);
+
+static inline void qib_put_mr(struct qib_mregion *mr)
+{
+ if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ call_rcu(&mr->list, mr_rcu_callback);
+}
+
+static inline void qib_put_ss(struct qib_sge_state *ss)
+{
+ while (ss->num_sge) {
+ qib_put_mr(ss->sge.mr);
+ if (--ss->num_sge)
+ ss->sge = *ss->sg_list++;
+ }
+}
+
+
void qib_release_mmap_info(struct kref *ref);
struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index 561b8bca4060..1d7281c5a02e 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -102,10 +103,10 @@ int qib_enable_wc(struct qib_devdata *dd)
u64 atmp;
atmp = pioaddr & ~(piolen - 1);
if (atmp < addr || (atmp + piolen) > (addr + len)) {
- qib_dev_err(dd, "No way to align address/size "
- "(%llx/%llx), no WC mtrr\n",
- (unsigned long long) atmp,
- (unsigned long long) piolen << 1);
+ qib_dev_err(dd,
+ "No way to align address/size (%llx/%llx), no WC mtrr\n",
+ (unsigned long long) atmp,
+ (unsigned long long) piolen << 1);
ret = -ENODEV;
} else {
pioaddr = atmp;
@@ -120,8 +121,7 @@ int qib_enable_wc(struct qib_devdata *dd)
if (cookie < 0) {
{
qib_devinfo(dd->pcidev,
- "mtrr_add() WC for PIO bufs "
- "failed (%d)\n",
+ "mtrr_add() WC for PIO bufs failed (%d)\n",
cookie);
ret = -EINVAL;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 86df632ea612..ca43901ed861 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,8 @@ enum {
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
+ IPOIB_STOP_NEIGH_GC = 11,
+ IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -260,6 +262,20 @@ struct ipoib_ethtool_st {
u16 max_coalesced_frames;
};
+struct ipoib_neigh_hash {
+ struct ipoib_neigh __rcu **buckets;
+ struct rcu_head rcu;
+ u32 mask;
+ u32 size;
+};
+
+struct ipoib_neigh_table {
+ struct ipoib_neigh_hash __rcu *htbl;
+ rwlock_t rwlock;
+ atomic_t entries;
+ struct completion flushed;
+};
+
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
@@ -279,6 +295,8 @@ struct ipoib_dev_priv {
struct rb_root path_tree;
struct list_head path_list;
+ struct ipoib_neigh_table ntbl;
+
struct ipoib_mcast *broadcast;
struct list_head multicast_list;
struct rb_root multicast_tree;
@@ -291,7 +309,7 @@ struct ipoib_dev_priv {
struct work_struct flush_heavy;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
-
+ struct delayed_work neigh_reap_task;
struct ib_device *ca;
u8 port;
u16 pkey;
@@ -377,13 +395,16 @@ struct ipoib_neigh {
#ifdef CONFIG_INFINIBAND_IPOIB_CM
struct ipoib_cm_tx *cm;
#endif
- union ib_gid dgid;
+ u8 daddr[INFINIBAND_ALEN];
struct sk_buff_head queue;
- struct neighbour *neighbour;
struct net_device *dev;
struct list_head list;
+ struct ipoib_neigh __rcu *hnext;
+ struct rcu_head rcu;
+ atomic_t refcnt;
+ unsigned long alive;
};
#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
@@ -394,21 +415,17 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
}
-/*
- * We stash a pointer to our private neighbour information after our
- * hardware address in neigh->ha. The ALIGN() expression here makes
- * sure that this pointer is stored aligned so that an unaligned
- * load is not needed to dereference it.
- */
-static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
+void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
+static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
{
- return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
- INFINIBAND_ALEN, sizeof(void *));
+ if (atomic_dec_and_test(&neigh->refcnt))
+ ipoib_neigh_dtor(neigh);
}
-
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr);
+struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev);
-void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
+void ipoib_neigh_free(struct ipoib_neigh *neigh);
+void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
extern struct workqueue_struct *ipoib_workqueue;
@@ -425,7 +442,6 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
kref_put(&ah->ref, ipoib_free_ah);
}
-
int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
@@ -455,7 +471,7 @@ void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
-void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
@@ -517,10 +533,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
-static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- return IPOIB_CM_SUPPORTED(n->ha) &&
+ return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -575,7 +591,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
return 0;
}
-static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 014504d8e43c..95ecf4eadf5f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -811,9 +811,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (neigh) {
neigh->cm = NULL;
list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
tx->neigh = NULL;
}
@@ -1230,9 +1228,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (neigh) {
neigh->cm = NULL;
list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
tx->neigh = NULL;
}
@@ -1279,7 +1275,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
- tx->neigh->dgid.raw);
+ tx->neigh->daddr + 4);
tx->neigh = NULL;
}
}
@@ -1304,7 +1300,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
- qpn = IPOIB_QPN(neigh->neighbour->ha);
+ qpn = IPOIB_QPN(neigh->daddr);
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1320,9 +1316,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
if (neigh) {
neigh->cm = NULL;
list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
}
list_del(&p->list);
kfree(p);
@@ -1376,7 +1370,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
if (skb->protocol == htons(ETH_P_IP))
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
#endif
@@ -1397,7 +1391,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5c1bc995e560..f10221f40803 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -123,7 +123,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
skb_frag_size_set(frag, size);
skb->data_len += size;
- skb->truesize += size;
+ skb->truesize += PAGE_SIZE;
} else
skb_put(skb, length);
@@ -156,14 +156,18 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int buf_size;
+ int tailroom;
u64 *mapping;
- if (ipoib_ud_need_sg(priv->max_ib_mtu))
+ if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
buf_size = IPOIB_UD_HEAD_SIZE;
- else
+ tailroom = 128; /* reserve some tailroom for IP/TCP headers */
+ } else {
buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+ tailroom = 0;
+ }
- skb = dev_alloc_skb(buf_size + 4);
+ skb = dev_alloc_skb(buf_size + tailroom + 4);
if (unlikely(!skb))
return NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3974c290b667..97920b77a5d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -46,7 +46,8 @@
#include <linux/ip.h>
#include <linux/in.h>
-#include <net/dst.h>
+#include <linux/jhash.h>
+#include <net/arp.h>
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
@@ -84,6 +85,7 @@ struct ib_sa_client ipoib_sa_client;
static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device);
+static void ipoib_neigh_reclaim(struct rcu_head *rp);
static struct ib_client ipoib_client = {
.name = "ipoib",
@@ -264,30 +266,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path)
static void path_free(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_neigh *neigh, *tn;
struct sk_buff *skb;
- unsigned long flags;
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- spin_lock_irqsave(&priv->lock, flags);
-
- list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
- /*
- * It's safe to call ipoib_put_ah() inside priv->lock
- * here, because we know that path->ah will always
- * hold one more reference, so ipoib_put_ah() will
- * never do more than decrement the ref count.
- */
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
-
- ipoib_neigh_free(dev, neigh);
- }
+ ipoib_dbg(netdev_priv(dev), "path_free\n");
- spin_unlock_irqrestore(&priv->lock, flags);
+ /* remove all neigh connected to this path */
+ ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
if (path->ah)
ipoib_put_ah(path->ah);
@@ -458,19 +445,15 @@ static void path_rec_completion(int status,
}
kref_get(&path->ah->ref);
neigh->ah = path->ah;
- memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
- sizeof(union ib_gid));
- if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (ipoib_cm_enabled(dev, neigh->daddr)) {
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
path,
neigh));
if (!ipoib_cm_get(neigh)) {
list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
continue;
}
}
@@ -555,15 +538,15 @@ static int path_rec_start(struct net_device *dev,
return 0;
}
-/* called with rcu_read_lock */
-static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev)
+static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
+ struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
- neigh = ipoib_neigh_alloc(n, skb->dev);
+ neigh = ipoib_neigh_alloc(daddr, dev);
if (!neigh) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -572,9 +555,9 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
spin_lock_irqsave(&priv->lock, flags);
- path = __path_find(dev, n->ha + 4);
+ path = __path_find(dev, daddr + 4);
if (!path) {
- path = path_rec_create(dev, n->ha + 4);
+ path = path_rec_create(dev, daddr + 4);
if (!path)
goto err_path;
@@ -586,17 +569,13 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
if (path->ah) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
- memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
- sizeof(union ib_gid));
- if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (ipoib_cm_enabled(dev, neigh->daddr)) {
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
if (!ipoib_cm_get(neigh)) {
list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
goto err_drop;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -608,7 +587,8 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ ipoib_neigh_put(neigh);
return;
}
} else {
@@ -621,35 +601,20 @@ static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_
}
spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_neigh_put(neigh);
return;
err_list:
list_del(&neigh->list);
err_path:
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
err_drop:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-/* called with rcu_read_lock */
-static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
-
- /* Look up path record for unicasts */
- if (n->ha[4] != 0xff) {
- neigh_add_path(skb, n, dev);
- return;
- }
-
- /* Add in the P_Key for multicasts */
- n->ha[8] = (priv->pkey >> 8) & 0xff;
- n->ha[9] = priv->pkey & 0xff;
- ipoib_mcast_send(dev, n->ha + 4, skb);
+ ipoib_neigh_put(neigh);
}
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -710,94 +675,80 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh;
- struct neighbour *n = NULL;
+ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+ struct ipoib_header *header;
unsigned long flags;
- rcu_read_lock();
- if (likely(skb_dst(skb))) {
- n = dst_get_neighbour_noref(skb_dst(skb));
- if (!n) {
+ header = (struct ipoib_header *) skb->data;
+
+ if (unlikely(cb->hwaddr[4] == 0xff)) {
+ /* multicast, arrange "if" according to probability */
+ if ((header->proto != htons(ETH_P_IP)) &&
+ (header->proto != htons(ETH_P_IPV6)) &&
+ (header->proto != htons(ETH_P_ARP)) &&
+ (header->proto != htons(ETH_P_RARP))) {
+ /* ethertype not supported by IPoIB */
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
- goto unlock;
+ return NETDEV_TX_OK;
}
+ /* Add in the P_Key for multicast*/
+ cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+ cb->hwaddr[9] = priv->pkey & 0xff;
+
+ neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ if (likely(neigh))
+ goto send_using_neigh;
+ ipoib_mcast_send(dev, cb->hwaddr, skb);
+ return NETDEV_TX_OK;
}
- if (likely(n)) {
- if (unlikely(!*to_ipoib_neigh(n))) {
- ipoib_path_lookup(skb, n, dev);
- goto unlock;
- }
-
- neigh = *to_ipoib_neigh(n);
- if (unlikely((memcmp(&neigh->dgid.raw,
- n->ha + 4,
- sizeof(union ib_gid))) ||
- (neigh->dev != dev))) {
- spin_lock_irqsave(&priv->lock, flags);
- /*
- * It's safe to call ipoib_put_ah() inside
- * priv->lock here, because we know that
- * path->ah will always hold one more reference,
- * so ipoib_put_ah() will never do more than
- * decrement the ref count.
- */
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- list_del(&neigh->list);
- ipoib_neigh_free(dev, neigh);
- spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_path_lookup(skb, n, dev);
- goto unlock;
+ /* unicast, arrange "switch" according to probability */
+ switch (header->proto) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ if (unlikely(!neigh)) {
+ neigh_add_path(skb, cb->hwaddr, dev);
+ return NETDEV_TX_OK;
}
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ /* for unicast ARP and RARP should always perform path find */
+ unicast_arp_send(skb, dev, cb);
+ return NETDEV_TX_OK;
+ default:
+ /* ethertype not supported by IPoIB */
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
- if (ipoib_cm_get(neigh)) {
- if (ipoib_cm_up(neigh)) {
- ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
- goto unlock;
- }
- } else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha));
- goto unlock;
+send_using_neigh:
+ /* note we now hold a ref to neigh */
+ if (ipoib_cm_get(neigh)) {
+ if (ipoib_cm_up(neigh)) {
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ goto unref;
}
+ } else if (neigh->ah) {
+ ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
+ goto unref;
+ }
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- spin_lock_irqsave(&priv->lock, flags);
- __skb_queue_tail(&neigh->queue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
- } else {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
- }
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ spin_lock_irqsave(&priv->lock, flags);
+ __skb_queue_tail(&neigh->queue, skb);
+ spin_unlock_irqrestore(&priv->lock, flags);
} else {
- struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
-
- if (cb->hwaddr[4] == 0xff) {
- /* Add in the P_Key for multicast*/
- cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
- cb->hwaddr[9] = priv->pkey & 0xff;
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ }
- ipoib_mcast_send(dev, cb->hwaddr + 4, skb);
- } else {
- /* unicast GID -- should be ARP or RARP reply */
-
- if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
- (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
- ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
- skb_dst(skb) ? "neigh" : "dst",
- be16_to_cpup((__be16 *) skb->data),
- IPOIB_QPN(cb->hwaddr),
- cb->hwaddr + 4);
- dev_kfree_skb_any(skb);
- ++dev->stats.tx_dropped;
- goto unlock;
- }
+unref:
+ ipoib_neigh_put(neigh);
- unicast_arp_send(skb, dev, cb);
- }
- }
-unlock:
- rcu_read_unlock();
return NETDEV_TX_OK;
}
@@ -819,6 +770,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
const void *daddr, const void *saddr, unsigned len)
{
struct ipoib_header *header;
+ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
@@ -826,14 +778,11 @@ static int ipoib_hard_header(struct sk_buff *skb,
header->reserved = 0;
/*
- * If we don't have a dst_entry structure, stuff the
+ * we don't rely on dst_entry structure, always stuff the
* destination address into skb->cb so we can figure out where
* to send the packet later.
*/
- if (!skb_dst(skb)) {
- struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
- memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
- }
+ memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
return 0;
}
@@ -850,86 +799,438 @@ static void ipoib_set_mcast_list(struct net_device *dev)
queue_work(ipoib_workqueue, &priv->restart_task);
}
-static void ipoib_neigh_cleanup(struct neighbour *n)
+static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
{
- struct ipoib_neigh *neigh;
- struct ipoib_dev_priv *priv = netdev_priv(n->dev);
+ /*
+ * Use only the address parts that contributes to spreading
+ * The subnet prefix is not used as one can not connect to
+ * same remote port (GUID) using the same remote QPN via two
+ * different subnets.
+ */
+ /* qpn octets[1:4) & port GUID octets[12:20) */
+ u32 *daddr_32 = (u32 *) daddr;
+ u32 hv;
+
+ hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0);
+ return hv & htbl->mask;
+}
+
+struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh *neigh = NULL;
+ u32 hash_val;
+
+ rcu_read_lock_bh();
+
+ htbl = rcu_dereference_bh(ntbl->htbl);
+
+ if (!htbl)
+ goto out_unlock;
+
+ hash_val = ipoib_addr_hash(htbl, daddr);
+ for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
+ neigh != NULL;
+ neigh = rcu_dereference_bh(neigh->hnext)) {
+ if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+ /* found, take one ref on behalf of the caller */
+ if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ /* deleted */
+ neigh = NULL;
+ goto out_unlock;
+ }
+ neigh->alive = jiffies;
+ goto out_unlock;
+ }
+ }
+
+out_unlock:
+ rcu_read_unlock_bh();
+ return neigh;
+}
+
+static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long neigh_obsolete;
+ unsigned long dt;
unsigned long flags;
- struct ipoib_ah *ah = NULL;
+ int i;
- neigh = *to_ipoib_neigh(n);
- if (neigh)
- priv = netdev_priv(neigh->dev);
- else
+ if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
- ipoib_dbg(priv,
- "neigh_cleanup for %06x %pI6\n",
- IPOIB_QPN(n->ha),
- n->ha + 4);
- spin_lock_irqsave(&priv->lock, flags);
+ write_lock_bh(&ntbl->rwlock);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&ntbl->rwlock));
+
+ if (!htbl)
+ goto out_unlock;
+
+ /* neigh is obsolete if it was idle for two GC periods */
+ dt = 2 * arp_tbl.gc_interval;
+ neigh_obsolete = jiffies - dt;
+ /* handle possible race condition */
+ if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+ goto out_unlock;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&ntbl->rwlock))) != NULL) {
+ /* was the neigh idle for two GC periods */
+ if (time_after(neigh_obsolete, neigh->alive)) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock)));
+ /* remove from path/mc list */
+ spin_lock_irqsave(&priv->lock, flags);
+ list_del(&neigh->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ } else {
+ np = &neigh->hnext;
+ }
- if (neigh->ah)
- ah = neigh->ah;
- list_del(&neigh->list);
- ipoib_neigh_free(n->dev, neigh);
+ }
+ }
- spin_unlock_irqrestore(&priv->lock, flags);
+out_unlock:
+ write_unlock_bh(&ntbl->rwlock);
+}
- if (ah)
- ipoib_put_ah(ah);
+static void ipoib_reap_neigh(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);
+
+ __ipoib_reap_neigh(priv);
+
+ if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+ queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ arp_tbl.gc_interval);
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+
+static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct net_device *dev)
{
struct ipoib_neigh *neigh;
- neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+ neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
if (!neigh)
return NULL;
- neigh->neighbour = neighbour;
neigh->dev = dev;
- memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
- *to_ipoib_neigh(neighbour) = neigh;
+ memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr));
skb_queue_head_init(&neigh->queue);
+ INIT_LIST_HEAD(&neigh->list);
ipoib_cm_set(neigh, NULL);
+ /* one ref on behalf of the caller */
+ atomic_set(&neigh->refcnt, 1);
+
+ return neigh;
+}
+
+struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
+ struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh *neigh;
+ u32 hash_val;
+
+ write_lock_bh(&ntbl->rwlock);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&ntbl->rwlock));
+ if (!htbl) {
+ neigh = NULL;
+ goto out_unlock;
+ }
+
+ /* need to add a new neigh, but maybe some other thread succeeded?
+ * recalc hash, maybe hash resize took place so we do a search
+ */
+ hash_val = ipoib_addr_hash(htbl, daddr);
+ for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
+ lockdep_is_held(&ntbl->rwlock));
+ neigh != NULL;
+ neigh = rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock))) {
+ if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+ /* found, take one ref on behalf of the caller */
+ if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ /* deleted */
+ neigh = NULL;
+ break;
+ }
+ neigh->alive = jiffies;
+ goto out_unlock;
+ }
+ }
+
+ neigh = ipoib_neigh_ctor(daddr, dev);
+ if (!neigh)
+ goto out_unlock;
+
+ /* one ref on behalf of the hash table */
+ atomic_inc(&neigh->refcnt);
+ neigh->alive = jiffies;
+ /* put in hash */
+ rcu_assign_pointer(neigh->hnext,
+ rcu_dereference_protected(htbl->buckets[hash_val],
+ lockdep_is_held(&ntbl->rwlock)));
+ rcu_assign_pointer(htbl->buckets[hash_val], neigh);
+ atomic_inc(&ntbl->entries);
+
+out_unlock:
+ write_unlock_bh(&ntbl->rwlock);
return neigh;
}
-void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
+void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
+ /* neigh reference count was dropprd to zero */
+ struct net_device *dev = neigh->dev;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
- *to_ipoib_neigh(neigh->neighbour) = NULL;
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
while ((skb = __skb_dequeue(&neigh->queue))) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
+ ipoib_dbg(netdev_priv(dev),
+ "neigh free for %06x %pI6\n",
+ IPOIB_QPN(neigh->daddr),
+ neigh->daddr + 4);
kfree(neigh);
+ if (atomic_dec_and_test(&priv->ntbl.entries)) {
+ if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
+ complete(&priv->ntbl.flushed);
+ }
+}
+
+static void ipoib_neigh_reclaim(struct rcu_head *rp)
+{
+ /* Called as a result of removal from hash table */
+ struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);
+ /* note TX context may hold another ref */
+ ipoib_neigh_put(neigh);
}
-static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
+void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
- parms->neigh_cleanup = ipoib_neigh_cleanup;
+ struct net_device *dev = neigh->dev;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh __rcu **np;
+ struct ipoib_neigh *n;
+ u32 hash_val;
+
+ write_lock_bh(&ntbl->rwlock);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&ntbl->rwlock));
+ if (!htbl)
+ goto out_unlock;
+
+ hash_val = ipoib_addr_hash(htbl, neigh->daddr);
+ np = &htbl->buckets[hash_val];
+ for (n = rcu_dereference_protected(*np,
+ lockdep_is_held(&ntbl->rwlock));
+ n != NULL;
+ n = rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock))) {
+ if (n == neigh) {
+ /* found */
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock)));
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ goto out_unlock;
+ } else {
+ np = &n->hnext;
+ }
+ }
+
+out_unlock:
+ write_unlock_bh(&ntbl->rwlock);
+
+}
+
+static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh **buckets;
+ u32 size;
+
+ clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+ ntbl->htbl = NULL;
+ rwlock_init(&ntbl->rwlock);
+ htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+ if (!htbl)
+ return -ENOMEM;
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ size = roundup_pow_of_two(arp_tbl.gc_thresh3);
+ buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL);
+ if (!buckets) {
+ kfree(htbl);
+ return -ENOMEM;
+ }
+ htbl->size = size;
+ htbl->mask = (size - 1);
+ htbl->buckets = buckets;
+ ntbl->htbl = htbl;
+ atomic_set(&ntbl->entries, 0);
+
+ /* start garbage collection */
+ clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ arp_tbl.gc_interval);
return 0;
}
+static void neigh_hash_free_rcu(struct rcu_head *head)
+{
+ struct ipoib_neigh_hash *htbl = container_of(head,
+ struct ipoib_neigh_hash,
+ rcu);
+ struct ipoib_neigh __rcu **buckets = htbl->buckets;
+
+ kfree(buckets);
+ kfree(htbl);
+}
+
+void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long flags;
+ int i;
+
+ /* remove all neigh connected to a given path or mcast */
+ write_lock_bh(&ntbl->rwlock);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&ntbl->rwlock));
+
+ if (!htbl)
+ goto out_unlock;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&ntbl->rwlock))) != NULL) {
+ /* delete neighs belong to this parent */
+ if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock)));
+ /* remove from parent list */
+ spin_lock_irqsave(&priv->lock, flags);
+ list_del(&neigh->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ } else {
+ np = &neigh->hnext;
+ }
+
+ }
+ }
+out_unlock:
+ write_unlock_bh(&ntbl->rwlock);
+}
+
+static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long flags;
+ int i;
+
+ write_lock_bh(&ntbl->rwlock);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&ntbl->rwlock));
+ if (!htbl)
+ goto out_unlock;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&ntbl->rwlock))) != NULL) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&ntbl->rwlock)));
+ /* remove from path/mc list */
+ spin_lock_irqsave(&priv->lock, flags);
+ list_del(&neigh->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ }
+ }
+
+ rcu_assign_pointer(ntbl->htbl, NULL);
+ call_rcu(&htbl->rcu, neigh_hash_free_rcu);
+
+out_unlock:
+ write_unlock_bh(&ntbl->rwlock);
+}
+
+static void ipoib_neigh_hash_uninit(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int stopped;
+
+ ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
+ init_completion(&priv->ntbl.flushed);
+ set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+
+ /* Stop GC if called at init fail need to cancel work */
+ stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ if (!stopped)
+ cancel_delayed_work(&priv->neigh_reap_task);
+
+ if (atomic_read(&priv->ntbl.entries)) {
+ ipoib_flush_neighs(priv);
+ wait_for_completion(&priv->ntbl.flushed);
+ }
+}
+
+
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ if (ipoib_neigh_hash_init(priv) < 0)
+ goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
- goto out;
+ goto out_neigh_hash_cleanup;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -952,6 +1253,8 @@ out_tx_ring_cleanup:
out_rx_ring_cleanup:
kfree(priv->rx_ring);
+out_neigh_hash_cleanup:
+ ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -964,6 +1267,9 @@ void ipoib_dev_cleanup(struct net_device *dev)
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+ /* Stop GC on child */
+ set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
+ cancel_delayed_work(&cpriv->neigh_reap_task);
unregister_netdev(cpriv->dev);
ipoib_dev_cleanup(cpriv->dev);
free_netdev(cpriv->dev);
@@ -976,6 +1282,8 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
+
+ ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
@@ -990,7 +1298,6 @@ static const struct net_device_ops ipoib_netdev_ops = {
.ndo_start_xmit = ipoib_start_xmit,
.ndo_tx_timeout = ipoib_timeout,
.ndo_set_rx_mode = ipoib_set_mcast_list,
- .ndo_neigh_setup = ipoib_neigh_setup_dev,
};
static void ipoib_setup(struct net_device *dev)
@@ -1039,6 +1346,7 @@ static void ipoib_setup(struct net_device *dev)
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
+ INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1279,6 +1587,9 @@ sysfs_failed:
register_failed:
ib_unregister_event_handler(&priv->event_handler);
+ /* Stop GC if started before flush */
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
event_failed:
@@ -1345,6 +1656,9 @@ static void ipoib_remove_one(struct ib_device *device)
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
rtnl_unlock();
+ /* Stop GC */
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
unregister_netdev(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 20ebc6fd1bb9..13f4aa7593c8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -69,28 +69,13 @@ struct ipoib_mcast_iter {
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_neigh *neigh, *tmp;
int tx_dropped = 0;
ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
- spin_lock_irq(&priv->lock);
-
- list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
- /*
- * It's safe to call ipoib_put_ah() inside priv->lock
- * here, because we know that mcast->ah will always
- * hold one more reference, so ipoib_put_ah() will
- * never do more than decrement the ref count.
- */
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
- }
-
- spin_unlock_irq(&priv->lock);
+ /* remove all neigh connected to this mcast */
+ ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw);
if (mcast->ah)
ipoib_put_ah(mcast->ah);
@@ -655,11 +640,12 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
return 0;
}
-void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
+ void *mgid = daddr + 4;
spin_lock_irqsave(&priv->lock, flags);
@@ -715,25 +701,25 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
out:
if (mcast && mcast->ah) {
- struct dst_entry *dst = skb_dst(skb);
- struct neighbour *n = NULL;
-
- rcu_read_lock();
- if (dst)
- n = dst_get_neighbour_noref(dst);
- if (n && !*to_ipoib_neigh(n)) {
- struct ipoib_neigh *neigh = ipoib_neigh_alloc(n,
- skb->dev);
+ struct ipoib_neigh *neigh;
+ spin_unlock_irqrestore(&priv->lock, flags);
+ neigh = ipoib_neigh_get(dev, daddr);
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!neigh) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ neigh = ipoib_neigh_alloc(daddr, dev);
+ spin_lock_irqsave(&priv->lock, flags);
if (neigh) {
kref_get(&mcast->ah->ref);
neigh->ah = mcast->ah;
list_add_tail(&neigh->list, &mcast->neigh_list);
}
}
- rcu_read_unlock();
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ if (neigh)
+ ipoib_neigh_put(neigh);
return;
}
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 5f6b7f63cdef..7a0ce8d42887 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1377,10 +1377,14 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
break;
case SRPT_STATE_NEED_DATA:
/* DMA_TO_DEVICE (write) - RDMA read error. */
+
+ /* XXX(hch): this is a horrible layering violation.. */
spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
+ ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
- transport_generic_handle_data(&ioctx->cmd);
+
+ complete(&ioctx->cmd.transport_lun_stop_comp);
break;
case SRPT_STATE_CMD_RSP_SENT:
/*
@@ -1463,9 +1467,10 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
/**
* srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
*
- * Note: transport_generic_handle_data() is asynchronous so unmapping the
- * data that has been transferred via IB RDMA must be postponed until the
- * check_stop_free() callback.
+ * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
+ * the data that has been transferred via IB RDMA had to be postponed until the
+ * check_stop_free() callback. None of this is nessecary anymore and needs to
+ * be cleaned up.
*/
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx,
@@ -1477,7 +1482,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
if (opcode == SRPT_RDMA_READ_LAST) {
if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
SRPT_STATE_DATA_IN))
- transport_generic_handle_data(&ioctx->cmd);
+ target_execute_cmd(&ioctx->cmd);
else
printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
__LINE__, srpt_get_cmd_state(ioctx));