summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/umem.c11
-rw-r--r--drivers/infiniband/core/uverbs_main.c22
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c19
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c16
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c457
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c9
-rw-r--r--drivers/infiniband/hw/mlx4/main.c37
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h14
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c7
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c44
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c14
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c2
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c107
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c2
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c8
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/user.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c21
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c20
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h31
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c195
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c86
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c520
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c44
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c4
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h66
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c66
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c523
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c220
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c691
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h37
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c9
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c233
40 files changed, 1929 insertions, 1645 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index aec7a6aa2951..38acb3cfc545 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,6 +99,17 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+ if (!size)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * If the combination of the addr and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((addr + size) < addr) ||
+ PAGE_ALIGN(addr + size) < (addr + size))
+ return ERR_PTR(-EINVAL);
+
if (!can_do_mlock())
return ERR_PTR(-EPERM);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 259dcc7779f5..88cce9bb72fe 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uqp);
}
+ list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+ struct ib_srq *srq = uobj->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobj, struct ib_uevent_object, uobject);
+
+ idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+ ib_destroy_srq(srq);
+ ib_uverbs_release_uevent(file, uevent);
+ kfree(uevent);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = uobj->object;
struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(ucq);
}
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 6791fd16272c..3ef0cf9f5c44 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -73,7 +73,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
c4iw_init_wr_wait(&wr_wait);
wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
- skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 6d7f453b4d05..450d15965005 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -40,9 +40,9 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/aio.h>
#include <linux/jiffies.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
#include <asm/pgtable.h>
#include "ipath_kernel.h"
@@ -53,15 +53,19 @@ static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
loff_t *);
-static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
- unsigned long , loff_t);
+static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);
+/*
+ * This is really, really weird shit - write() and writev() here
+ * have completely unrelated semantics. Sucky userland ABI,
+ * film at 11.
+ */
static const struct file_operations ipath_file_ops = {
.owner = THIS_MODULE,
.write = ipath_write,
- .aio_write = ipath_writev,
+ .write_iter = ipath_write_iter,
.open = ipath_open,
.release = ipath_close,
.poll = ipath_poll,
@@ -2414,18 +2418,17 @@ bail:
return ret;
}
-static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
- unsigned long dim, loff_t off)
+static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *filp = iocb->ki_filp;
struct ipath_filedata *fp = filp->private_data;
struct ipath_portdata *pd = port_fp(filp);
struct ipath_user_sdma_queue *pq = fp->pq;
- if (!dim)
+ if (!iter_is_iovec(from) || !from->nr_segs)
return -EINVAL;
- return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
+ return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
}
static struct class *ipath_class;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 33c45dfcbd88..1ca8e32a9592 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,14 +82,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&d_inode(parent)->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
- error = ipathfs_mknod(parent->d_inode, *dentry,
+ error = ipathfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&d_inode(parent)->i_mutex);
return error;
}
@@ -277,11 +277,11 @@ static int remove_file(struct dentry *parent, char *name)
}
spin_lock(&tmp->d_lock);
- if (!d_unhashed(tmp) && tmp->d_inode) {
+ if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
- simple_unlink(parent->d_inode, tmp);
+ simple_unlink(d_inode(parent), tmp);
} else
spin_unlock(&tmp->d_lock);
@@ -302,7 +302,7 @@ static int remove_device_files(struct super_block *sb,
int ret;
root = dget(sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
+ mutex_lock(&d_inode(root)->i_mutex);
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -315,10 +315,10 @@ static int remove_device_files(struct super_block *sb,
remove_file(dir, "flash");
remove_file(dir, "atomic_counters");
d_delete(dir);
- ret = simple_rmdir(root->d_inode, dir);
+ ret = simple_rmdir(d_inode(root), dir);
bail:
- mutex_unlock(&root->d_inode->i_mutex);
+ mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
return ret;
}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index a31e031afd87..0f00204d2ece 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
int query_id;
struct list_head list;
int block_num;
+ ib_sa_comp_mask guid_indexes;
+ u8 method;
};
struct mlx4_next_alias_guid_work {
u8 port;
u8 block_num;
+ u8 method;
struct mlx4_sriov_alias_guid_info_rec_det rec_det;
};
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+ int *resched_delay_sec);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
u8 port_num, u8 *p_data)
@@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
return IB_SA_COMP_MASK(4 + index);
}
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+ int port, int slave_init)
+{
+ __be64 curr_guid, required_guid;
+ int record_num = slave / 8;
+ int index = slave % 8;
+ int port_index = port - 1;
+ unsigned long flags;
+ int do_work = 0;
+
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
+ GUID_STATE_NEED_PORT_INIT)
+ goto unlock;
+ if (!slave_init) {
+ curr_guid = *(__be64 *)&dev->sriov.
+ alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].
+ all_recs[GUID_REC_SIZE * index];
+ if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
+ !curr_guid)
+ goto unlock;
+ required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+ } else {
+ required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
+ if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+ goto unlock;
+ }
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].
+ all_recs[GUID_REC_SIZE * index] = required_guid;
+ dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].guid_indexes
+ |= mlx4_ib_get_aguid_comp_mask_from_ix(index);
+ dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].status
+ = MLX4_GUID_INFO_STATUS_IDLE;
+ /* set to run immediately */
+ dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].time_to_run = 0;
+ dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[record_num].
+ guids_retry_schedule[index] = 0;
+ do_work = 1;
+unlock:
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+
+ if (do_work)
+ mlx4_ib_init_alias_guid_work(dev, port_index);
+}
+
/*
* Whenever new GUID is set/unset (guid table change) create event and
* notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
enum slave_port_state prev_state;
__be64 tmp_cur_ag, form_cache_ag;
enum slave_port_gen_event gen_event;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec;
+ unsigned long flags;
+ __be64 required_value;
if (!mlx4_is_master(dev->dev))
return;
+ rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+ all_rec_per_port[block_num];
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
*/
if (tmp_cur_ag != form_cache_ag)
continue;
- mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+ if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+ required_value = 0;
+
+ if (tmp_cur_ag == required_value) {
+ rec->guid_indexes = rec->guid_indexes &
+ ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ } else {
+ /* may notify port down if value is 0 */
+ if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+ spin_unlock_irqrestore(&dev->sriov.
+ alias_guid.ag_work_lock, flags);
+ continue;
+ }
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+ flags);
+ mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
/*2 cases: Valid GUID, and Invalid Guid*/
if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
- pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
- slave_id, port_num);
- mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
- MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+ pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev,
+ slave_id,
+ port_num,
+ MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ }
}
}
}
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
+ ib_sa_comp_mask declined_guid_indexes = 0;
+ ib_sa_comp_mask applied_guid_indexes = 0;
+ unsigned int resched_delay_sec = 0;
if (!context)
return;
@@ -216,9 +303,9 @@ static void aliasguid_query_handler(int status,
all_rec_per_port[cb_ctx->block_num];
if (status) {
- rec->status = MLX4_GUID_INFO_STATUS_IDLE;
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
+ rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[guid_rec->block_num];
- rec->status = MLX4_GUID_INFO_STATUS_SET;
- rec->method = MLX4_GUID_INFO_RECORD_SET;
-
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
- __be64 tmp_cur_ag;
- tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ __be64 sm_response, required_val;
+
+ if (!(cb_ctx->guid_indexes &
+ mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+ continue;
+ sm_response = *(__be64 *)&guid_rec->guid_info_list
+ [i * GUID_REC_SIZE];
+ required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+ if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
+ if (required_val ==
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+ goto next_entry;
+
+ /* A new value was set till we got the response */
+ pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
+ be64_to_cpu(required_val),
+ i, guid_rec->block_num);
+ goto entry_declined;
+ }
+
/* check if the SM didn't assign one of the records.
- * if it didn't, if it was not sysadmin request:
- * ask the SM to give a new GUID, (instead of the driver request).
+ * if it didn't, re-ask for.
*/
- if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
- mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
- "block_num: %d was declined by SM, "
- "ownership by %d (0 = driver, 1=sysAdmin,"
- " 2=None)\n", __func__, i,
- guid_rec->block_num, rec->ownership);
- if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
- /* if it is driver assign, asks for new GUID from SM*/
- *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
- MLX4_NOT_SET_GUID;
-
- /* Mark the record as not assigned, and let it
- * be sent again in the next work sched.*/
- rec->status = MLX4_GUID_INFO_STATUS_IDLE;
- rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
- }
+ if (sm_response == MLX4_NOT_SET_GUID) {
+ if (rec->guids_retry_schedule[i] == 0)
+ mlx4_ib_warn(&dev->ib_dev,
+ "%s:Record num %d in block_num: %d was declined by SM\n",
+ __func__, i,
+ guid_rec->block_num);
+ goto entry_declined;
} else {
/* properly assigned record. */
/* We save the GUID we just got from the SM in the
* admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */
- if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
- tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
- /* the sysadmin assignment failed.*/
- mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
- " admin guid after SysAdmin "
- "configuration. "
- "Record num %d in block_num:%d "
- "was declined by SM, "
- "new val(0x%llx) was kept\n",
- __func__, i,
- guid_rec->block_num,
- be64_to_cpu(*(__be64 *) &
- rec->all_recs[i * GUID_REC_SIZE]));
+ if (required_val &&
+ sm_response != required_val) {
+ /* Warn only on first retry */
+ if (rec->guids_retry_schedule[i] == 0)
+ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+ " admin guid after SysAdmin "
+ "configuration. "
+ "Record num %d in block_num:%d "
+ "was declined by SM, "
+ "new val(0x%llx) was kept, SM returned (0x%llx)\n",
+ __func__, i,
+ guid_rec->block_num,
+ be64_to_cpu(required_val),
+ be64_to_cpu(sm_response));
+ goto entry_declined;
} else {
- memcpy(&rec->all_recs[i * GUID_REC_SIZE],
- &guid_rec->guid_info_list[i * GUID_REC_SIZE],
- GUID_REC_SIZE);
+ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+ sm_response;
+ if (required_val == 0)
+ mlx4_set_admin_guid(dev->dev,
+ sm_response,
+ (guid_rec->block_num
+ * NUM_ALIAS_GUID_IN_REC) + i,
+ cb_ctx->port);
+ goto next_entry;
}
}
+entry_declined:
+ declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ rec->guids_retry_schedule[i] =
+ (rec->guids_retry_schedule[i] == 0) ? 1 :
+ min((unsigned int)60,
+ rec->guids_retry_schedule[i] * 2);
+ /* using the minimum value among all entries in that record */
+ resched_delay_sec = (resched_delay_sec == 0) ?
+ rec->guids_retry_schedule[i] :
+ min(resched_delay_sec,
+ rec->guids_retry_schedule[i]);
+ continue;
+
+next_entry:
+ rec->guids_retry_schedule[i] = 0;
}
+
+ applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes;
+ if (declined_guid_indexes ||
+ rec->guid_indexes & ~(applied_guid_indexes)) {
+ pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
+ guid_rec->block_num,
+ be64_to_cpu((__force __be64)rec->guid_indexes),
+ be64_to_cpu((__force __be64)applied_guid_indexes),
+ be64_to_cpu((__force __be64)declined_guid_indexes));
+ rec->time_to_run = ktime_get_real_ns() +
+ resched_delay_sec * NSEC_PER_SEC;
+ } else {
+ rec->status = MLX4_GUID_INFO_STATUS_SET;
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
/*
The func is call here to close the cases when the
sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
out:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
- if (!dev->sriov.is_going_down)
+ if (!dev->sriov.is_going_down) {
+ get_low_record_time_index(dev, port_index, &resched_delay_sec);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
&dev->sriov.alias_guid.ports_guid[port_index].
- alias_guid_work, 0);
+ alias_guid_work,
+ msecs_to_jiffies(resched_delay_sec * 1000));
+ }
if (cb_ctx->sa_query) {
list_del(&cb_ctx->list);
kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
ib_sa_comp_mask comp_mask = 0;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
- = MLX4_GUID_INFO_STATUS_IDLE;
- dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
- = MLX4_GUID_INFO_RECORD_SET;
+ = MLX4_GUID_INFO_STATUS_SET;
/* calculate the comp_mask for that record.*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
need to assign GUIDs, then don't put it up for assignment.
*/
if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
- (!index && !i) ||
- MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
- ports_guid[port - 1].all_rec_per_port[index].ownership)
+ (!index && !i))
continue;
comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
dev->sriov.alias_guid.ports_guid[port - 1].
- all_rec_per_port[index].guid_indexes = comp_mask;
+ all_rec_per_port[index].guid_indexes |= comp_mask;
+ if (dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].guid_indexes)
+ dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
+
}
static int set_guid_rec(struct ib_device *ibdev,
- u8 port, int index,
- struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+ struct mlx4_next_alias_guid_work *rec)
{
int err;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
struct ib_port_attr attr;
struct mlx4_alias_guid_work_context *callback_context;
unsigned long resched_delay, flags, flags1;
+ u8 port = rec->port + 1;
+ int index = rec->block_num;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
+ callback_context->guid_indexes = rec_det->guid_indexes;
+ callback_context->method = rec->method;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
@@ -399,7 +538,7 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->query_id =
ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
ibdev, port, &guid_info_rec,
- comp_mask, rec_det->method, 1000,
+ comp_mask, rec->method, 1000,
GFP_KERNEL, aliasguid_query_handler,
callback_context,
&callback_context->sa_query);
@@ -434,6 +573,30 @@ out:
return err;
}
+static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
+{
+ int j, k, entry;
+ __be64 guid;
+
+ /*Check if the SM doesn't need to assign the GUIDs*/
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+ entry = j * NUM_ALIAS_GUID_IN_REC + k;
+ /* no request for the 0 entry (hw guid) */
+ if (!entry || entry > dev->dev->persist->num_vfs ||
+ !mlx4_is_slave_active(dev->dev, entry))
+ continue;
+ guid = mlx4_get_admin_guid(dev->dev, entry, port);
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[j].all_recs
+ [GUID_REC_SIZE * k] = guid;
+ pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
+ entry,
+ be64_to_cpu(guid),
+ port);
+ }
+ }
+}
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
{
int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+ if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
+ GUID_STATE_NEED_PORT_INIT) {
+ mlx4_ib_guid_port_init(dev, port);
+ dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
+ (~GUID_STATE_NEED_PORT_INIT);
+ }
for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
invalidate_guid_record(dev, port, i);
@@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
-/* The function returns the next record that was
- * not configured (or failed to be configured) */
-static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
- struct mlx4_next_alias_guid_work *rec)
+static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *next_rec,
+ int record_index)
{
- int j;
- unsigned long flags;
+ int i;
+ int lowset_time_entry = -1;
+ int lowest_time = 0;
+ ib_sa_comp_mask delete_guid_indexes = 0;
+ ib_sa_comp_mask set_guid_indexes = 0;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec =
+ &dev->sriov.alias_guid.ports_guid[port].
+ all_rec_per_port[record_index];
- for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
- spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
- if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
- MLX4_GUID_INFO_STATUS_IDLE) {
- memcpy(&rec->rec_det,
- &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
- sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
- rec->port = port;
- rec->block_num = j;
- dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
- MLX4_GUID_INFO_STATUS_PENDING;
- spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
- return 0;
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ if (!(rec->guid_indexes &
+ mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+ continue;
+
+ if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+ delete_guid_indexes |=
+ mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ else
+ set_guid_indexes |=
+ mlx4_ib_get_aguid_comp_mask_from_ix(i);
+
+ if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
+ lowest_time) {
+ lowset_time_entry = i;
+ lowest_time = rec->guids_retry_schedule[i];
}
- spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
}
- return -ENOENT;
+
+ memcpy(&next_rec->rec_det, rec, sizeof(*rec));
+ next_rec->port = port;
+ next_rec->block_num = record_index;
+
+ if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
+ next_rec->rec_det.guid_indexes = delete_guid_indexes;
+ next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
+ } else {
+ next_rec->rec_det.guid_indexes = set_guid_indexes;
+ next_rec->method = MLX4_GUID_INFO_RECORD_SET;
+ }
}
-static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
- int rec_index,
- struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+/* return index of record that should be updated based on lowest
+ * rescheduled time
+ */
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+ int *resched_delay_sec)
{
- dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
- rec_det->guid_indexes;
- memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
- rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
- dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
- rec_det->status;
+ int record_index = -1;
+ u64 low_record_time = 0;
+ struct mlx4_sriov_alias_guid_info_rec_det rec;
+ int j;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ rec = dev->sriov.alias_guid.ports_guid[port].
+ all_rec_per_port[j];
+ if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
+ rec.guid_indexes) {
+ if (record_index == -1 ||
+ rec.time_to_run < low_record_time) {
+ record_index = j;
+ low_record_time = rec.time_to_run;
+ }
+ }
+ }
+ if (resched_delay_sec) {
+ u64 curr_time = ktime_get_real_ns();
+
+ *resched_delay_sec = (low_record_time < curr_time) ? 0 :
+ div_u64((low_record_time - curr_time), NSEC_PER_SEC);
+ }
+
+ return record_index;
}
-static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *rec)
{
- int j;
- struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
-
- for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
- memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
- rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
- IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
- IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
- IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
- IB_SA_GUIDINFO_REC_GID7;
- rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
- set_administratively_guid_record(dev, port, j, &rec_det);
+ unsigned long flags;
+ int record_index;
+ int ret = 0;
+
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ record_index = get_low_record_time_index(dev, port, NULL);
+
+ if (record_index < 0) {
+ ret = -ENOENT;
+ goto out;
}
+
+ set_required_record(dev, port, rec, record_index);
+out:
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ return ret;
}
static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
goto out;
}
- set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
- &rec->rec_det);
-
+ set_guid_rec(&dev->ib_dev, rec);
out:
kfree(rec);
}
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down) {
+ /* If there is pending one should cancell then run, otherwise
+ * won't run till previous one is ended as same work
+ * struct is used.
+ */
+ cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
+ alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
&dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
}
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
char alias_wq_name[15];
int ret = 0;
- int i, j, k;
+ int i, j;
union ib_gid gid;
if (!mlx4_is_master(dev->dev))
@@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0,
sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
- /*Check if the SM doesn't need to assign the GUIDs*/
+ dev->sriov.alias_guid.ports_guid[i].state_flags |=
+ GUID_STATE_NEED_PORT_INIT;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
- if (mlx4_ib_sm_guid_assign) {
- dev->sriov.alias_guid.ports_guid[i].
- all_rec_per_port[j].
- ownership = MLX4_GUID_DRIVER_ASSIGN;
- continue;
- }
- dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
- ownership = MLX4_GUID_NONE_ASSIGN;
- /*mark each val as it was deleted,
- till the sysAdmin will give it valid val*/
- for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
- *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
- all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
- cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
- }
+ /* mark each val as it was deleted */
+ memset(dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs, 0xFF,
+ sizeof(dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs));
}
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
/*prepare the records, set them to be allocated by sm*/
+ if (mlx4_ib_sm_guid_assign)
+ for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
+ mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j);
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
- if (mlx4_ib_sm_guid_assign)
- set_all_slaves_guids(dev, i);
snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 59040265e361..9cd2b002d7ae 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->ring[i].addr,
rx_buf_size,
DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+ kfree(tun_qp->ring[i].addr);
+ goto err;
+ }
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->tx_ring[i].buf.addr,
tx_buf_size,
DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ctx->ib_dev,
+ tun_qp->tx_ring[i].buf.map)) {
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ goto tx_err;
+ }
tun_qp->tx_ring[i].ah = NULL;
}
spin_lock_init(&tun_qp->tx_lock);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b972c0b41799..57070c529dfb 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -587,8 +587,9 @@ static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_vio
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
}
- err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -1525,8 +1526,8 @@ static void update_gids_task(struct work_struct *work)
memcpy(gids, gw->gids, sizeof gw->gids);
err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
- MLX4_CMD_WRAPPED);
+ MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
else
@@ -1564,7 +1565,7 @@ static void reset_gids_task(struct work_struct *work)
IB_LINK_LAYER_ETHERNET) {
err = mlx4_cmd(dev, mailbox->dma,
MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- 1, MLX4_CMD_SET_PORT,
+ MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
@@ -2790,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
+ if (mlx4_is_master(dev)) {
+ int i;
+
+ for (i = 1; i <= ibdev->num_ports; i++) {
+ if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+ == IB_LINK_LAYER_INFINIBAND)
+ mlx4_ib_slave_alias_guid_event(ibdev,
+ p, i,
+ 1);
+ }
+ }
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ if (mlx4_is_master(dev)) {
+ int i;
+
+ for (i = 1; i <= ibdev->num_ports; i++) {
+ if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+ == IB_LINK_LAYER_INFINIBAND)
+ mlx4_ib_slave_alias_guid_event(ibdev,
+ p, i,
+ 0);
+ }
+ }
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f829fd935b79..fce3934372a1 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
- MLX4_GUID_INFO_STATUS_PENDING,
};
-enum mlx4_guid_alias_rec_ownership {
- MLX4_GUID_DRIVER_ASSIGN,
- MLX4_GUID_SYSADMIN_ASSIGN,
- MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
- u8 method; /*set or delete*/
- enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+ unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+ u64 time_to_run;
};
struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
+ u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+ int port, int slave_init);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ed2bd6701f9b..02fc91c68027 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ goto err;
+ }
}
return 0;
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
- *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
- wr->wr.ud.hlen);
+ *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index d10c2b8a5dad..6797108ce873 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -46,21 +46,17 @@
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int record_num;/*0-15*/
- int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
+ __be64 sysadmin_ag_val;
- record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
- guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+ sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+ mlx4_ib_iov_dentry->entry_num,
+ port->num);
- return sprintf(buf, "%llx\n",
- be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
- ports_guid[port->num - 1].
- all_rec_per_port[record_num].
- all_recs[8 * guid_index_in_rec]));
+ return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u64 sysadmin_ag_val;
+ unsigned long flags;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
pr_err("GUID 0 block 0 is RO\n");
return count;
}
+ spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
-
- mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
- = MLX4_GUID_INFO_RECORD_SET;
-
- switch (sysadmin_ag_val) {
- case MLX4_GUID_FOR_DELETE_VAL:
- mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
- = MLX4_GUID_INFO_RECORD_DELETE;
- mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
- = MLX4_GUID_SYSADMIN_ASSIGN;
- break;
- /* The sysadmin requests the SM to re-assign */
- case MLX4_NOT_SET_GUID:
- mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
- = MLX4_GUID_DRIVER_ASSIGN;
- break;
- /* The sysadmin requests a specific value.*/
- default:
- mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
- = MLX4_GUID_SYSADMIN_ASSIGN;
- break;
- }
+ mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+ mlx4_ib_iov_dentry->entry_num,
+ port->num);
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
- = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+ |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+ spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 39ab0caefdf9..66080580e24d 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index c463e7bba5f4..2ee6b1051975 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -572,11 +572,15 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
+ struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
+ void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+
mlx5_cq_arm(&to_mcq(ibcq)->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
- to_mdev(ibcq->device)->mdev->priv.uuari.uars[0].map,
- MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev->priv.cq_uar_lock));
+ uar_page,
+ MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
+ to_mcq(ibcq)->mcq.cons_index);
return 0;
}
@@ -697,8 +701,6 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.set_ci_db = cq->db.db;
cq->mcq.arm_db = cq->db.db + 1;
- *cq->mcq.set_ci_db = 0;
- *cq->mcq.arm_db = 0;
cq->mcq.cqe_sz = cqe_size;
err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
@@ -782,7 +784,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
- err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
+ err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
goto err_cqb;
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index ece028fc47d6..a0e4e6ddb71a 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 657af9a1167c..9cf9a37bb5ff 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index cc4ac1e583b2..57c9809e8b87 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -62,95 +62,6 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
-int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
-{
- struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
- struct mlx5_eq *eq, *n;
- int err = -ENOENT;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
- if (eq->index == vector) {
- *eqn = eq->eqn;
- *irqn = eq->irqn;
- err = 0;
- break;
- }
- }
- spin_unlock(&table->lock);
-
- return err;
-}
-
-static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
- char name[MLX5_MAX_EQ_NAME];
- struct mlx5_eq *eq, *n;
- int ncomp_vec;
- int nent;
- int err;
- int i;
-
- INIT_LIST_HEAD(&dev->eqs_list);
- ncomp_vec = table->num_comp_vectors;
- nent = MLX5_COMP_EQ_SIZE;
- for (i = 0; i < ncomp_vec; i++) {
- eq = kzalloc(sizeof(*eq), GFP_KERNEL);
- if (!eq) {
- err = -ENOMEM;
- goto clean;
- }
-
- snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
- err = mlx5_create_map_eq(dev->mdev, eq,
- i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- name, &dev->mdev->priv.uuari.uars[0]);
- if (err) {
- kfree(eq);
- goto clean;
- }
- mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
- eq->index = i;
- spin_lock(&table->lock);
- list_add_tail(&eq->list, &dev->eqs_list);
- spin_unlock(&table->lock);
- }
-
- dev->num_comp_vectors = ncomp_vec;
- return 0;
-
-clean:
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev->mdev, eq))
- mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
- return err;
-}
-
-static void free_comp_eqs(struct mlx5_ib_dev *dev)
-{
- struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
- struct mlx5_eq *eq, *n;
-
- spin_lock(&table->lock);
- list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
- list_del(&eq->list);
- spin_unlock(&table->lock);
- if (mlx5_destroy_unmap_eq(dev->mdev, eq))
- mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
- kfree(eq);
- spin_lock(&table->lock);
- }
- spin_unlock(&table->lock);
-}
-
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -1291,10 +1202,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
get_ext_port_caps(dev);
- err = alloc_comp_eqs(dev);
- if (err)
- goto err_dealloc;
-
MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
@@ -1303,7 +1210,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey;
dev->num_ports = mdev->caps.gen.num_ports;
dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
+ dev->ib_dev.num_comp_vectors =
+ dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dma_device = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
@@ -1390,13 +1298,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = init_node_data(dev);
if (err)
- goto err_eqs;
+ goto err_dealloc;
mutex_init(&dev->cap_mask_mutex);
err = create_dev_resources(&dev->devr);
if (err)
- goto err_eqs;
+ goto err_dealloc;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -1433,9 +1341,6 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
-err_eqs:
- free_comp_eqs(dev);
-
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -1450,7 +1355,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
- free_comp_eqs(dev);
ib_dealloc_device(&dev->ib_dev);
}
@@ -1458,6 +1362,7 @@ static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
+ .protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
static int __init mlx5_ib_init(void)
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 611a9fdf2f38..40df2cca0609 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 83f22fe297c8..dff1cfcdf476 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -421,9 +421,7 @@ struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
- struct list_head eqs_list;
int num_ports;
- int num_comp_vectors;
/* serialize update of capability mask
*/
struct mutex cap_mask_mutex;
@@ -594,7 +592,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index cd9822eeacae..71c593583864 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index a2c541c4809a..5099db08afd2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index be0cd358b080..4d7024b899cb 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -796,9 +796,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_free;
}
- qp->db.db[0] = 0;
- qp->db.db[1] = 0;
-
qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
@@ -1162,10 +1159,11 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
+
if (qp->state != IB_QPS_RESET) {
mlx5_ib_qp_disable_pagefaults(qp);
if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
+ MLX5_QP_STATE_RST, in, 0, &qp->mqp))
mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
qp->mqp.qpn);
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 41fec66217dd..02d77a29764d 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return err;
}
- *srq->db.db = 0;
-
if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
mlx5_ib_dbg(dev, "buf alloc failed\n");
err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index d0ba264ac1ed..76fb7b927d37 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 41937c6f888a..9ea6c440a00c 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -39,11 +39,11 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
#include <linux/export.h>
+#include <linux/uio.h>
#include "qib.h"
#include "qib_common.h"
@@ -55,15 +55,19 @@
static int qib_open(struct inode *, struct file *);
static int qib_close(struct inode *, struct file *);
static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
-static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
+static ssize_t qib_write_iter(struct kiocb *, struct iov_iter *);
static unsigned int qib_poll(struct file *, struct poll_table_struct *);
static int qib_mmapf(struct file *, struct vm_area_struct *);
+/*
+ * This is really, really weird shit - write() and writev() here
+ * have completely unrelated semantics. Sucky userland ABI,
+ * film at 11.
+ */
static const struct file_operations qib_file_ops = {
.owner = THIS_MODULE,
.write = qib_write,
- .aio_write = qib_aio_write,
+ .write_iter = qib_write_iter,
.open = qib_open,
.release = qib_close,
.poll = qib_poll,
@@ -2249,17 +2253,16 @@ bail:
return ret;
}
-static ssize_t qib_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long dim, loff_t off)
+static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct qib_filedata *fp = iocb->ki_filp->private_data;
struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
struct qib_user_sdma_queue *pq = fp->pq;
- if (!dim || !pq)
+ if (!iter_is_iovec(from) || !from->nr_segs || !pq)
return -EINVAL;
-
- return qib_user_sdma_writev(rcd, pq, iov, dim);
+
+ return qib_user_sdma_writev(rcd, pq, from->iov, from->nr_segs);
}
static struct class *qib_class;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 650897a8591e..bdd5d3857203 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&d_inode(parent)->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
- error = qibfs_mknod(parent->d_inode, *dentry,
+ error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&d_inode(parent)->i_mutex);
return error;
}
@@ -455,10 +455,10 @@ static int remove_file(struct dentry *parent, char *name)
}
spin_lock(&tmp->d_lock);
- if (!d_unhashed(tmp) && tmp->d_inode) {
+ if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
- simple_unlink(parent->d_inode, tmp);
+ simple_unlink(d_inode(parent), tmp);
} else {
spin_unlock(&tmp->d_lock);
}
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
int ret, i;
root = dget(sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
+ mutex_lock(&d_inode(root)->i_mutex);
snprintf(unit, sizeof(unit), "%u", dd->unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
- mutex_lock(&dir->d_inode->i_mutex);
+ mutex_lock(&d_inode(dir)->i_mutex);
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- mutex_unlock(&dir->d_inode->i_mutex);
- ret = simple_rmdir(root->d_inode, dir);
+ mutex_unlock(&d_inode(dir)->i_mutex);
+ ret = simple_rmdir(d_inode(root), dir);
d_delete(dir);
dput(dir);
bail:
- mutex_unlock(&root->d_inode->i_mutex);
+ mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
return ret;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562beb5423..bd94b0a6e9e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -87,7 +87,6 @@ enum {
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
IPOIB_FLAG_SUBINTERFACE = 5,
- IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
@@ -98,9 +97,15 @@ enum {
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
- IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+ /*
+ * For IPOIB_MCAST_FLAG_BUSY
+ * When set, in flight join and mcast->mc is unreliable
+ * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+ * haven't started yet
+ * When clear and mcast->mc is valid pointer, join was successful
+ */
+ IPOIB_MCAST_FLAG_BUSY = 2,
IPOIB_MCAST_FLAG_ATTACHED = 3,
- IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
unsigned long created;
unsigned long backoff;
+ unsigned long delay_until;
unsigned long flags;
unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
struct completion deleted;
};
+struct ipoib_qp_state_validate {
+ struct work_struct work;
+ struct ipoib_dev_priv *priv;
+};
+
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
+ struct workqueue_struct *wq;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
- return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
-
void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
{
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03..56959adb6c7d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
}
spin_lock_irq(&priv->lock);
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
/* Add this entry to passive ids list head, but do not re-add it
* if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
}
return;
}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
- queue_work(ipoib_workqueue, &priv->cm.start_task);
+ queue_work(priv->wq, &priv->cm.start_task);
return tx;
}
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
tx->neigh->daddr + 4);
tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
- queue_work(ipoib_workqueue, &priv->cm.skb_task);
+ queue_work(priv->wq, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
}
if (!list_empty(&priv->cm.passive_ids))
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c348174..63b92cbb29ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
u64 mapping[IPOIB_UD_RX_SG])
{
- if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
- ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
- DMA_FROM_DEVICE);
- ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
- DMA_FROM_DEVICE);
- } else
- ib_dma_unmap_single(priv->ca, mapping[0],
- IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
- DMA_FROM_DEVICE);
-}
-
-static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
- struct sk_buff *skb,
- unsigned int length)
-{
- if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
- unsigned int size;
- /*
- * There is only two buffers needed for max_payload = 4K,
- * first buf size is IPOIB_UD_HEAD_SIZE
- */
- skb->tail += IPOIB_UD_HEAD_SIZE;
- skb->len += length;
-
- size = length - IPOIB_UD_HEAD_SIZE;
-
- skb_frag_size_set(frag, size);
- skb->data_len += size;
- skb->truesize += PAGE_SIZE;
- } else
- skb_put(skb, length);
-
+ ib_dma_unmap_single(priv->ca, mapping[0],
+ IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+ DMA_FROM_DEVICE);
}
static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int buf_size;
- int tailroom;
u64 *mapping;
- if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
- buf_size = IPOIB_UD_HEAD_SIZE;
- tailroom = 128; /* reserve some tailroom for IP/TCP headers */
- } else {
- buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
- tailroom = 0;
- }
+ buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
- skb = dev_alloc_skb(buf_size + tailroom + 4);
+ skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
if (unlikely(!skb))
return NULL;
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
goto error;
- if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
- struct page *page = alloc_page(GFP_ATOMIC);
- if (!page)
- goto partial_error;
- skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
- mapping[1] =
- ib_dma_map_page(priv->ca, page,
- 0, PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
- goto partial_error;
- }
-
priv->rx_ring[id].skb = skb;
return skb;
-
-partial_error:
- ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
error:
dev_kfree_skb_any(skb);
return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wc->byte_len, wc->slid);
ipoib_ud_dma_unmap_rx(priv, mapping);
- ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+ skb_put(skb, wc->byte_len);
/* First byte of dgid signals multicast when 0xff */
dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
+ skb->truesize = SKB_TRUESIZE(skb->len);
+
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
}
}
+/*
+ * As the result of a completion error the QP Can be transferred to SQE states.
+ * The function checks if the (send)QP is in SQE state and
+ * moves it back to RTS state, that in order to have it functional again.
+ */
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+ struct ipoib_qp_state_validate *qp_work =
+ container_of(work, struct ipoib_qp_state_validate, work);
+
+ struct ipoib_dev_priv *priv = qp_work->priv;
+ struct ib_qp_attr qp_attr;
+ struct ib_qp_init_attr query_init_attr;
+ int ret;
+
+ ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+ if (ret) {
+ ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+ __func__, ret);
+ goto free_res;
+ }
+ pr_info("%s: QP: 0x%x is in state: %d\n",
+ __func__, priv->qp->qp_num, qp_attr.qp_state);
+
+ /* currently support only in SQE->RTS transition*/
+ if (qp_attr.qp_state == IB_QPS_SQE) {
+ qp_attr.qp_state = IB_QPS_RTS;
+
+ ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+ if (ret) {
+ pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+ ret, priv->qp->qp_num);
+ goto free_res;
+ }
+ pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+ __func__, priv->qp->qp_num);
+ } else {
+ pr_warn("QP (%d) will stay in state: %d\n",
+ priv->qp->qp_num, qp_attr.qp_state);
+ }
+
+free_res:
+ kfree(qp_work);
+}
+
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_wake_queue(dev);
if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR)
+ wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct ipoib_qp_state_validate *qp_work;
ipoib_warn(priv, "failed send event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
+ qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+ if (!qp_work) {
+ ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+ __func__, priv->qp->qp_num);
+ return;
+ }
+
+ INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+ qp_work->priv = priv;
+ queue_work(priv->wq, &qp_work->work);
+ }
}
static int poll_tx(struct ipoib_dev_priv *priv)
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
+static void ipoib_flush_ah(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ cancel_delayed_work(&priv->ah_reap_task);
+ flush_workqueue(priv->wq);
+ ipoib_reap_ah(&priv->ah_reap_task.work);
+}
+
+static void ipoib_stop_ah(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ ipoib_flush_ah(dev);
+}
+
static void ipoib_ib_tx_timer_func(unsigned long ctx)
{
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, flush);
+ ipoib_ib_dev_stop(dev);
return -1;
}
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- ipoib_mcast_stop_thread(dev, flush);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ timeout:
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
- /* Wait for all AHs to be reaped */
- set_bit(IPOIB_STOP_REAPER, &priv->flags);
- cancel_delayed_work(&priv->ah_reap_task);
- if (flush)
- flush_workqueue(ipoib_workqueue);
-
- begin = jiffies;
-
- while (!list_empty(&priv->dead_ahs)) {
- __ipoib_reap_ah(dev);
-
- if (time_after(jiffies, begin + HZ)) {
- ipoib_warn(priv, "timing out; will leak address handles\n");
- break;
- }
-
- msleep(1);
- }
+ ipoib_flush_ah(dev);
ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (level == IPOIB_FLUSH_LIGHT) {
ipoib_mark_paths_invalid(dev);
ipoib_mcast_dev_flush(dev);
+ ipoib_flush_ah(dev);
}
if (level >= IPOIB_FLUSH_NORMAL)
- ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_ib_dev_open(dev, 0) != 0)
+ ipoib_ib_dev_stop(dev);
+ if (ipoib_ib_dev_open(dev) != 0)
return;
if (netif_queue_stopped(dev))
netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_flush_paths(dev);
- ipoib_mcast_stop_thread(dev, 1);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
+ /*
+ * All of our ah references aren't free until after
+ * ipoib_mcast_dev_flush(), ipoib_flush_paths, and
+ * the neighbor garbage collection is stopped and reaped.
+ * That should all be done now, so make a final ah flush.
+ */
+ ipoib_stop_ah(dev);
+
ipoib_transport_dev_cleanup(dev);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3b6f2d..9e1b203d756d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0;
err_stop:
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev);
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_down(dev);
+ ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
-
- __skb_queue_tail(&neigh->queue, skb);
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ __skb_queue_tail(&neigh->queue, skb);
+ else
+ goto err_drop;
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
new_path = 1;
}
if (path) {
- __skb_queue_tail(&path->queue, skb);
+ if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ __skb_queue_tail(&path->queue, skb);
+ } else {
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ }
if (!path->query && path_rec_start(dev, path)) {
spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,19 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return;
}
- queue_work(ipoib_workqueue, &priv->restart_task);
+ queue_work(priv->wq, &priv->restart_task);
+}
+
+static int ipoib_get_iflink(const struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ /* parent interface */
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+ return dev->ifindex;
+
+ /* child/vlan interface */
+ return priv->parent->ifindex;
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
@@ -1133,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
return 0;
@@ -1262,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (ipoib_neigh_hash_init(priv) < 0)
- goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
- goto out_neigh_hash_cleanup;
+ goto out;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
+ /*
+ * Must be after ipoib_ib_dev_init so we can allocate a per
+ * device wq there and use it here
+ */
+ if (ipoib_neigh_hash_init(priv) < 0)
+ goto out_dev_uninit;
+
return 0;
+out_dev_uninit:
+ ipoib_ib_dev_cleanup(dev);
+
out_tx_ring_cleanup:
vfree(priv->tx_ring);
out_rx_ring_cleanup:
kfree(priv->rx_ring);
-out_neigh_hash_cleanup:
- ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -1317,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
+ /*
+ * Must be before ipoib_ib_dev_cleanup or we delete an in use
+ * work queue
+ */
+ ipoib_neigh_hash_uninit(dev);
+
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
@@ -1324,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
-
- ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
@@ -1341,6 +1370,7 @@ static const struct net_device_ops ipoib_netdev_ops = {
.ndo_start_xmit = ipoib_start_xmit,
.ndo_tx_timeout = ipoib_timeout,
.ndo_set_rx_mode = ipoib_set_mcast_list,
+ .ndo_get_iflink = ipoib_get_iflink,
};
void ipoib_setup(struct net_device *dev)
@@ -1633,10 +1663,11 @@ sysfs_failed:
register_failed:
ib_unregister_event_handler(&priv->event_handler);
+ flush_workqueue(ipoib_workqueue);
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1699,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
+ flush_workqueue(ipoib_workqueue);
rtnl_lock();
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1707,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
free_netdev(priv->dev);
@@ -1742,14 +1774,16 @@ static int __init ipoib_init_module(void)
return ret;
/*
- * We create our own workqueue mainly because we want to be
- * able to flush it when devices are being removed. We can't
- * use schedule_work()/flush_scheduled_work() because both
- * unregister_netdev() and linkwatch_event take the rtnl lock,
- * so flush_scheduled_work() can deadlock during device
- * removal.
+ * We create a global workqueue here that is used for all flush
+ * operations. However, if you attempt to flush a workqueue
+ * from a task on that same workqueue, it deadlocks the system.
+ * We want to be able to flush the tasks associated with a
+ * specific net device, so we also create a workqueue for each
+ * netdevice. We queue up the tasks for that device only on
+ * its private workqueue, and we only queue up flush events
+ * on our global flush workqueue. This avoids the deadlocks.
*/
- ipoib_workqueue = create_singlethread_workqueue("ipoib");
+ ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5f7e80..0d23e0568deb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
"Enable multicast debug tracing if > 0");
#endif
-static DEFINE_MUTEX(mcast_mutex);
-
struct ipoib_mcast_iter {
struct net_device *dev;
union ib_gid mgid;
@@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
unsigned int send_only;
};
+/*
+ * This should be called with the priv->lock held
+ */
+static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
+ struct ipoib_mcast *mcast,
+ bool delay)
+{
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+ return;
+
+ /*
+ * We will be scheduling *something*, so cancel whatever is
+ * currently scheduled first
+ */
+ cancel_delayed_work(&priv->mcast_task);
+ if (mcast && delay) {
+ /*
+ * We had a failure and want to schedule a retry later
+ */
+ mcast->backoff *= 2;
+ if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+ mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+ mcast->delay_until = jiffies + (mcast->backoff * HZ);
+ /*
+ * Mark this mcast for its delay, but restart the
+ * task immediately. The join task will make sure to
+ * clear out all entries without delays, and then
+ * schedule itself to run again when the earliest
+ * delay expires
+ */
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+ } else if (delay) {
+ /*
+ * Special case of retrying after a failure to
+ * allocate the broadcast multicast group, wait
+ * 1 second and try again
+ */
+ queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
+ } else
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+}
+
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
@@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
mcast->dev = dev;
mcast->created = jiffies;
+ mcast->delay_until = jiffies;
mcast->backoff = 1;
INIT_LIST_HEAD(&mcast->list);
@@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
spin_unlock_irq(&priv->lock);
return -EAGAIN;
}
- priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+ /*update priv member according to the new mcast*/
+ priv->broadcast->mcmember.qkey = mcmember->qkey;
+ priv->broadcast->mcmember.mtu = mcmember->mtu;
+ priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
+ priv->broadcast->mcmember.rate = mcmember->rate;
+ priv->broadcast->mcmember.sl = mcmember->sl;
+ priv->broadcast->mcmember.flow_label = mcmember->flow_label;
+ priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
+ /* assume if the admin and the mcast are the same both can be changed */
+ if (priv->mcast_mtu == priv->admin_mtu)
+ priv->admin_mtu =
+ priv->mcast_mtu =
+ IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+ else
+ priv->mcast_mtu =
+ IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1;
-
- if (!ipoib_cm_admin_enabled(dev)) {
- rtnl_lock();
- dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
- rtnl_unlock();
- }
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
-static int
-ipoib_mcast_sendonly_join_complete(int status,
- struct ib_sa_multicast *multicast)
-{
- struct ipoib_mcast *mcast = multicast->context;
- struct net_device *dev = mcast->dev;
-
- /* We trap for port events ourselves. */
- if (status == -ENETRESET)
- return 0;
-
- if (!status)
- status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
- if (status) {
- if (mcast->logcount++ < 20)
- ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
-
- /* Flush out any queued packets */
- netif_tx_lock_bh(dev);
- while (!skb_queue_empty(&mcast->pkt_queue)) {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
- }
- netif_tx_unlock_bh(dev);
-
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
- &mcast->flags);
- }
- return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
- struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_sa_mcmember_rec rec = {
-#if 0 /* Some SMs don't support send-only yet */
- .join_state = 4
-#else
- .join_state = 1
-#endif
- };
- int ret = 0;
-
- if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
- ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
- return -ENODEV;
- }
-
- if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
- ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
- return -EBUSY;
- }
-
- rec.mgid = mcast->mcmember.mgid;
- rec.port_gid = priv->local_gid;
- rec.pkey = cpu_to_be16(priv->pkey);
-
- mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
- priv->port, &rec,
- IB_SA_MCMEMBER_REC_MGID |
- IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_PKEY |
- IB_SA_MCMEMBER_REC_JOIN_STATE,
- GFP_ATOMIC,
- ipoib_mcast_sendonly_join_complete,
- mcast);
- if (IS_ERR(mcast->mc)) {
- ret = PTR_ERR(mcast->mc);
- clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
- ret);
- } else {
- ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
- mcast->mcmember.mgid.raw);
- }
-
- return ret;
-}
-
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- /*
- * Take rtnl_lock to avoid racing with ipoib_stop() and
- * turning the carrier back on while a device is being
- * removed.
- */
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
- rtnl_lock();
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop() and
+ * turning the carrier back on while a device is being
+ * removed. However, ipoib_stop() will attempt to flush
+ * the workqueue while holding the rtnl lock, so loop
+ * on trylock until either we get the lock or we see
+ * FLAG_OPER_UP go away as that signals that we are bailing
+ * and can safely ignore the carrier on work.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+ return;
+ else
+ msleep(20);
+ }
+ if (!ipoib_cm_admin_enabled(priv->dev))
+ dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
netif_carrier_on(priv->dev);
rtnl_unlock();
}
@@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
- ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+ ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
+ test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
+ "sendonly " : "",
mcast->mcmember.mgid.raw, status);
/* We trap for port events ourselves. */
@@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
if (!status) {
mcast->backoff = 1;
- mutex_lock(&mcast_mutex);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, 0);
- mutex_unlock(&mcast_mutex);
+ mcast->delay_until = jiffies;
/*
- * Defer carrier on work to ipoib_workqueue to avoid a
- * deadlock on rtnl_lock here.
+ * Defer carrier on work to priv->wq to avoid a
+ * deadlock on rtnl_lock here. Requeue our multicast
+ * work too, which will end up happening right after
+ * our carrier on task work and will allow us to
+ * send out all of the non-broadcast joins
*/
- if (mcast == priv->broadcast)
- queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
- status = 0;
- goto out;
- }
+ if (mcast == priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ queue_work(priv->wq, &priv->carrier_on_task);
+ __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+ goto out_locked;
+ }
+ } else {
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT || status == -EAGAIN) {
+ ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
+ test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+ mcast->mcmember.mgid.raw, status);
+ } else {
+ ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
+ test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+ mcast->mcmember.mgid.raw, status);
+ }
+ }
- if (mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EAGAIN) {
- ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+ mcast->backoff >= 2) {
+ /*
+ * We only retry sendonly joins once before we drop
+ * the packet and quit trying to deal with the
+ * group. However, we leave the group in the
+ * mcast list as an unjoined group. If we want to
+ * try joining again, we simply queue up a packet
+ * and restart the join thread. The empty queue
+ * is why the join thread ignores this group.
+ */
+ mcast->backoff = 1;
+ netif_tx_lock_bh(dev);
+ while (!skb_queue_empty(&mcast->pkt_queue)) {
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+ }
+ netif_tx_unlock_bh(dev);
} else {
- ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
+ spin_lock_irq(&priv->lock);
+ /* Requeue this join task with a backoff delay */
+ __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+ goto out_locked;
}
}
-
- mcast->backoff *= 2;
- if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
- mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
- mutex_lock(&mcast_mutex);
+out:
spin_lock_irq(&priv->lock);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
- mcast->backoff * HZ);
+out_locked:
+ /*
+ * Make sure to set mcast->mc before we clear the busy flag to avoid
+ * racing with code that checks for BUSY before checking mcast->mc
+ */
+ if (status)
+ mcast->mc = NULL;
+ else
+ mcast->mc = multicast;
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
spin_unlock_irq(&priv->lock);
- mutex_unlock(&mcast_mutex);
-out:
complete(&mcast->done);
+
return status;
}
@@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
int create)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
};
@@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
- mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+ multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
- if (IS_ERR(mcast->mc)) {
+ if (IS_ERR(multicast)) {
+ ret = PTR_ERR(multicast);
+ ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+ spin_lock_irq(&priv->lock);
+ /* Requeue this join task with a backoff delay */
+ __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ spin_unlock_irq(&priv->lock);
complete(&mcast->done);
- ret = PTR_ERR(mcast->mc);
- ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-
- mcast->backoff *= 2;
- if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
- mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
- mutex_lock(&mcast_mutex);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task,
- mcast->backoff * HZ);
- mutex_unlock(&mcast_mutex);
}
}
@@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct net_device *dev = priv->dev;
struct ib_port_attr port_attr;
+ unsigned long delay_until = 0;
+ struct ipoib_mcast *mcast = NULL;
+ int create = 1;
- if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+ spin_lock_irq(&priv->lock);
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+ goto out;
+
if (!priv->broadcast) {
struct ipoib_mcast *broadcast;
- if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
- return;
-
- broadcast = ipoib_mcast_alloc(dev, 1);
+ broadcast = ipoib_mcast_alloc(dev, 0);
if (!broadcast) {
ipoib_warn(priv, "failed to allocate broadcast group\n");
- mutex_lock(&mcast_mutex);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, HZ);
- mutex_unlock(&mcast_mutex);
- return;
+ /*
+ * Restart us after a 1 second delay to retry
+ * creating our broadcast group and attaching to
+ * it. Until this succeeds, this ipoib dev is
+ * completely stalled (multicast wise).
+ */
+ __ipoib_mcast_schedule_join_thread(priv, NULL, 1);
+ goto out;
}
- spin_lock_irq(&priv->lock);
memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
sizeof (union ib_gid));
priv->broadcast = broadcast;
__ipoib_mcast_add(dev, priv->broadcast);
- spin_unlock_irq(&priv->lock);
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
- if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
- ipoib_mcast_join(dev, priv->broadcast, 0);
- return;
- }
-
- while (1) {
- struct ipoib_mcast *mcast = NULL;
-
- spin_lock_irq(&priv->lock);
- list_for_each_entry(mcast, &priv->multicast_list, list) {
- if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
- /* Found the next unjoined group */
- break;
+ if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
+ mcast = priv->broadcast;
+ create = 0;
+ if (mcast->backoff > 1 &&
+ time_before(jiffies, mcast->delay_until)) {
+ delay_until = mcast->delay_until;
+ mcast = NULL;
}
}
- spin_unlock_irq(&priv->lock);
+ goto out;
+ }
- if (&mcast->list == &priv->multicast_list) {
- /* All done */
- break;
+ /*
+ * We'll never get here until the broadcast group is both allocated
+ * and attached
+ */
+ list_for_each_entry(mcast, &priv->multicast_list, list) {
+ if (IS_ERR_OR_NULL(mcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+ (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
+ !skb_queue_empty(&mcast->pkt_queue))) {
+ if (mcast->backoff == 1 ||
+ time_after_eq(jiffies, mcast->delay_until)) {
+ /* Found the next unjoined group */
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+ create = 0;
+ else
+ create = 1;
+ spin_unlock_irq(&priv->lock);
+ ipoib_mcast_join(dev, mcast, create);
+ spin_lock_irq(&priv->lock);
+ } else if (!delay_until ||
+ time_before(mcast->delay_until, delay_until))
+ delay_until = mcast->delay_until;
}
-
- ipoib_mcast_join(dev, mcast, 1);
- return;
}
- ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
+ mcast = NULL;
+ ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
- clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+out:
+ if (delay_until) {
+ cancel_delayed_work(&priv->mcast_task);
+ queue_delayed_work(priv->wq, &priv->mcast_task,
+ delay_until - jiffies);
+ }
+ if (mcast) {
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ }
+ spin_unlock_irq(&priv->lock);
+ if (mcast)
+ ipoib_mcast_join(dev, mcast, create);
}
int ipoib_mcast_start_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
- mutex_lock(&mcast_mutex);
- if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
- mutex_unlock(&mcast_mutex);
+ spin_lock_irqsave(&priv->lock, flags);
+ __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+ spin_unlock_irqrestore(&priv->lock, flags);
return 0;
}
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
- mutex_lock(&mcast_mutex);
- clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+ spin_lock_irqsave(&priv->lock, flags);
cancel_delayed_work(&priv->mcast_task);
- mutex_unlock(&mcast_mutex);
+ spin_unlock_irqrestore(&priv->lock, flags);
- if (flush)
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
return 0;
}
@@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+ if (!IS_ERR_OR_NULL(mcast->mc))
ib_sa_free_multicast(mcast->mc);
if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
- }
+ } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+ ipoib_dbg(priv, "leaving with no mcmember but not a "
+ "SENDONLY join\n");
return 0;
}
@@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
mcast = __ipoib_mcast_find(dev, mgid);
- if (!mcast) {
- /* Let's create a new send only group now */
- ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
- mgid);
-
- mcast = ipoib_mcast_alloc(dev, 0);
+ if (!mcast || !mcast->ah) {
if (!mcast) {
- ipoib_warn(priv, "unable to allocate memory for "
- "multicast structure\n");
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
- goto out;
- }
-
- set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
- memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
- __ipoib_mcast_add(dev, mcast);
- list_add_tail(&mcast->list, &priv->multicast_list);
- }
+ /* Let's create a new send only group now */
+ ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
+ mgid);
+
+ mcast = ipoib_mcast_alloc(dev, 0);
+ if (!mcast) {
+ ipoib_warn(priv, "unable to allocate memory "
+ "for multicast structure\n");
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ goto unlock;
+ }
- if (!mcast->ah) {
+ set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
+ memcpy(mcast->mcmember.mgid.raw, mgid,
+ sizeof (union ib_gid));
+ __ipoib_mcast_add(dev, mcast);
+ list_add_tail(&mcast->list, &priv->multicast_list);
+ }
if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
skb_queue_tail(&mcast->pkt_queue, skb);
else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
-
- if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
- ipoib_dbg_mcast(priv, "no address vector, "
- "but multicast join already started\n");
- else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- ipoib_mcast_sendonly_join(mcast);
-
- /*
- * If lookup completes between here and out:, don't
- * want to send packet twice.
- */
- mcast = NULL;
- }
-
-out:
- if (mcast && mcast->ah) {
+ if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+ __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+ }
+ } else {
struct ipoib_neigh *neigh;
spin_unlock_irqrestore(&priv->lock, flags);
@@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
- /* seperate between the wait to the leave*/
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
- if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
unsigned long flags;
struct ib_sa_mcmember_rec rec;
- ipoib_dbg_mcast(priv, "restarting multicast task\n");
+ if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+ /*
+ * shortcut...on shutdown flush is called next, just
+ * let it do all the work
+ */
+ return;
- ipoib_mcast_stop_thread(dev, 0);
+ ipoib_dbg_mcast(priv, "restarting multicast task\n");
local_irq_save(flags);
netif_addr_lock(dev);
@@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
netif_addr_unlock(dev);
local_irq_restore(flags);
- /* We have to cancel outside of the spinlock */
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ wait_for_completion(&mcast->done);
+
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
- if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
- ipoib_mcast_start_thread(dev);
+ /*
+ * Double check that we are still up
+ */
+ if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d44c53b..e5cc43074196 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd;
}
+ /*
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
+ */
+ priv->wq = create_singlethread_workqueue("ipoib_wq");
+ if (!priv->wq) {
+ printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+ goto out_free_mr;
+ }
+
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
- }
+ } else
+ goto out_free_wq;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
- goto out_free_mr;
+ goto out_cm_dev_cleanup;
}
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey;
- if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
- priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
- priv->rx_sge[1].length = PAGE_SIZE;
- priv->rx_sge[1].lkey = priv->mr->lkey;
- priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
- } else {
- priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
- priv->rx_wr.num_sge = 1;
- }
+
+ priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+ priv->rx_wr.num_sge = 1;
+
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
@@ -236,12 +242,19 @@ out_free_send_cq:
out_free_recv_cq:
ib_destroy_cq(priv->recv_cq);
+out_cm_dev_cleanup:
+ ipoib_cm_dev_cleanup(dev);
+
+out_free_wq:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+
out_free_mr:
ib_dereg_mr(priv->mr);
- ipoib_cm_dev_cleanup(dev);
out_free_pd:
ib_dealloc_pd(priv->pd);
+
return -ENODEV;
}
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
ipoib_cm_dev_cleanup(dev);
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
+
if (ib_dereg_mr(priv->mr))
ipoib_warn(priv, "ib_dereg_mr failed\n");
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 9fad7b5ac8b9..fca1a882de27 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -58,6 +58,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
/* MTU will be reset when mcast join happens */
priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
+ priv->parent = ppriv->dev;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
result = ipoib_set_dev_features(priv, ppriv->ca);
@@ -84,8 +85,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- priv->parent = ppriv->dev;
-
ipoib_create_debug_files(priv->dev);
/* RTNL childs don't need proprietary sysfs entries */
@@ -102,7 +101,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
}
priv->child_type = type;
- priv->dev->iflink = ppriv->dev->ifindex;
list_add_tail(&priv->list, &ppriv->child_intfs);
return 0;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index b47aea1094b2..262ba1f8ee50 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,7 +69,7 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.5"
+#define DRV_VER "1.6"
#define iser_dbg(fmt, arg...) \
do { \
@@ -218,22 +218,21 @@ enum iser_data_dir {
/**
* struct iser_data_buf - iSER data buffer
*
- * @buf: pointer to the sg list
+ * @sg: pointer to the sg list
* @size: num entries of this sg
* @data_len: total beffer byte len
* @dma_nents: returned by dma_map_sg
- * @copy_buf: allocated copy buf for SGs unaligned
- * for rdma which are copied
- * @sg_single: SG-ified clone of a non SG SC or
- * unaligned SG
+ * @orig_sg: pointer to the original sg list (in case
+ * we used a copy)
+ * @orig_size: num entris of orig sg list
*/
struct iser_data_buf {
- void *buf;
+ struct scatterlist *sg;
unsigned int size;
unsigned long data_len;
unsigned int dma_nents;
- char *copy_buf;
- struct scatterlist sg_single;
+ struct scatterlist *orig_sg;
+ unsigned int orig_size;
};
/* fwd declarations */
@@ -244,35 +243,14 @@ struct iscsi_endpoint;
/**
* struct iser_mem_reg - iSER memory registration info
*
- * @lkey: MR local key
- * @rkey: MR remote key
- * @va: MR start address (buffer va)
- * @len: MR length
+ * @sge: memory region sg element
+ * @rkey: memory region remote key
* @mem_h: pointer to registration context (FMR/Fastreg)
*/
struct iser_mem_reg {
- u32 lkey;
- u32 rkey;
- u64 va;
- u64 len;
- void *mem_h;
-};
-
-/**
- * struct iser_regd_buf - iSER buffer registration desc
- *
- * @reg: memory registration info
- * @virt_addr: virtual address of buffer
- * @device: reference to iser device
- * @direction: dma direction (for dma_unmap)
- * @data_size: data buffer size in bytes
- */
-struct iser_regd_buf {
- struct iser_mem_reg reg;
- void *virt_addr;
- struct iser_device *device;
- enum dma_data_direction direction;
- unsigned int data_size;
+ struct ib_sge sge;
+ u32 rkey;
+ void *mem_h;
};
enum iser_desc_type {
@@ -534,11 +512,9 @@ struct iser_conn {
* @sc: link to scsi command
* @command_sent: indicate if command was sent
* @dir: iser data direction
- * @rdma_regd: task rdma registration desc
+ * @rdma_reg: task rdma registration desc
* @data: iser data buffer desc
- * @data_copy: iser data copy buffer desc (bounce buffer)
* @prot: iser protection buffer desc
- * @prot_copy: iser protection copy buffer desc (bounce buffer)
*/
struct iscsi_iser_task {
struct iser_tx_desc desc;
@@ -547,11 +523,9 @@ struct iscsi_iser_task {
struct scsi_cmnd *sc;
int command_sent;
int dir[ISER_DIRS_NUM];
- struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];
+ struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
struct iser_data_buf data[ISER_DIRS_NUM];
- struct iser_data_buf data_copy[ISER_DIRS_NUM];
struct iser_data_buf prot[ISER_DIRS_NUM];
- struct iser_data_buf prot_copy[ISER_DIRS_NUM];
};
struct iser_page_vec {
@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
- struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
@@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *dst_addr,
int non_blocking);
-int iser_reg_page_vec(struct ib_conn *ib_conn,
- struct iser_page_vec *page_vec,
- struct iser_mem_reg *mem_reg);
-
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector);
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn);
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+ struct fast_reg_descriptor *desc);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 20e859a6f1a6..3e2118e8ed87 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
- struct iser_regd_buf *regd_buf;
+ struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
iser_err("Failed to set up Data-IN RDMA\n");
return err;
}
- regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
+ mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
hdr->flags |= ISER_RSV;
- hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
- hdr->read_va = cpu_to_be64(regd_buf->reg.va);
+ hdr->read_stag = cpu_to_be32(mem_reg->rkey);
+ hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
- task->itt, regd_buf->reg.rkey,
- (unsigned long long)regd_buf->reg.va);
+ task->itt, mem_reg->rkey,
+ (unsigned long long)mem_reg->sge.addr);
return 0;
}
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
- struct iser_regd_buf *regd_buf;
+ struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
- regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+ mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV;
- hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
- hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz);
+ hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+ hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n",
- task->itt, regd_buf->reg.rkey,
- (unsigned long long)regd_buf->reg.va, unsol_sz);
+ task->itt, mem_reg->rkey,
+ (unsigned long long)mem_reg->sge.addr, unsol_sz);
}
if (imm_sz > 0) {
iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
task->itt, imm_sz);
- tx_dsg->addr = regd_buf->reg.va;
+ tx_dsg->addr = mem_reg->sge.addr;
tx_dsg->length = imm_sz;
- tx_dsg->lkey = regd_buf->reg.lkey;
+ tx_dsg->lkey = mem_reg->sge.lkey;
iser_task->desc.num_sge = 2;
}
@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
}
if (scsi_sg_count(sc)) { /* using a scatter list */
- data_buf->buf = scsi_sglist(sc);
+ data_buf->sg = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc);
}
data_buf->data_len = scsi_bufflen(sc);
if (scsi_prot_sg_count(sc)) {
- prot_buf->buf = scsi_prot_sglist(sc);
+ prot_buf->sg = scsi_prot_sglist(sc);
prot_buf->size = scsi_prot_sg_count(sc);
- prot_buf->data_len = data_buf->data_len >>
- ilog2(sc->device->sector_size) * 8;
+ prot_buf->data_len = (data_buf->data_len >>
+ ilog2(sc->device->sector_size)) * 8;
}
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL;
- struct iser_regd_buf *regd_buf;
+ struct iser_mem_reg *mem_reg;
unsigned long buf_offset;
unsigned long data_seg_len;
uint32_t itt;
@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
/* build the tx desc */
iser_initialize_task_headers(task, tx_desc);
- regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+ mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1];
- tx_dsg->addr = regd_buf->reg.va + buf_offset;
- tx_dsg->length = data_seg_len;
- tx_dsg->lkey = regd_buf->reg.lkey;
+ tx_dsg->addr = mem_reg->sge.addr + buf_offset;
+ tx_dsg->length = data_seg_len;
+ tx_dsg->lkey = mem_reg->sge.lkey;
tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0;
- memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
- sizeof(struct iser_regd_buf));
- memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
- sizeof(struct iser_regd_buf));
+ memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
+ sizeof(struct iser_mem_reg));
+ memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
+ sizeof(struct iser_mem_reg));
}
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
*/
- if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
+ if (iser_task->data[ISER_DIR_IN].orig_sg) {
is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_IN],
- &iser_task->data_copy[ISER_DIR_IN],
ISER_DIR_IN);
}
- if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
+ if (iser_task->data[ISER_DIR_OUT].orig_sg) {
is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_OUT],
- &iser_task->data_copy[ISER_DIR_OUT],
ISER_DIR_OUT);
}
- if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+ if (iser_task->prot[ISER_DIR_IN].orig_sg) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_IN],
- &iser_task->prot_copy[ISER_DIR_IN],
ISER_DIR_IN);
}
- if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+ if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_OUT],
- &iser_task->prot_copy[ISER_DIR_OUT],
ISER_DIR_OUT);
}
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 341040bf0984..f0cdc961eb11 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -39,68 +39,173 @@
#include "iscsi_iser.h"
-#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+static void
+iser_free_bounce_sg(struct iser_data_buf *data)
+{
+ struct scatterlist *sg;
+ int count;
-/**
- * iser_start_rdma_unaligned_sg
- */
-static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
- struct iser_data_buf *data,
- struct iser_data_buf *data_copy,
- enum iser_data_dir cmd_dir)
+ for_each_sg(data->sg, sg, data->size, count)
+ __free_page(sg_page(sg));
+
+ kfree(data->sg);
+
+ data->sg = data->orig_sg;
+ data->size = data->orig_size;
+ data->orig_sg = NULL;
+ data->orig_size = 0;
+}
+
+static int
+iser_alloc_bounce_sg(struct iser_data_buf *data)
{
- struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
- struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
- char *mem = NULL;
- unsigned long cmd_data_len = 0;
- int dma_nents, i;
+ struct page *page;
+ unsigned long length = data->data_len;
+ int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
- for_each_sg(sgl, sg, data->size, i)
- cmd_data_len += ib_sg_dma_len(dev, sg);
+ sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
+ if (!sg)
+ goto err;
- if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
- mem = (void *)__get_free_pages(GFP_ATOMIC,
- ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
- else
- mem = kmalloc(cmd_data_len, GFP_ATOMIC);
+ sg_init_table(sg, nents);
+ while (length) {
+ u32 page_len = min_t(u32, length, PAGE_SIZE);
- if (mem == NULL) {
- iser_err("Failed to allocate mem size %d %d for copying sglist\n",
- data->size, (int)cmd_data_len);
- return -ENOMEM;
+ page = alloc_page(GFP_ATOMIC);
+ if (!page)
+ goto err;
+
+ sg_set_page(&sg[i], page, page_len, 0);
+ length -= page_len;
+ i++;
}
- if (cmd_dir == ISER_DIR_OUT) {
- /* copy the unaligned sg the buffer which is used for RDMA */
- char *p, *from;
-
- sgl = (struct scatterlist *)data->buf;
- p = mem;
- for_each_sg(sgl, sg, data->size, i) {
- from = kmap_atomic(sg_page(sg));
- memcpy(p,
- from + sg->offset,
- sg->length);
- kunmap_atomic(from);
- p += sg->length;
+ data->orig_sg = data->sg;
+ data->orig_size = data->size;
+ data->sg = sg;
+ data->size = nents;
+
+ return 0;
+
+err:
+ for (; i > 0; i--)
+ __free_page(sg_page(&sg[i - 1]));
+ kfree(sg);
+
+ return -ENOMEM;
+}
+
+static void
+iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
+{
+ struct scatterlist *osg, *bsg = data->sg;
+ void *oaddr, *baddr;
+ unsigned int left = data->data_len;
+ unsigned int bsg_off = 0;
+ int i;
+
+ for_each_sg(data->orig_sg, osg, data->orig_size, i) {
+ unsigned int copy_len, osg_off = 0;
+
+ oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
+ copy_len = min(left, osg->length);
+ while (copy_len) {
+ unsigned int len = min(copy_len, bsg->length - bsg_off);
+
+ baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
+ if (to_buffer)
+ memcpy(baddr + bsg_off, oaddr + osg_off, len);
+ else
+ memcpy(oaddr + osg_off, baddr + bsg_off, len);
+
+ kunmap_atomic(baddr - bsg->offset);
+ osg_off += len;
+ bsg_off += len;
+ copy_len -= len;
+
+ if (bsg_off >= bsg->length) {
+ bsg = sg_next(bsg);
+ bsg_off = 0;
+ }
}
+ kunmap_atomic(oaddr - osg->offset);
+ left -= osg_off;
}
+}
+
+static inline void
+iser_copy_from_bounce(struct iser_data_buf *data)
+{
+ iser_copy_bounce(data, false);
+}
+
+static inline void
+iser_copy_to_bounce(struct iser_data_buf *data)
+{
+ iser_copy_bounce(data, true);
+}
+
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn)
+{
+ struct fast_reg_descriptor *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ desc = list_first_entry(&ib_conn->fastreg.pool,
+ struct fast_reg_descriptor, list);
+ list_del(&desc->list);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+
+ return desc;
+}
+
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+ struct fast_reg_descriptor *desc)
+{
+ unsigned long flags;
- sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
- data_copy->buf = &data_copy->sg_single;
- data_copy->size = 1;
- data_copy->copy_buf = mem;
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add(&desc->list, &ib_conn->fastreg.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+}
- dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
- (cmd_dir == ISER_DIR_OUT) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- BUG_ON(dma_nents == 0);
+/**
+ * iser_start_rdma_unaligned_sg
+ */
+static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *data,
+ enum iser_data_dir cmd_dir)
+{
+ struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
+ int rc;
+
+ rc = iser_alloc_bounce_sg(data);
+ if (rc) {
+ iser_err("Failed to allocate bounce for data len %lu\n",
+ data->data_len);
+ return rc;
+ }
+
+ if (cmd_dir == ISER_DIR_OUT)
+ iser_copy_to_bounce(data);
- data_copy->dma_nents = dma_nents;
- data_copy->data_len = cmd_data_len;
+ data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
+ (cmd_dir == ISER_DIR_OUT) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ if (!data->dma_nents) {
+ iser_err("Got dma_nents %d, something went wrong...\n",
+ data->dma_nents);
+ rc = -ENOMEM;
+ goto err;
+ }
return 0;
+err:
+ iser_free_bounce_sg(data);
+ return rc;
}
/**
@@ -109,51 +214,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data,
- struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir)
{
- struct ib_device *dev;
- unsigned long cmd_data_len;
-
- dev = iser_task->iser_conn->ib_conn.device->ib_device;
+ struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
- ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
+ ib_dma_unmap_sg(dev, data->sg, data->size,
(cmd_dir == ISER_DIR_OUT) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (cmd_dir == ISER_DIR_IN) {
- char *mem;
- struct scatterlist *sgl, *sg;
- unsigned char *p, *to;
- unsigned int sg_size;
- int i;
-
- /* copy back read RDMA to unaligned sg */
- mem = data_copy->copy_buf;
-
- sgl = (struct scatterlist *)data->buf;
- sg_size = data->size;
-
- p = mem;
- for_each_sg(sgl, sg, sg_size, i) {
- to = kmap_atomic(sg_page(sg));
- memcpy(to + sg->offset,
- p,
- sg->length);
- kunmap_atomic(to);
- p += sg->length;
- }
- }
+ if (cmd_dir == ISER_DIR_IN)
+ iser_copy_from_bounce(data);
- cmd_data_len = data->data_len;
-
- if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
- free_pages((unsigned long)data_copy->copy_buf,
- ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
- else
- kfree(data_copy->copy_buf);
-
- data_copy->copy_buf = NULL;
+ iser_free_bounce_sg(data);
}
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -175,7 +247,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct ib_device *ibdev, u64 *pages,
int *offset, int *data_size)
{
- struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+ struct scatterlist *sg, *sgl = data->sg;
u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0;
unsigned int dma_len;
@@ -227,14 +299,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev)
{
- struct scatterlist *sgl, *sg, *next_sg = NULL;
+ struct scatterlist *sg, *sgl, *next_sg = NULL;
u64 start_addr, end_addr;
int i, ret_len, start_check = 0;
if (data->dma_nents == 1)
return 1;
- sgl = (struct scatterlist *)data->buf;
+ sgl = data->sg;
start_addr = ib_sg_dma_address(ibdev, sgl);
for_each_sg(sgl, sg, data->dma_nents, i) {
@@ -266,11 +338,10 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
- struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
int i;
- for_each_sg(sgl, sg, data->dma_nents, i)
+ for_each_sg(data->sg, sg, data->dma_nents, i)
iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
@@ -288,31 +359,6 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}
-static void iser_page_vec_build(struct iser_data_buf *data,
- struct iser_page_vec *page_vec,
- struct ib_device *ibdev)
-{
- int page_vec_len = 0;
-
- page_vec->length = 0;
- page_vec->offset = 0;
-
- iser_dbg("Translating sg sz: %d\n", data->dma_nents);
- page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
- &page_vec->offset,
- &page_vec->data_size);
- iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
-
- page_vec->length = page_vec_len;
-
- if (page_vec_len * SIZE_4K < page_vec->data_size) {
- iser_err("page_vec too short to hold this SG\n");
- iser_data_buf_dump(data, ibdev);
- iser_dump_page_vec(page_vec);
- BUG();
- }
-}
-
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data,
enum iser_data_dir iser_dir,
@@ -323,7 +369,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
iser_task->dir[iser_dir] = 1;
dev = iser_task->iser_conn->ib_conn.device->ib_device;
- data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
+ data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
if (data->dma_nents == 0) {
iser_err("dma_map_sg failed!!!\n");
return -EINVAL;
@@ -338,24 +384,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
dev = iser_task->iser_conn->ib_conn.device->ib_device;
- ib_dma_unmap_sg(dev, data->buf, data->size, dir);
+ ib_dma_unmap_sg(dev, data->sg, data->size, dir);
+}
+
+static int
+iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
+ struct iser_mem_reg *reg)
+{
+ struct scatterlist *sg = mem->sg;
+
+ reg->sge.lkey = device->mr->lkey;
+ reg->rkey = device->mr->rkey;
+ reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
+ reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
+
+ iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
+ " length=0x%x\n", reg->sge.lkey, reg->rkey,
+ reg->sge.addr, reg->sge.length);
+
+ return 0;
}
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
- struct ib_device *ibdev,
struct iser_data_buf *mem,
- struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir,
int aligned_len)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
iscsi_conn->fmr_unalign_cnt++;
iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
aligned_len, mem->size);
if (iser_debug_level > 0)
- iser_data_buf_dump(mem, ibdev);
+ iser_data_buf_dump(mem, device->ib_device);
/* unmap the command data before accessing it */
iser_dma_unmap_task_data(iser_task, mem,
@@ -364,13 +427,95 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
/* allocate copy buf, if we are writing, copy the */
/* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+ if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
return -ENOMEM;
return 0;
}
/**
+ * iser_reg_page_vec - Register physical memory
+ *
+ * returns: 0 on success, errno code on failure
+ */
+static
+int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_page_vec *page_vec,
+ struct iser_mem_reg *mem_reg)
+{
+ struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_pool_fmr *fmr;
+ int ret, plen;
+
+ plen = iser_sg_to_page_vec(mem, device->ib_device,
+ page_vec->pages,
+ &page_vec->offset,
+ &page_vec->data_size);
+ page_vec->length = plen;
+ if (plen * SIZE_4K < page_vec->data_size) {
+ iser_err("page vec too short to hold this SG\n");
+ iser_data_buf_dump(mem, device->ib_device);
+ iser_dump_page_vec(page_vec);
+ return -EINVAL;
+ }
+
+ fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
+ page_vec->pages,
+ page_vec->length,
+ page_vec->pages[0]);
+ if (IS_ERR(fmr)) {
+ ret = PTR_ERR(fmr);
+ iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
+ return ret;
+ }
+
+ mem_reg->sge.lkey = fmr->fmr->lkey;
+ mem_reg->rkey = fmr->fmr->rkey;
+ mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
+ mem_reg->sge.length = page_vec->data_size;
+ mem_reg->mem_h = fmr;
+
+ return 0;
+}
+
+/**
+ * Unregister (previosuly registered using FMR) memory.
+ * If memory is non-FMR does nothing.
+ */
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+ int ret;
+
+ if (!reg->mem_h)
+ return;
+
+ iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
+
+ ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
+ if (ret)
+ iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+
+ reg->mem_h = NULL;
+}
+
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+
+ if (!reg->mem_h)
+ return;
+
+ iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
+ reg->mem_h);
+ reg->mem_h = NULL;
+}
+
+/**
* iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
* using FMR (if possible) obtaining rkey and va
*
@@ -383,45 +528,29 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
- struct iser_regd_buf *regd_buf;
+ struct iser_mem_reg *mem_reg;
int aligned_len;
int err;
int i;
- struct scatterlist *sg;
- regd_buf = &iser_task->rdma_regd[cmd_dir];
+ mem_reg = &iser_task->rdma_reg[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, ibdev, mem,
- &iser_task->data_copy[cmd_dir],
+ err = fall_to_bounce_buf(iser_task, mem,
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
return err;
}
- mem = &iser_task->data_copy[cmd_dir];
}
/* if there a single dma entry, FMR is not needed */
if (mem->dma_nents == 1) {
- sg = (struct scatterlist *)mem->buf;
-
- regd_buf->reg.lkey = device->mr->lkey;
- regd_buf->reg.rkey = device->mr->rkey;
- regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
- regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
-
- iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
- "va: 0x%08lX sz: %ld]\n",
- (unsigned int)regd_buf->reg.lkey,
- (unsigned int)regd_buf->reg.rkey,
- (unsigned long)regd_buf->reg.va,
- (unsigned long)regd_buf->reg.len);
+ return iser_reg_dma(device, mem, mem_reg);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
- &regd_buf->reg);
+ err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
+ mem_reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
@@ -519,8 +648,10 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
- struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
- struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+ struct fast_reg_descriptor *desc,
+ struct iser_mem_reg *data_reg,
+ struct iser_mem_reg *prot_reg,
+ struct iser_mem_reg *sig_reg)
{
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_pi_context *pi_ctx = desc->pi_ctx;
@@ -544,12 +675,12 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
memset(&sig_wr, 0, sizeof(sig_wr));
sig_wr.opcode = IB_WR_REG_SIG_MR;
sig_wr.wr_id = ISER_FASTREG_LI_WRID;
- sig_wr.sg_list = data_sge;
+ sig_wr.sg_list = &data_reg->sge;
sig_wr.num_sge = 1;
sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
if (scsi_prot_sg_count(iser_task->sc))
- sig_wr.wr.sig_handover.prot = prot_sge;
+ sig_wr.wr.sig_handover.prot = &prot_reg->sge;
sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
@@ -566,27 +697,26 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
}
desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
- sig_sge->lkey = pi_ctx->sig_mr->lkey;
- sig_sge->addr = 0;
- sig_sge->length = scsi_transfer_length(iser_task->sc);
+ sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
+ sig_reg->rkey = pi_ctx->sig_mr->rkey;
+ sig_reg->sge.addr = 0;
+ sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
- iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
- sig_sge->addr, sig_sge->length,
- sig_sge->lkey);
+ iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+ sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
+ sig_reg->sge.length);
err:
return ret;
}
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
- struct iser_regd_buf *regd_buf,
struct iser_data_buf *mem,
+ struct fast_reg_descriptor *desc,
enum iser_reg_indicator ind,
- struct ib_sge *sge)
+ struct iser_mem_reg *reg)
{
- struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- struct ib_device *ibdev = device->ib_device;
struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr;
@@ -594,17 +724,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
int ret, offset, size, plen;
/* if there a single dma entry, dma mr suffices */
- if (mem->dma_nents == 1) {
- struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
- sge->lkey = device->mr->lkey;
- sge->addr = ib_sg_dma_address(ibdev, &sg[0]);
- sge->length = ib_sg_dma_len(ibdev, &sg[0]);
-
- iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
- sge->lkey, sge->addr, sge->length);
- return 0;
- }
+ if (mem->dma_nents == 1)
+ return iser_reg_dma(device, mem, reg);
if (ind == ISER_DATA_KEY_VALID) {
mr = desc->data_mr;
@@ -652,9 +773,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
}
desc->reg_indicators &= ~ind;
- sge->lkey = mr->lkey;
- sge->addr = frpl->page_list[0] + offset;
- sge->length = size;
+ reg->sge.lkey = mr->lkey;
+ reg->rkey = mr->rkey;
+ reg->sge.addr = frpl->page_list[0] + offset;
+ reg->sge.length = size;
return ret;
}
@@ -672,93 +794,66 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
- struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+ struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = NULL;
- struct ib_sge data_sge;
int err, aligned_len;
- unsigned long flags;
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, ibdev, mem,
- &iser_task->data_copy[cmd_dir],
+ err = fall_to_bounce_buf(iser_task, mem,
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
return err;
}
- mem = &iser_task->data_copy[cmd_dir];
}
if (mem->dma_nents != 1 ||
scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
- spin_lock_irqsave(&ib_conn->lock, flags);
- desc = list_first_entry(&ib_conn->fastreg.pool,
- struct fast_reg_descriptor, list);
- list_del(&desc->list);
- spin_unlock_irqrestore(&ib_conn->lock, flags);
- regd_buf->reg.mem_h = desc;
+ desc = iser_reg_desc_get(ib_conn);
+ mem_reg->mem_h = desc;
}
- err = iser_fast_reg_mr(iser_task, regd_buf, mem,
- ISER_DATA_KEY_VALID, &data_sge);
+ err = iser_fast_reg_mr(iser_task, mem, desc,
+ ISER_DATA_KEY_VALID, mem_reg);
if (err)
goto err_reg;
if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
- struct ib_sge prot_sge, sig_sge;
+ struct iser_mem_reg prot_reg;
- memset(&prot_sge, 0, sizeof(prot_sge));
+ memset(&prot_reg, 0, sizeof(prot_reg));
if (scsi_prot_sg_count(iser_task->sc)) {
mem = &iser_task->prot[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, ibdev, mem,
- &iser_task->prot_copy[cmd_dir],
+ err = fall_to_bounce_buf(iser_task, mem,
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
return err;
}
- mem = &iser_task->prot_copy[cmd_dir];
}
- err = iser_fast_reg_mr(iser_task, regd_buf, mem,
- ISER_PROT_KEY_VALID, &prot_sge);
+ err = iser_fast_reg_mr(iser_task, mem, desc,
+ ISER_PROT_KEY_VALID, &prot_reg);
if (err)
goto err_reg;
}
- err = iser_reg_sig_mr(iser_task, desc, &data_sge,
- &prot_sge, &sig_sge);
+ err = iser_reg_sig_mr(iser_task, desc, mem_reg,
+ &prot_reg, mem_reg);
if (err) {
iser_err("Failed to register signature mr\n");
return err;
}
desc->reg_indicators |= ISER_FASTREG_PROTECTED;
-
- regd_buf->reg.lkey = sig_sge.lkey;
- regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
- regd_buf->reg.va = sig_sge.addr;
- regd_buf->reg.len = sig_sge.length;
- } else {
- if (desc)
- regd_buf->reg.rkey = desc->data_mr->rkey;
- else
- regd_buf->reg.rkey = device->mr->rkey;
-
- regd_buf->reg.lkey = data_sge.lkey;
- regd_buf->reg.va = data_sge.addr;
- regd_buf->reg.len = data_sge.length;
}
return 0;
err_reg:
- if (desc) {
- spin_lock_irqsave(&ib_conn->lock, flags);
- list_add_tail(&desc->list, &ib_conn->fastreg.pool);
- spin_unlock_irqrestore(&ib_conn->lock, flags);
- }
+ if (desc)
+ iser_reg_desc_put(ib_conn, desc);
return err;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 4065abe28829..cc2dd35ffbc0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -274,6 +274,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
}
static int
+iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
+ struct fast_reg_descriptor *desc)
+{
+ struct iser_pi_context *pi_ctx = NULL;
+ struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
+ .flags = IB_MR_SIGNATURE_EN};
+ int ret = 0;
+
+ desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+ if (!desc->pi_ctx)
+ return -ENOMEM;
+
+ pi_ctx = desc->pi_ctx;
+
+ pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(pi_ctx->prot_frpl)) {
+ ret = PTR_ERR(pi_ctx->prot_frpl);
+ goto prot_frpl_failure;
+ }
+
+ pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(pi_ctx->prot_mr)) {
+ ret = PTR_ERR(pi_ctx->prot_mr);
+ goto prot_mr_failure;
+ }
+ desc->reg_indicators |= ISER_PROT_KEY_VALID;
+
+ pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+ if (IS_ERR(pi_ctx->sig_mr)) {
+ ret = PTR_ERR(pi_ctx->sig_mr);
+ goto sig_mr_failure;
+ }
+ desc->reg_indicators |= ISER_SIG_KEY_VALID;
+ desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+
+ return 0;
+
+sig_mr_failure:
+ ib_dereg_mr(desc->pi_ctx->prot_mr);
+prot_mr_failure:
+ ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+prot_frpl_failure:
+ kfree(desc->pi_ctx);
+
+ return ret;
+}
+
+static void
+iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
+{
+ ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
+ ib_dereg_mr(pi_ctx->prot_mr);
+ ib_destroy_mr(pi_ctx->sig_mr);
+ kfree(pi_ctx);
+}
+
+static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
bool pi_enable, struct fast_reg_descriptor *desc)
{
@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
desc->reg_indicators |= ISER_DATA_KEY_VALID;
if (pi_enable) {
- struct ib_mr_init_attr mr_init_attr = {0};
- struct iser_pi_context *pi_ctx = NULL;
-
- desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
- if (!desc->pi_ctx) {
- iser_err("Failed to allocate pi context\n");
- ret = -ENOMEM;
+ ret = iser_alloc_pi_ctx(ib_device, pd, desc);
+ if (ret)
goto pi_ctx_alloc_failure;
- }
- pi_ctx = desc->pi_ctx;
-
- pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(pi_ctx->prot_frpl)) {
- ret = PTR_ERR(pi_ctx->prot_frpl);
- iser_err("Failed to allocate prot frpl ret=%d\n",
- ret);
- goto prot_frpl_failure;
- }
-
- pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
- ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(pi_ctx->prot_mr)) {
- ret = PTR_ERR(pi_ctx->prot_mr);
- iser_err("Failed to allocate prot frmr ret=%d\n",
- ret);
- goto prot_mr_failure;
- }
- desc->reg_indicators |= ISER_PROT_KEY_VALID;
-
- mr_init_attr.max_reg_descriptors = 2;
- mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
- pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
- if (IS_ERR(pi_ctx->sig_mr)) {
- ret = PTR_ERR(pi_ctx->sig_mr);
- iser_err("Failed to allocate signature enabled mr err=%d\n",
- ret);
- goto sig_mr_failure;
- }
- desc->reg_indicators |= ISER_SIG_KEY_VALID;
}
- desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
-
- iser_dbg("Create fr_desc %p page_list %p\n",
- desc, desc->data_frpl->page_list);
return 0;
-sig_mr_failure:
- ib_dereg_mr(desc->pi_ctx->prot_mr);
-prot_mr_failure:
- ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-prot_frpl_failure:
- kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_del(&desc->list);
ib_free_fast_reg_page_list(desc->data_frpl);
ib_dereg_mr(desc->data_mr);
- if (desc->pi_ctx) {
- ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
- ib_dereg_mr(desc->pi_ctx->prot_mr);
- ib_destroy_mr(desc->pi_ctx->sig_mr);
- kfree(desc->pi_ctx);
- }
+ if (desc->pi_ctx)
+ iser_free_pi_ctx(desc->pi_ctx);
kfree(desc);
++i;
}
@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
struct iser_conn *iser_conn;
iser_conn = (struct iser_conn *)cma_id->context;
- iser_conn->state = ISER_CONN_DOWN;
+ iser_conn->state = ISER_CONN_TERMINATING;
}
/**
@@ -992,93 +1000,6 @@ connect_failure:
return err;
}
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
-int iser_reg_page_vec(struct ib_conn *ib_conn,
- struct iser_page_vec *page_vec,
- struct iser_mem_reg *mem_reg)
-{
- struct ib_pool_fmr *mem;
- u64 io_addr;
- u64 *page_list;
- int status;
-
- page_list = page_vec->pages;
- io_addr = page_list[0];
-
- mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
- page_list,
- page_vec->length,
- io_addr);
-
- if (IS_ERR(mem)) {
- status = (int)PTR_ERR(mem);
- iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
- return status;
- }
-
- mem_reg->lkey = mem->fmr->lkey;
- mem_reg->rkey = mem->fmr->rkey;
- mem_reg->len = page_vec->length * SIZE_4K;
- mem_reg->va = io_addr;
- mem_reg->mem_h = (void *)mem;
-
- mem_reg->va += page_vec->offset;
- mem_reg->len = page_vec->data_size;
-
- iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
- "entry[0]: (0x%08lx,%ld)] -> "
- "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
- page_vec, page_vec->length,
- (unsigned long)page_vec->pages[0],
- (unsigned long)page_vec->data_size,
- (unsigned int)mem_reg->lkey, mem_reg->mem_h,
- (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
- return 0;
-}
-
-/**
- * Unregister (previosuly registered using FMR) memory.
- * If memory is non-FMR does nothing.
- */
-void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
-{
- struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
- int ret;
-
- if (!reg->mem_h)
- return;
-
- iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
-
- ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
- if (ret)
- iser_err("ib_fmr_pool_unmap failed %d\n", ret);
-
- reg->mem_h = NULL;
-}
-
-void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
-{
- struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
- struct iser_conn *iser_conn = iser_task->iser_conn;
- struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct fast_reg_descriptor *desc = reg->mem_h;
-
- if (!desc)
- return;
-
- reg->mem_h = NULL;
- spin_lock_bh(&ib_conn->lock);
- list_add_tail(&desc->list, &ib_conn->fastreg.pool);
- spin_unlock_bh(&ib_conn->lock);
-}
-
int iser_post_recvl(struct iser_conn *iser_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
+ if (wc->wr_id == ISER_FASTREG_LI_WRID)
+ return;
+
if (is_iser_tx_desc(iser_conn, wr_id)) {
struct iser_tx_desc *desc = wr_id;
@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
else
iser_dbg("flush error: wr id %llx\n", wc->wr_id);
- if (wc->wr_id != ISER_FASTREG_LI_WRID &&
- wc->wr_id != ISER_BEACON_WRID)
- iser_handle_comp_error(ib_conn, wc);
-
- /* complete in case all flush errors were consumed */
if (wc->wr_id == ISER_BEACON_WRID)
+ /* all flush errors were consumed */
complete(&ib_conn->flush_comp);
+ else
+ iser_handle_comp_error(ib_conn, wc);
}
}
@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
- struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+ struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = reg->mem_h;
unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 075b19cc78e8..327529ee85eb 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -76,12 +76,12 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
static void
isert_qp_event_callback(struct ib_event *e, void *context)
{
- struct isert_conn *isert_conn = (struct isert_conn *)context;
+ struct isert_conn *isert_conn = context;
isert_err("conn %p event: %d\n", isert_conn, e->event);
switch (e->event) {
case IB_EVENT_COMM_EST:
- rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
+ rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
@@ -107,13 +107,12 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
return 0;
}
-static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+static struct isert_comp *
+isert_comp_get(struct isert_conn *isert_conn)
{
- struct isert_device *device = isert_conn->conn_device;
- struct ib_qp_init_attr attr;
+ struct isert_device *device = isert_conn->device;
struct isert_comp *comp;
- int ret, i, min = 0;
+ int i, min = 0;
mutex_lock(&device_list_mutex);
for (i = 0; i < device->comps_used; i++)
@@ -122,9 +121,30 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
min = i;
comp = &device->comps[min];
comp->active_qps++;
+ mutex_unlock(&device_list_mutex);
+
isert_info("conn %p, using comp %p min_index: %d\n",
isert_conn, comp, min);
+
+ return comp;
+}
+
+static void
+isert_comp_put(struct isert_comp *comp)
+{
+ mutex_lock(&device_list_mutex);
+ comp->active_qps--;
mutex_unlock(&device_list_mutex);
+}
+
+static struct ib_qp *
+isert_create_qp(struct isert_conn *isert_conn,
+ struct isert_comp *comp,
+ struct rdma_cm_id *cma_id)
+{
+ struct isert_device *device = isert_conn->device;
+ struct ib_qp_init_attr attr;
+ int ret;
memset(&attr, 0, sizeof(struct ib_qp_init_attr));
attr.event_handler = isert_qp_event_callback;
@@ -149,19 +169,31 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
if (device->pi_capable)
attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
- ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
+ ret = rdma_create_qp(cma_id, device->pd, &attr);
if (ret) {
isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ return cma_id->qp;
+}
+
+static int
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+{
+ struct isert_comp *comp;
+ int ret;
+
+ comp = isert_comp_get(isert_conn);
+ isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id);
+ if (IS_ERR(isert_conn->qp)) {
+ ret = PTR_ERR(isert_conn->qp);
goto err;
}
- isert_conn->conn_qp = cma_id->qp;
return 0;
err:
- mutex_lock(&device_list_mutex);
- comp->active_qps--;
- mutex_unlock(&device_list_mutex);
-
+ isert_comp_put(comp);
return ret;
}
@@ -174,18 +206,19 @@ isert_cq_event_callback(struct ib_event *e, void *context)
static int
isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct iser_rx_desc *rx_desc;
struct ib_sge *rx_sg;
u64 dma_addr;
int i, j;
- isert_conn->conn_rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
+ isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
sizeof(struct iser_rx_desc), GFP_KERNEL);
- if (!isert_conn->conn_rx_descs)
+ if (!isert_conn->rx_descs)
goto fail;
- rx_desc = isert_conn->conn_rx_descs;
+ rx_desc = isert_conn->rx_descs;
for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) {
dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc,
@@ -198,21 +231,21 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
rx_sg = &rx_desc->rx_sg;
rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = isert_conn->conn_mr->lkey;
+ rx_sg->lkey = device->mr->lkey;
}
- isert_conn->conn_rx_desc_head = 0;
+ isert_conn->rx_desc_head = 0;
return 0;
dma_map_fail:
- rx_desc = isert_conn->conn_rx_descs;
+ rx_desc = isert_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++) {
ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
}
- kfree(isert_conn->conn_rx_descs);
- isert_conn->conn_rx_descs = NULL;
+ kfree(isert_conn->rx_descs);
+ isert_conn->rx_descs = NULL;
fail:
isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
@@ -222,59 +255,51 @@ fail:
static void
isert_free_rx_descriptors(struct isert_conn *isert_conn)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->device->ib_device;
struct iser_rx_desc *rx_desc;
int i;
- if (!isert_conn->conn_rx_descs)
+ if (!isert_conn->rx_descs)
return;
- rx_desc = isert_conn->conn_rx_descs;
+ rx_desc = isert_conn->rx_descs;
for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) {
ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
}
- kfree(isert_conn->conn_rx_descs);
- isert_conn->conn_rx_descs = NULL;
+ kfree(isert_conn->rx_descs);
+ isert_conn->rx_descs = NULL;
}
static void isert_cq_work(struct work_struct *);
static void isert_cq_callback(struct ib_cq *, void *);
-static int
-isert_create_device_ib_res(struct isert_device *device)
+static void
+isert_free_comps(struct isert_device *device)
{
- struct ib_device *ib_dev = device->ib_device;
- struct ib_device_attr *dev_attr;
- int ret = 0, i;
- int max_cqe;
-
- dev_attr = &device->dev_attr;
- ret = isert_query_device(ib_dev, dev_attr);
- if (ret)
- return ret;
+ int i;
- max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+ for (i = 0; i < device->comps_used; i++) {
+ struct isert_comp *comp = &device->comps[i];
- /* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
- device->use_fastreg = 1;
- device->reg_rdma_mem = isert_reg_rdma;
- device->unreg_rdma_mem = isert_unreg_rdma;
- } else {
- device->use_fastreg = 0;
- device->reg_rdma_mem = isert_map_rdma;
- device->unreg_rdma_mem = isert_unmap_cmd;
+ if (comp->cq) {
+ cancel_work_sync(&comp->work);
+ ib_destroy_cq(comp->cq);
+ }
}
+ kfree(device->comps);
+}
- /* Check signature cap */
- device->pi_capable = dev_attr->device_cap_flags &
- IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+static int
+isert_alloc_comps(struct isert_device *device,
+ struct ib_device_attr *attr)
+{
+ int i, max_cqe, ret = 0;
device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors));
+ device->ib_device->num_comp_vectors));
+
isert_info("Using %d CQs, %s supports %d vectors support "
"Fast registration %d pi_capable %d\n",
device->comps_used, device->ib_device->name,
@@ -288,6 +313,8 @@ isert_create_device_ib_res(struct isert_device *device)
return -ENOMEM;
}
+ max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+
for (i = 0; i < device->comps_used; i++) {
struct isert_comp *comp = &device->comps[i];
@@ -299,6 +326,7 @@ isert_create_device_ib_res(struct isert_device *device)
(void *)comp,
max_cqe, i);
if (IS_ERR(comp->cq)) {
+ isert_err("Unable to allocate cq\n");
ret = PTR_ERR(comp->cq);
comp->cq = NULL;
goto out_cq;
@@ -310,40 +338,79 @@ isert_create_device_ib_res(struct isert_device *device)
}
return 0;
-
out_cq:
- for (i = 0; i < device->comps_used; i++) {
- struct isert_comp *comp = &device->comps[i];
+ isert_free_comps(device);
+ return ret;
+}
- if (comp->cq) {
- cancel_work_sync(&comp->work);
- ib_destroy_cq(comp->cq);
- }
+static int
+isert_create_device_ib_res(struct isert_device *device)
+{
+ struct ib_device_attr *dev_attr;
+ int ret;
+
+ dev_attr = &device->dev_attr;
+ ret = isert_query_device(device->ib_device, dev_attr);
+ if (ret)
+ return ret;
+
+ /* asign function handlers */
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ device->use_fastreg = 1;
+ device->reg_rdma_mem = isert_reg_rdma;
+ device->unreg_rdma_mem = isert_unreg_rdma;
+ } else {
+ device->use_fastreg = 0;
+ device->reg_rdma_mem = isert_map_rdma;
+ device->unreg_rdma_mem = isert_unmap_cmd;
}
- kfree(device->comps);
+ ret = isert_alloc_comps(device, dev_attr);
+ if (ret)
+ return ret;
+
+ device->pd = ib_alloc_pd(device->ib_device);
+ if (IS_ERR(device->pd)) {
+ ret = PTR_ERR(device->pd);
+ isert_err("failed to allocate pd, device %p, ret=%d\n",
+ device, ret);
+ goto out_cq;
+ }
+
+ device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(device->mr)) {
+ ret = PTR_ERR(device->mr);
+ isert_err("failed to create dma mr, device %p, ret=%d\n",
+ device, ret);
+ goto out_mr;
+ }
+
+ /* Check signature cap */
+ device->pi_capable = dev_attr->device_cap_flags &
+ IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+
+ return 0;
+
+out_mr:
+ ib_dealloc_pd(device->pd);
+out_cq:
+ isert_free_comps(device);
return ret;
}
static void
isert_free_device_ib_res(struct isert_device *device)
{
- int i;
-
isert_info("device %p\n", device);
- for (i = 0; i < device->comps_used; i++) {
- struct isert_comp *comp = &device->comps[i];
-
- cancel_work_sync(&comp->work);
- ib_destroy_cq(comp->cq);
- comp->cq = NULL;
- }
- kfree(device->comps);
+ ib_dereg_mr(device->mr);
+ ib_dealloc_pd(device->pd);
+ isert_free_comps(device);
}
static void
-isert_device_try_release(struct isert_device *device)
+isert_device_put(struct isert_device *device)
{
mutex_lock(&device_list_mutex);
device->refcount--;
@@ -357,7 +424,7 @@ isert_device_try_release(struct isert_device *device)
}
static struct isert_device *
-isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
+isert_device_get(struct rdma_cm_id *cma_id)
{
struct isert_device *device;
int ret;
@@ -404,13 +471,13 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
struct fast_reg_descriptor *fr_desc, *tmp;
int i = 0;
- if (list_empty(&isert_conn->conn_fr_pool))
+ if (list_empty(&isert_conn->fr_pool))
return;
isert_info("Freeing conn %p fastreg pool", isert_conn);
list_for_each_entry_safe(fr_desc, tmp,
- &isert_conn->conn_fr_pool, list) {
+ &isert_conn->fr_pool, list) {
list_del(&fr_desc->list);
ib_free_fast_reg_page_list(fr_desc->data_frpl);
ib_dereg_mr(fr_desc->data_mr);
@@ -424,9 +491,9 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
++i;
}
- if (i < isert_conn->conn_fr_pool_size)
+ if (i < isert_conn->fr_pool_size)
isert_warn("Pool still has %d regions registered\n",
- isert_conn->conn_fr_pool_size - i);
+ isert_conn->fr_pool_size - i);
}
static int
@@ -526,7 +593,7 @@ static int
isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
{
struct fast_reg_descriptor *fr_desc;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
struct se_session *se_sess = isert_conn->conn->sess->se_sess;
struct se_node_acl *se_nacl = se_sess->se_node_acl;
int i, ret, tag_num;
@@ -537,7 +604,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
- isert_conn->conn_fr_pool_size = 0;
+ isert_conn->fr_pool_size = 0;
for (i = 0; i < tag_num; i++) {
fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
if (!fr_desc) {
@@ -547,7 +614,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
}
ret = isert_create_fr_desc(device->ib_device,
- isert_conn->conn_pd, fr_desc);
+ device->pd, fr_desc);
if (ret) {
isert_err("Failed to create fastreg descriptor err=%d\n",
ret);
@@ -555,12 +622,12 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
goto err;
}
- list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
- isert_conn->conn_fr_pool_size++;
+ list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+ isert_conn->fr_pool_size++;
}
isert_dbg("Creating conn %p fastreg pool size=%d",
- isert_conn, isert_conn->conn_fr_pool_size);
+ isert_conn, isert_conn->fr_pool_size);
return 0;
@@ -569,55 +636,50 @@ err:
return ret;
}
-static int
-isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+static void
+isert_init_conn(struct isert_conn *isert_conn)
{
- struct isert_np *isert_np = cma_id->context;
- struct iscsi_np *np = isert_np->np;
- struct isert_conn *isert_conn;
- struct isert_device *device;
- struct ib_device *ib_dev = cma_id->device;
- int ret = 0;
-
- spin_lock_bh(&np->np_thread_lock);
- if (!np->enabled) {
- spin_unlock_bh(&np->np_thread_lock);
- isert_dbg("iscsi_np is not enabled, reject connect request\n");
- return rdma_reject(cma_id, NULL, 0);
- }
- spin_unlock_bh(&np->np_thread_lock);
-
- isert_dbg("cma_id: %p, portal: %p\n",
- cma_id, cma_id->context);
-
- isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
- if (!isert_conn) {
- isert_err("Unable to allocate isert_conn\n");
- return -ENOMEM;
- }
isert_conn->state = ISER_CONN_INIT;
- INIT_LIST_HEAD(&isert_conn->conn_accept_node);
- init_completion(&isert_conn->conn_login_comp);
+ INIT_LIST_HEAD(&isert_conn->accept_node);
+ init_completion(&isert_conn->login_comp);
init_completion(&isert_conn->login_req_comp);
- init_completion(&isert_conn->conn_wait);
- kref_init(&isert_conn->conn_kref);
- mutex_init(&isert_conn->conn_mutex);
- spin_lock_init(&isert_conn->conn_lock);
- INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+ init_completion(&isert_conn->wait);
+ kref_init(&isert_conn->kref);
+ mutex_init(&isert_conn->mutex);
+ spin_lock_init(&isert_conn->pool_lock);
+ INIT_LIST_HEAD(&isert_conn->fr_pool);
+}
+
+static void
+isert_free_login_buf(struct isert_conn *isert_conn)
+{
+ struct ib_device *ib_dev = isert_conn->device->ib_device;
- isert_conn->conn_cm_id = cma_id;
+ ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
+ ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_FROM_DEVICE);
+ kfree(isert_conn->login_buf);
+}
+
+static int
+isert_alloc_login_buf(struct isert_conn *isert_conn,
+ struct ib_device *ib_dev)
+{
+ int ret;
isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
ISER_RX_LOGIN_SIZE, GFP_KERNEL);
if (!isert_conn->login_buf) {
isert_err("Unable to allocate isert_conn->login_buf\n");
- ret = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
isert_conn->login_req_buf = isert_conn->login_buf;
isert_conn->login_rsp_buf = isert_conn->login_buf +
ISCSI_DEF_MAX_RECV_SEG_LEN;
+
isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
isert_conn->login_buf, isert_conn->login_req_buf,
isert_conn->login_rsp_buf);
@@ -628,8 +690,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
if (ret) {
- isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
- ret);
+ isert_err("login_req_dma mapping error: %d\n", ret);
isert_conn->login_req_dma = 0;
goto out_login_buf;
}
@@ -640,17 +701,58 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
if (ret) {
- isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
- ret);
+ isert_err("login_rsp_dma mapping error: %d\n", ret);
isert_conn->login_rsp_dma = 0;
goto out_req_dma_map;
}
- device = isert_device_find_by_ib_dev(cma_id);
+ return 0;
+
+out_req_dma_map:
+ ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
+out_login_buf:
+ kfree(isert_conn->login_buf);
+ return ret;
+}
+
+static int
+isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+{
+ struct isert_np *isert_np = cma_id->context;
+ struct iscsi_np *np = isert_np->np;
+ struct isert_conn *isert_conn;
+ struct isert_device *device;
+ int ret = 0;
+
+ spin_lock_bh(&np->np_thread_lock);
+ if (!np->enabled) {
+ spin_unlock_bh(&np->np_thread_lock);
+ isert_dbg("iscsi_np is not enabled, reject connect request\n");
+ return rdma_reject(cma_id, NULL, 0);
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+
+ isert_dbg("cma_id: %p, portal: %p\n",
+ cma_id, cma_id->context);
+
+ isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
+ if (!isert_conn)
+ return -ENOMEM;
+
+ isert_init_conn(isert_conn);
+ isert_conn->cm_id = cma_id;
+
+ ret = isert_alloc_login_buf(isert_conn, cma_id->device);
+ if (ret)
+ goto out;
+
+ device = isert_device_get(cma_id);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
goto out_rsp_dma_map;
}
+ isert_conn->device = device;
/* Set max inflight RDMA READ requests */
isert_conn->initiator_depth = min_t(u8,
@@ -658,24 +760,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
device->dev_attr.max_qp_init_rd_atom);
isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
- isert_conn->conn_device = device;
- isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
- if (IS_ERR(isert_conn->conn_pd)) {
- ret = PTR_ERR(isert_conn->conn_pd);
- isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
- isert_conn, ret);
- goto out_pd;
- }
-
- isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(isert_conn->conn_mr)) {
- ret = PTR_ERR(isert_conn->conn_mr);
- isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
- isert_conn, ret);
- goto out_mr;
- }
-
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
goto out_conn_dev;
@@ -689,7 +773,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
goto out_conn_dev;
mutex_lock(&isert_np->np_accept_mutex);
- list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
+ list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list);
mutex_unlock(&isert_np->np_accept_mutex);
isert_info("np %p: Allow accept_np to continue\n", np);
@@ -697,19 +781,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
return 0;
out_conn_dev:
- ib_dereg_mr(isert_conn->conn_mr);
-out_mr:
- ib_dealloc_pd(isert_conn->conn_pd);
-out_pd:
- isert_device_try_release(device);
+ isert_device_put(device);
out_rsp_dma_map:
- ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
- ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
-out_req_dma_map:
- ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
-out_login_buf:
- kfree(isert_conn->login_buf);
+ isert_free_login_buf(isert_conn);
out:
kfree(isert_conn);
rdma_reject(cma_id, NULL, 0);
@@ -719,43 +793,32 @@ out:
static void
isert_connect_release(struct isert_conn *isert_conn)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
isert_dbg("conn %p\n", isert_conn);
- if (device && device->use_fastreg)
+ BUG_ON(!device);
+
+ if (device->use_fastreg)
isert_conn_free_fastreg_pool(isert_conn);
isert_free_rx_descriptors(isert_conn);
- rdma_destroy_id(isert_conn->conn_cm_id);
+ if (isert_conn->cm_id)
+ rdma_destroy_id(isert_conn->cm_id);
- if (isert_conn->conn_qp) {
- struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
+ if (isert_conn->qp) {
+ struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context;
- isert_dbg("dec completion context %p active_qps\n", comp);
- mutex_lock(&device_list_mutex);
- comp->active_qps--;
- mutex_unlock(&device_list_mutex);
-
- ib_destroy_qp(isert_conn->conn_qp);
+ isert_comp_put(comp);
+ ib_destroy_qp(isert_conn->qp);
}
- ib_dereg_mr(isert_conn->conn_mr);
- ib_dealloc_pd(isert_conn->conn_pd);
+ if (isert_conn->login_buf)
+ isert_free_login_buf(isert_conn);
- if (isert_conn->login_buf) {
- ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
- ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
- ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN,
- DMA_FROM_DEVICE);
- kfree(isert_conn->login_buf);
- }
- kfree(isert_conn);
+ isert_device_put(device);
- if (device)
- isert_device_try_release(device);
+ kfree(isert_conn);
}
static void
@@ -765,22 +828,22 @@ isert_connected_handler(struct rdma_cm_id *cma_id)
isert_info("conn %p\n", isert_conn);
- if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+ if (!kref_get_unless_zero(&isert_conn->kref)) {
isert_warn("conn %p connect_release is running\n", isert_conn);
return;
}
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
if (isert_conn->state != ISER_CONN_FULL_FEATURE)
isert_conn->state = ISER_CONN_UP;
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
}
static void
-isert_release_conn_kref(struct kref *kref)
+isert_release_kref(struct kref *kref)
{
struct isert_conn *isert_conn = container_of(kref,
- struct isert_conn, conn_kref);
+ struct isert_conn, kref);
isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
current->pid);
@@ -791,7 +854,7 @@ isert_release_conn_kref(struct kref *kref)
static void
isert_put_conn(struct isert_conn *isert_conn)
{
- kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
+ kref_put(&isert_conn->kref, isert_release_kref);
}
/**
@@ -803,7 +866,7 @@ isert_put_conn(struct isert_conn *isert_conn)
* to TEMINATING and start teardown sequence (rdma_disconnect).
* In case the connection state is UP, complete flush as well.
*
- * This routine must be called with conn_mutex held. Thus it is
+ * This routine must be called with mutex held. Thus it is
* safe to call multiple times.
*/
static void
@@ -819,7 +882,7 @@ isert_conn_terminate(struct isert_conn *isert_conn)
isert_info("Terminating conn %p state %d\n",
isert_conn, isert_conn->state);
isert_conn->state = ISER_CONN_TERMINATING;
- err = rdma_disconnect(isert_conn->conn_cm_id);
+ err = rdma_disconnect(isert_conn->cm_id);
if (err)
isert_warn("Failed rdma_disconnect isert_conn %p\n",
isert_conn);
@@ -868,22 +931,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
isert_conn = cma_id->qp->qp_context;
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
isert_conn_terminate(isert_conn);
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
- isert_info("conn %p completing conn_wait\n", isert_conn);
- complete(&isert_conn->conn_wait);
+ isert_info("conn %p completing wait\n", isert_conn);
+ complete(&isert_conn->wait);
return 0;
}
-static void
+static int
isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
+
+ return -1;
}
static int
@@ -912,7 +978,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */
case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */
case RDMA_CM_EVENT_CONNECT_ERROR:
- isert_connect_error(cma_id);
+ ret = isert_connect_error(cma_id);
break;
default:
isert_err("Unhandled RDMA CMA event: %d\n", event->event);
@@ -927,11 +993,11 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
{
struct ib_recv_wr *rx_wr, *rx_wr_failed;
int i, ret;
- unsigned int rx_head = isert_conn->conn_rx_desc_head;
+ unsigned int rx_head = isert_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
- for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &isert_conn->conn_rx_descs[rx_head];
+ for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+ rx_desc = &isert_conn->rx_descs[rx_head];
rx_wr->wr_id = (uintptr_t)rx_desc;
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
@@ -943,14 +1009,14 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
rx_wr->next = NULL; /* mark end of work requests list */
isert_conn->post_recv_buf_count += count;
- ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
+ ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
&rx_wr_failed);
if (ret) {
isert_err("ib_post_recv() failed with ret: %d\n", ret);
isert_conn->post_recv_buf_count -= count;
} else {
isert_dbg("Posted %d RX buffers\n", count);
- isert_conn->conn_rx_desc_head = rx_head;
+ isert_conn->rx_desc_head = rx_head;
}
return ret;
}
@@ -958,7 +1024,7 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
static int
isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
struct ib_send_wr send_wr, *send_wr_failed;
int ret;
@@ -972,7 +1038,7 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
+ ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed);
if (ret)
isert_err("ib_post_send() failed, ret: %d\n", ret);
@@ -984,7 +1050,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
struct isert_cmd *isert_cmd,
struct iser_tx_desc *tx_desc)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -995,8 +1062,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
- if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
- tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+ if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
+ tx_desc->tx_sg[0].lkey = device->mr->lkey;
isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
}
}
@@ -1005,7 +1072,8 @@ static int
isert_init_tx_hdrs(struct isert_conn *isert_conn,
struct iser_tx_desc *tx_desc)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
u64 dma_addr;
dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
@@ -1018,7 +1086,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
- tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+ tx_desc->tx_sg[0].lkey = device->mr->lkey;
isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
@@ -1051,7 +1119,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
memset(&sge, 0, sizeof(struct ib_sge));
sge.addr = isert_conn->login_req_dma;
sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = isert_conn->conn_mr->lkey;
+ sge.lkey = isert_conn->device->mr->lkey;
isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
sge.addr, sge.length, sge.lkey);
@@ -1062,7 +1130,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
rx_wr.num_sge = 1;
isert_conn->post_recv_buf_count++;
- ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
+ ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
if (ret) {
isert_err("ib_post_recv() failed: %d\n", ret);
isert_conn->post_recv_buf_count--;
@@ -1076,8 +1144,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
u32 length)
{
struct isert_conn *isert_conn = conn->context;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
- struct iser_tx_desc *tx_desc = &isert_conn->conn_login_tx_desc;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
+ struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
int ret;
isert_create_send_desc(isert_conn, NULL, tx_desc);
@@ -1100,13 +1169,13 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
tx_dsg->addr = isert_conn->login_rsp_dma;
tx_dsg->length = length;
- tx_dsg->lkey = isert_conn->conn_mr->lkey;
+ tx_dsg->lkey = isert_conn->device->mr->lkey;
tx_desc->num_sge = 2;
}
if (!login->login_failed) {
if (login->login_complete) {
if (!conn->sess->sess_ops->SessionType &&
- isert_conn->conn_device->use_fastreg) {
+ isert_conn->device->use_fastreg) {
ret = isert_conn_create_fastreg_pool(isert_conn);
if (ret) {
isert_err("Conn: %p failed to create"
@@ -1124,9 +1193,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
return ret;
/* Now we are in FULL_FEATURE phase */
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
isert_conn->state = ISER_CONN_FULL_FEATURE;
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
goto post_send;
}
@@ -1185,7 +1254,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
memcpy(login->req_buf, &rx_desc->data[0], size);
if (login->first_request) {
- complete(&isert_conn->conn_login_comp);
+ complete(&isert_conn->login_comp);
return;
}
schedule_delayed_work(&conn->login_work, 0);
@@ -1194,7 +1263,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
static struct iscsi_cmd
*isert_allocate_cmd(struct iscsi_conn *conn)
{
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct isert_cmd *isert_cmd;
struct iscsi_cmd *cmd;
@@ -1379,13 +1448,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
{
struct iscsi_hdr *hdr = &rx_desc->iscsi_header;
struct iscsi_conn *conn = isert_conn->conn;
- struct iscsi_session *sess = conn->sess;
struct iscsi_cmd *cmd;
struct isert_cmd *isert_cmd;
int ret = -EINVAL;
u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK);
- if (sess->sess_ops->SessionType &&
+ if (conn->sess->sess_ops->SessionType &&
(!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
" ignoring\n", opcode);
@@ -1497,10 +1565,11 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
}
static void
-isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
- u32 xfer_len)
+isert_rcv_completion(struct iser_rx_desc *desc,
+ struct isert_conn *isert_conn,
+ u32 xfer_len)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
struct iscsi_hdr *hdr;
u64 rx_dma;
int rx_buflen, outstanding;
@@ -1532,9 +1601,9 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
if (login && !login->first_request)
isert_rx_login_req(isert_conn);
}
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
complete(&isert_conn->login_req_comp);
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
} else {
isert_rx_do_work(desc, isert_conn);
}
@@ -1566,7 +1635,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
struct scatterlist *sg, u32 nents, u32 length, u32 offset,
enum iser_ib_op_code op, struct isert_data_buf *data)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
data->dma_dir = op == ISER_IB_RDMA_WRITE ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
@@ -1597,7 +1666,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
static void
isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
memset(data, 0, sizeof(*data));
@@ -1634,7 +1703,6 @@ static void
isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
{
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- LIST_HEAD(unmap_list);
isert_dbg("Cmd %p\n", isert_cmd);
@@ -1644,9 +1712,9 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
isert_unmap_data_buf(isert_conn, &wr->prot);
wr->fr_desc->ind &= ~ISERT_PROTECTED;
}
- spin_lock_bh(&isert_conn->conn_lock);
- list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool);
- spin_unlock_bh(&isert_conn->conn_lock);
+ spin_lock_bh(&isert_conn->pool_lock);
+ list_add_tail(&wr->fr_desc->list, &isert_conn->fr_pool);
+ spin_unlock_bh(&isert_conn->pool_lock);
wr->fr_desc = NULL;
}
@@ -1665,7 +1733,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
struct iscsi_conn *conn = isert_conn->conn;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
struct iscsi_text_rsp *hdr;
isert_dbg("Cmd %p\n", isert_cmd);
@@ -1815,7 +1883,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
int ret = 0;
if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1841,7 +1909,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
int ret = 0;
if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1861,11 +1929,13 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
spin_unlock_bh(&cmd->istate_lock);
- if (ret)
+ if (ret) {
+ target_put_sess_cmd(se_cmd->se_sess, se_cmd);
transport_send_check_condition_and_sense(se_cmd,
se_cmd->pi_err, 0);
- else
+ } else {
target_execute_cmd(se_cmd);
+ }
}
static void
@@ -1874,7 +1944,7 @@ isert_do_control_comp(struct work_struct *work)
struct isert_cmd *isert_cmd = container_of(work,
struct isert_cmd, comp_work);
struct isert_conn *isert_conn = isert_cmd->conn;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
@@ -1922,10 +1992,10 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
}
static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
+isert_snd_completion(struct iser_tx_desc *tx_desc,
struct isert_conn *isert_conn)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
struct isert_rdma_wr *wr;
@@ -1938,10 +2008,6 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
switch (wr->iser_ib_op) {
- case ISER_IB_RECV:
- isert_err("Got ISER_IB_RECV\n");
- dump_stack();
- break;
case ISER_IB_SEND:
isert_response_completion(tx_desc, isert_cmd,
isert_conn, ib_dev);
@@ -1973,8 +2039,8 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
static inline bool
is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
{
- void *start = isert_conn->conn_rx_descs;
- int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
+ void *start = isert_conn->rx_descs;
+ int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs);
if (wr_id >= start && wr_id < start + len)
return false;
@@ -1986,11 +2052,11 @@ static void
isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
{
if (wc->wr_id == ISER_BEACON_WRID) {
- isert_info("conn %p completing conn_wait_comp_err\n",
+ isert_info("conn %p completing wait_comp_err\n",
isert_conn);
- complete(&isert_conn->conn_wait_comp_err);
+ complete(&isert_conn->wait_comp_err);
} else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_device *ib_dev = isert_conn->cm_id->device;
struct isert_cmd *isert_cmd;
struct iser_tx_desc *desc;
@@ -2018,10 +2084,10 @@ isert_handle_wc(struct ib_wc *wc)
if (likely(wc->status == IB_WC_SUCCESS)) {
if (wc->opcode == IB_WC_RECV) {
rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
- isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
+ isert_rcv_completion(rx_desc, isert_conn, wc->byte_len);
} else {
tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
- isert_send_completion(tx_desc, isert_conn);
+ isert_snd_completion(tx_desc, isert_conn);
}
} else {
if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -2070,7 +2136,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
struct ib_send_wr *wr_failed;
int ret;
- ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
+ ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
&wr_failed);
if (ret) {
isert_err("ib_post_send failed with %d\n", ret);
@@ -2083,7 +2149,7 @@ static int
isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)
&isert_cmd->tx_desc.iscsi_header;
@@ -2097,7 +2163,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
if (cmd->se_cmd.sense_buffer &&
((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
(cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
u32 padding, pdu_len;
@@ -2116,7 +2183,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->pdu_buf_len = pdu_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = pdu_len;
- tx_dsg->lkey = isert_conn->conn_mr->lkey;
+ tx_dsg->lkey = device->mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
}
@@ -2131,8 +2198,8 @@ static void
isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_conn *isert_conn = conn->context;
+ struct isert_device *device = isert_conn->device;
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
@@ -2148,8 +2215,8 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
static enum target_prot_op
isert_get_sup_prot_ops(struct iscsi_conn *conn)
{
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_conn *isert_conn = conn->context;
+ struct isert_device *device = isert_conn->device;
if (conn->tpg->tpg_attrib.t10_pi) {
if (device->pi_capable) {
@@ -2170,7 +2237,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
bool nopout_response)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2189,7 +2256,7 @@ static int
isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2207,7 +2274,7 @@ static int
isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2225,9 +2292,10 @@ static int
isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
struct iscsi_reject *hdr =
(struct iscsi_reject *)&isert_cmd->tx_desc.iscsi_header;
@@ -2243,7 +2311,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = ISCSI_HDR_LEN;
- tx_dsg->lkey = isert_conn->conn_mr->lkey;
+ tx_dsg->lkey = device->mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2257,7 +2325,7 @@ static int
isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
struct iscsi_text_rsp *hdr =
(struct iscsi_text_rsp *)&isert_cmd->tx_desc.iscsi_header;
@@ -2273,7 +2341,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
if (txt_rsp_len) {
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
void *txt_rsp_buf = cmd->buf_ptr;
@@ -2283,7 +2352,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_len = txt_rsp_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = txt_rsp_len;
- tx_dsg->lkey = isert_conn->conn_mr->lkey;
+ tx_dsg->lkey = device->mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
}
isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2300,7 +2369,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
{
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct scatterlist *sg_start, *tmp_sg;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
u32 sg_off, page_off;
int i = 0, sg_nents;
@@ -2324,7 +2394,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
ib_sge->length = min_t(u32, data_left,
ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
- ib_sge->lkey = isert_conn->conn_mr->lkey;
+ ib_sge->lkey = device->mr->lkey;
isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n",
ib_sge->addr, ib_sge->length, ib_sge->lkey);
@@ -2346,7 +2416,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
{
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
struct isert_data_buf *data = &wr->data;
struct ib_send_wr *send_wr;
struct ib_sge *ib_sge;
@@ -2485,7 +2555,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
enum isert_indicator ind,
struct ib_sge *sge)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fr_wr, inv_wr;
@@ -2494,7 +2565,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
u32 page_off;
if (mem->dma_nents == 1) {
- sge->lkey = isert_conn->conn_mr->lkey;
+ sge->lkey = device->mr->lkey;
sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
@@ -2542,7 +2613,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
else
wr->next = &fr_wr;
- ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+ ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
if (ret) {
isert_err("fast registration failed, ret:%d\n", ret);
return ret;
@@ -2655,7 +2726,7 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
else
wr->next = &sig_wr;
- ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+ ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
if (ret) {
isert_err("fast registration failed, ret:%d\n", ret);
goto err;
@@ -2685,14 +2756,14 @@ isert_handle_prot_cmd(struct isert_conn *isert_conn,
struct isert_cmd *isert_cmd,
struct isert_rdma_wr *wr)
{
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_device *device = isert_conn->device;
struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
int ret;
if (!wr->fr_desc->pi_ctx) {
ret = isert_create_pi_ctx(wr->fr_desc,
device->ib_device,
- isert_conn->conn_pd);
+ device->pd);
if (ret) {
isert_err("conn %p failed to allocate pi_ctx\n",
isert_conn);
@@ -2763,11 +2834,11 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
return ret;
if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
- spin_lock_irqsave(&isert_conn->conn_lock, flags);
- fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
+ spin_lock_irqsave(&isert_conn->pool_lock, flags);
+ fr_desc = list_first_entry(&isert_conn->fr_pool,
struct fast_reg_descriptor, list);
list_del(&fr_desc->list);
- spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
wr->fr_desc = fr_desc;
}
@@ -2814,9 +2885,9 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
unmap_cmd:
if (fr_desc) {
- spin_lock_irqsave(&isert_conn->conn_lock, flags);
- list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
- spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ spin_lock_irqsave(&isert_conn->pool_lock, flags);
+ list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+ spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
}
isert_unmap_data_buf(isert_conn, &wr->data);
@@ -2829,8 +2900,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_conn *isert_conn = conn->context;
+ struct isert_device *device = isert_conn->device;
struct ib_send_wr *wr_failed;
int rc;
@@ -2859,7 +2930,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
wr->send_wr_num += 1;
}
- rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+ rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
if (rc)
isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
@@ -2879,8 +2950,8 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct isert_device *device = isert_conn->conn_device;
+ struct isert_conn *isert_conn = conn->context;
+ struct isert_device *device = isert_conn->device;
struct ib_send_wr *wr_failed;
int rc;
@@ -2893,7 +2964,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
return rc;
}
- rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+ rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
if (rc)
isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
@@ -2987,7 +3058,7 @@ isert_setup_id(struct isert_np *isert_np)
goto out_id;
}
- ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+ ret = rdma_listen(id, 0);
if (ret) {
isert_err("rdma_listen() failed: %d\n", ret);
goto out_id;
@@ -3046,7 +3117,7 @@ out:
static int
isert_rdma_accept(struct isert_conn *isert_conn)
{
- struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+ struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_conn_param cp;
int ret;
@@ -3067,7 +3138,7 @@ isert_rdma_accept(struct isert_conn *isert_conn)
static int
isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
{
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_conn *isert_conn = conn->context;
int ret;
isert_info("before login_req comp conn: %p\n", isert_conn);
@@ -3090,8 +3161,8 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
isert_rx_login_req(isert_conn);
- isert_info("before conn_login_comp conn: %p\n", conn);
- ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
+ isert_info("before login_comp conn: %p\n", conn);
+ ret = wait_for_completion_interruptible(&isert_conn->login_comp);
if (ret)
return ret;
@@ -3104,7 +3175,7 @@ static void
isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
struct isert_conn *isert_conn)
{
- struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+ struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_route *cm_route = &cm_id->route;
struct sockaddr_in *sock_in;
struct sockaddr_in6 *sock_in6;
@@ -3137,13 +3208,13 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
static int
isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
{
- struct isert_np *isert_np = (struct isert_np *)np->np_context;
+ struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn;
- int max_accept = 0, ret;
+ int ret;
accept_wait:
ret = down_interruptible(&isert_np->np_sem);
- if (ret || max_accept > 5)
+ if (ret)
return -ENODEV;
spin_lock_bh(&np->np_thread_lock);
@@ -3162,17 +3233,15 @@ accept_wait:
mutex_lock(&isert_np->np_accept_mutex);
if (list_empty(&isert_np->np_accept_list)) {
mutex_unlock(&isert_np->np_accept_mutex);
- max_accept++;
goto accept_wait;
}
isert_conn = list_first_entry(&isert_np->np_accept_list,
- struct isert_conn, conn_accept_node);
- list_del_init(&isert_conn->conn_accept_node);
+ struct isert_conn, accept_node);
+ list_del_init(&isert_conn->accept_node);
mutex_unlock(&isert_np->np_accept_mutex);
conn->context = isert_conn;
isert_conn->conn = conn;
- max_accept = 0;
isert_set_conn_info(np, conn, isert_conn);
@@ -3184,7 +3253,7 @@ accept_wait:
static void
isert_free_np(struct iscsi_np *np)
{
- struct isert_np *isert_np = (struct isert_np *)np->np_context;
+ struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
if (isert_np->np_cm_id)
@@ -3202,7 +3271,7 @@ isert_free_np(struct iscsi_np *np)
isert_info("Still have isert connections, cleaning up...\n");
list_for_each_entry_safe(isert_conn, n,
&isert_np->np_accept_list,
- conn_accept_node) {
+ accept_node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
isert_connect_release(isert_conn);
@@ -3222,11 +3291,11 @@ static void isert_release_work(struct work_struct *work)
isert_info("Starting release conn %p\n", isert_conn);
- wait_for_completion(&isert_conn->conn_wait);
+ wait_for_completion(&isert_conn->wait);
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
isert_conn->state = ISER_CONN_DOWN;
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
isert_info("Destroying conn %p\n", isert_conn);
isert_put_conn(isert_conn);
@@ -3264,15 +3333,15 @@ isert_wait4flush(struct isert_conn *isert_conn)
isert_info("conn %p\n", isert_conn);
- init_completion(&isert_conn->conn_wait_comp_err);
+ init_completion(&isert_conn->wait_comp_err);
isert_conn->beacon.wr_id = ISER_BEACON_WRID;
/* post an indication that all flush errors were consumed */
- if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+ if (ib_post_recv(isert_conn->qp, &isert_conn->beacon, &bad_wr)) {
isert_err("conn %p failed to post beacon", isert_conn);
return;
}
- wait_for_completion(&isert_conn->conn_wait_comp_err);
+ wait_for_completion(&isert_conn->wait_comp_err);
}
static void isert_wait_conn(struct iscsi_conn *conn)
@@ -3281,17 +3350,17 @@ static void isert_wait_conn(struct iscsi_conn *conn)
isert_info("Starting conn %p\n", isert_conn);
- mutex_lock(&isert_conn->conn_mutex);
+ mutex_lock(&isert_conn->mutex);
/*
- * Only wait for conn_wait_comp_err if the isert_conn made it
+ * Only wait for wait_comp_err if the isert_conn made it
* into full feature phase..
*/
if (isert_conn->state == ISER_CONN_INIT) {
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
return;
}
isert_conn_terminate(isert_conn);
- mutex_unlock(&isert_conn->conn_mutex);
+ mutex_unlock(&isert_conn->mutex);
isert_wait4cmds(conn);
isert_wait4flush(isert_conn);
@@ -3370,7 +3439,7 @@ static void __exit isert_exit(void)
}
MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
-MODULE_VERSION("0.1");
+MODULE_VERSION("1.0");
MODULE_AUTHOR("nab@Linux-iSCSI.org");
MODULE_LICENSE("GPL");
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 8dc8415d152d..9ec23a786c02 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -31,7 +31,6 @@
#define isert_err(fmt, arg...) \
pr_err(PFX "%s: " fmt, __func__ , ## arg)
-#define ISERT_RDMA_LISTEN_BACKLOG 10
#define ISCSI_ISER_SG_TABLESIZE 256
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
#define ISER_BEACON_WRID 0xfffffffffffffffeULL
@@ -160,27 +159,25 @@ struct isert_conn {
u64 login_req_dma;
int login_req_len;
u64 login_rsp_dma;
- unsigned int conn_rx_desc_head;
- struct iser_rx_desc *conn_rx_descs;
- struct ib_recv_wr conn_rx_wr[ISERT_MIN_POSTED_RX];
+ unsigned int rx_desc_head;
+ struct iser_rx_desc *rx_descs;
+ struct ib_recv_wr rx_wr[ISERT_MIN_POSTED_RX];
struct iscsi_conn *conn;
- struct list_head conn_accept_node;
- struct completion conn_login_comp;
+ struct list_head accept_node;
+ struct completion login_comp;
struct completion login_req_comp;
- struct iser_tx_desc conn_login_tx_desc;
- struct rdma_cm_id *conn_cm_id;
- struct ib_pd *conn_pd;
- struct ib_mr *conn_mr;
- struct ib_qp *conn_qp;
- struct isert_device *conn_device;
- struct mutex conn_mutex;
- struct completion conn_wait;
- struct completion conn_wait_comp_err;
- struct kref conn_kref;
- struct list_head conn_fr_pool;
- int conn_fr_pool_size;
+ struct iser_tx_desc login_tx_desc;
+ struct rdma_cm_id *cm_id;
+ struct ib_qp *qp;
+ struct isert_device *device;
+ struct mutex mutex;
+ struct completion wait;
+ struct completion wait_comp_err;
+ struct kref kref;
+ struct list_head fr_pool;
+ int fr_pool_size;
/* lock to protect fastreg pool */
- spinlock_t conn_lock;
+ spinlock_t pool_lock;
struct work_struct release_work;
struct ib_recv_wr beacon;
bool logout_posted;
@@ -211,6 +208,8 @@ struct isert_device {
bool pi_capable;
int refcount;
struct ib_device *ib_device;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0747c0595a9d..918814cd0f80 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
+#include <rdma/ib_cache.h>
#include <linux/atomic.h>
@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
if (!attr)
return -ENOMEM;
- ret = ib_find_pkey(target->srp_host->srp_dev->dev,
- target->srp_host->port,
- be16_to_cpu(target->pkey),
- &attr->pkey_index);
+ ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
+ target->srp_host->port,
+ be16_to_cpu(target->pkey),
+ &attr->pkey_index);
if (ret)
goto out;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 6e0a477681e9..9b84b4c0a000 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,7 +93,7 @@ MODULE_PARM_DESC(srpt_service_guid,
" instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
-static struct target_fabric_configfs *srpt_target;
+static const struct target_core_fabric_ops srpt_template;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
@@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
}
break;
default:
- printk(KERN_ERR "received unrecognized IB event %d\n",
+ pr_err("received unrecognized IB event %d\n",
event->event);
break;
}
@@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
*/
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
- printk(KERN_INFO "SRQ event %d\n", event->event);
+ pr_info("SRQ event %d\n", event->event);
}
/**
@@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
ch->sess_name, srpt_get_ch_state(ch));
break;
default:
- printk(KERN_ERR "received unrecognized IB QP event %d\n",
- event->event);
+ pr_err("received unrecognized IB QP event %d\n", event->event);
break;
}
}
@@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
sport = &sdev->port[i - 1];
WARN_ON(sport->port != i);
if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
- printk(KERN_ERR "disabling MAD processing failed.\n");
+ pr_err("disabling MAD processing failed.\n");
if (sport->mad_agent) {
ib_unregister_mad_agent(sport->mad_agent);
sport->mad_agent = NULL;
@@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
ret = -ENOMEM;
if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
- printk(KERN_WARNING "IB send queue full (needed 1)\n");
+ pr_warn("IB send queue full (needed 1)\n");
goto out;
}
@@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (ioctx->n_rbuf >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
- printk(KERN_ERR "received unsupported SRP_CMD request"
+ pr_err("received unsupported SRP_CMD request"
" type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt,
@@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- printk(KERN_ERR "IB completion has been received too late for"
+ pr_err("IB completion has been received too late for"
" wr_id = %u.\n", ioctx->ioctx.index);
}
}
@@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
SRPT_STATE_DATA_IN))
target_execute_cmd(&ioctx->cmd);
else
- printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
+ pr_err("%s[%d]: wrong state = %d\n", __func__,
__LINE__, srpt_get_cmd_state(ioctx));
} else if (opcode == SRPT_RDMA_ABORT) {
ioctx->rdma_aborted = true;
@@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
switch (opcode) {
case SRPT_RDMA_READ_LAST:
if (ioctx->n_rdma <= 0) {
- printk(KERN_ERR "Received invalid RDMA read"
+ pr_err("Received invalid RDMA read"
" error completion with idx %d\n",
ioctx->ioctx.index);
break;
@@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
if (state == SRPT_STATE_NEED_DATA)
srpt_abort_cmd(ioctx);
else
- printk(KERN_ERR "%s[%d]: wrong state = %d\n",
+ pr_err("%s[%d]: wrong state = %d\n",
__func__, __LINE__, state);
break;
case SRPT_RDMA_WRITE_LAST:
break;
default:
- printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
- __LINE__, opcode);
+ pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
break;
}
}
@@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
if (sense_data_len > max_sense_len) {
- printk(KERN_WARNING "truncated sense data from %d to %d"
- " bytes\n", sense_data_len, max_sense_len);
+ pr_warn("truncated sense data from %d to %d"
+ " bytes\n", sense_data_len, max_sense_len);
sense_data_len = max_sense_len;
}
@@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
int addressing_method;
if (unlikely(len < 2)) {
- printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
- "more", len);
+ pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
+ len);
goto out;
}
@@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
default:
- printk(KERN_ERR "Unimplemented LUN addressing method %u",
+ pr_err("Unimplemented LUN addressing method %u\n",
addressing_method);
break;
}
@@ -1672,8 +1670,7 @@ out:
return res;
out_err:
- printk(KERN_ERR "Support for multi-level LUNs has not yet been"
- " implemented");
+ pr_err("Support for multi-level LUNs has not yet been implemented\n");
goto out;
}
@@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
}
if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
- printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
+ pr_err("0x%llx: parsing SRP descriptor table failed.\n",
srp_cmd->tag);
ret = TCM_INVALID_CDB_FIELD;
goto send_sense;
@@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
break;
case SRP_I_LOGOUT:
- printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
+ pr_err("Not yet implemented: SRP_I_LOGOUT\n");
break;
case SRP_CRED_RSP:
pr_debug("received SRP_CRED_RSP\n");
@@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
pr_debug("received SRP_AER_RSP\n");
break;
case SRP_RSP:
- printk(KERN_ERR "Received SRP_RSP\n");
+ pr_err("Received SRP_RSP\n");
break;
default:
- printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
+ pr_err("received IU with unknown opcode 0x%x\n",
srp_cmd->opcode);
break;
}
@@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
- printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
+ pr_err("req_lim = %d < 0\n", req_lim);
ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL);
} else {
- printk(KERN_INFO "receiving failed for idx %u with status %d\n",
- index, wc->status);
+ pr_info("receiving failed for idx %u with status %d\n",
+ index, wc->status);
}
}
@@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
}
} else {
if (opcode == SRPT_SEND) {
- printk(KERN_INFO "sending response for idx %u failed"
- " with status %d\n", index, wc->status);
+ pr_info("sending response for idx %u failed"
+ " with status %d\n", index, wc->status);
srpt_handle_send_err_comp(ch, wc->wr_id);
} else if (opcode != SRPT_RDMA_MID) {
- printk(KERN_INFO "RDMA t %d for idx %u failed with"
- " status %d", opcode, index, wc->status);
+ pr_info("RDMA t %d for idx %u failed with"
+ " status %d\n", opcode, index, wc->status);
srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
}
}
@@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
ch = arg;
BUG_ON(!ch);
- printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
- ch->sess_name, ch->thread->comm, current->pid);
+ pr_info("Session %s: kernel thread %s (PID %d) started\n",
+ ch->sess_name, ch->thread->comm, current->pid);
while (!kthread_should_stop()) {
wait_event_interruptible(ch->wait_queue,
(srpt_process_completion(ch->cq, ch),
kthread_should_stop()));
}
- printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
- ch->sess_name, ch->thread->comm, current->pid);
+ pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
+ ch->sess_name, ch->thread->comm, current->pid);
return 0;
}
@@ -2097,7 +2094,7 @@ retry:
ch->rq_size + srp_sq_size, 0);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
- printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
+ pr_err("failed to create CQ cqe= %d ret= %d\n",
ch->rq_size + srp_sq_size, ret);
goto out;
}
@@ -2123,7 +2120,7 @@ retry:
goto retry;
}
}
- printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
+ pr_err("failed to create_qp ret= %d\n", ret);
goto err_destroy_cq;
}
@@ -2143,7 +2140,7 @@ retry:
ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
if (IS_ERR(ch->thread)) {
- printk(KERN_ERR "failed to create kernel thread %ld\n",
+ pr_err("failed to create kernel thread %ld\n",
PTR_ERR(ch->thread));
ch->thread = NULL;
goto err_destroy_qp;
@@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
/* fall through */
case CH_LIVE:
if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
- printk(KERN_ERR "sending CM DREQ failed.\n");
+ pr_err("sending CM DREQ failed.\n");
break;
case CH_DISCONNECTING:
break;
@@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
ret = srpt_ch_qp_err(ch);
if (ret < 0)
- printk(KERN_ERR "Setting queue pair in error state"
+ pr_err("Setting queue pair in error state"
" failed: %d\n", ret);
}
}
@@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
it_iu_len = be32_to_cpu(req->req_it_iu_len);
- printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
- " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
- " (guid=0x%llx:0x%llx)\n",
- be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
- be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
- be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
- be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
- it_iu_len,
- param->port,
- be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
- be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
+ pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
+ " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
+ " (guid=0x%llx:0x%llx)\n",
+ be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
+ be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
+ be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
+ be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
+ it_iu_len,
+ param->port,
+ be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
+ be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
rej = kzalloc(sizeof *rej, GFP_KERNEL);
@@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
ret = -EINVAL;
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
+ pr_err("rejected SRP_LOGIN_REQ because its"
" length (%d bytes) is out of range (%d .. %d)\n",
it_iu_len, 64, srp_max_req_size);
goto reject;
@@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
ret = -EINVAL;
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
+ pr_err("rejected SRP_LOGIN_REQ because the target port"
" has not yet been enabled\n");
goto reject;
}
@@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
ret = -ENOMEM;
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
+ pr_err("rejected SRP_LOGIN_REQ because it"
" has an invalid target port identifier.\n");
goto reject;
}
@@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (!ch) {
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
+ pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
ret = -ENOMEM;
goto reject;
}
@@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (ret) {
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
+ pr_err("rejected SRP_LOGIN_REQ because creating"
" a new RDMA channel failed.\n");
goto free_ring;
}
@@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (ret) {
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
+ pr_err("rejected SRP_LOGIN_REQ because enabling"
" RTR failed (error code = %d)\n", ret);
goto destroy_ib;
}
@@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
nacl = srpt_lookup_acl(sport, ch->i_port_id);
if (!nacl) {
- printk(KERN_INFO "Rejected login because no ACL has been"
- " configured yet for initiator %s.\n", ch->sess_name);
+ pr_info("Rejected login because no ACL has been"
+ " configured yet for initiator %s.\n", ch->sess_name);
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
goto destroy_ib;
@@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ret = ib_send_cm_rep(cm_id, rep_param);
if (ret) {
- printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
+ pr_err("sending SRP_LOGIN_REQ response failed"
" (error code = %d)\n", ret);
goto release_channel;
}
@@ -2679,7 +2676,7 @@ out:
static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
{
- printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
+ pr_info("Received IB REJ for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
@@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
{
- printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
+ pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
{
- printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
+ pr_info("Received IB REP error for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
@@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
if (send_drep) {
if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
- printk(KERN_ERR "Sending IB DREP failed.\n");
- printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
- ch->sess_name);
+ pr_err("Sending IB DREP failed.\n");
+ pr_info("Received DREQ and sent DREP for session %s.\n",
+ ch->sess_name);
}
}
@@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
*/
static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
{
- printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
- cm_id);
+ pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
@@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
srpt_cm_rep_error(cm_id);
break;
case IB_CM_DREQ_ERROR:
- printk(KERN_INFO "Received IB DREQ ERROR event.\n");
+ pr_info("Received IB DREQ ERROR event.\n");
break;
case IB_CM_MRA_RECEIVED:
- printk(KERN_INFO "Received IB MRA event\n");
+ pr_info("Received IB MRA event\n");
break;
default:
- printk(KERN_ERR "received unrecognized IB CM event %d\n",
- event->event);
+ pr_err("received unrecognized IB CM event %d\n", event->event);
break;
}
@@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
ret = -ENOMEM;
sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
if (sq_wr_avail < 0) {
- printk(KERN_WARNING "IB send queue full (needed %d)\n",
- n_rdma);
+ pr_warn("IB send queue full (needed %d)\n",
+ n_rdma);
goto out;
}
}
@@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
}
if (ret)
- printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
+ pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
if (ret && i > 0) {
wr.num_sge = 0;
@@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
wr.send_flags = IB_SEND_SIGNALED;
while (ch->state == CH_LIVE &&
ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
- printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+ pr_info("Trying to abort failed RDMA transfer [%d]\n",
ioctx->ioctx.index);
msleep(1000);
}
while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
- printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+ pr_info("Waiting until RDMA abort finished [%d]\n",
ioctx->ioctx.index);
msleep(1000);
}
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
ret = srpt_map_sg_to_ib_sge(ch, ioctx);
if (ret) {
- printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
+ pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
goto out;
}
ret = srpt_perform_rdmas(ch, ioctx);
if (ret) {
if (ret == -EAGAIN || ret == -ENOMEM)
- printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
- __func__, __LINE__, ret);
+ pr_info("%s[%d] queue full -- ret=%d\n",
+ __func__, __LINE__, ret);
else
- printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
+ pr_err("%s[%d] fatal error -- ret=%d\n",
__func__, __LINE__, ret);
goto out_unmap;
}
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
!ioctx->queue_status_only) {
ret = srpt_xfer_data(ch, ioctx);
if (ret) {
- printk(KERN_ERR "xfer_data failed for tag %llu\n",
+ pr_err("xfer_data failed for tag %llu\n",
ioctx->tag);
return;
}
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
}
ret = srpt_post_send(ch, ioctx, resp_len);
if (ret) {
- printk(KERN_ERR "sending cmd response failed for tag %llu\n",
+ pr_err("sending cmd response failed for tag %llu\n",
ioctx->tag);
srpt_unmap_sg_to_ib_sge(ch, ioctx);
srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
res = wait_event_interruptible(sdev->ch_releaseQ,
srpt_ch_list_empty(sdev));
if (res)
- printk(KERN_ERR "%s: interrupted.\n", __func__);
+ pr_err("%s: interrupted.\n", __func__);
return 0;
}
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
spin_lock_init(&sport->port_acl_lock);
if (srpt_refresh_port(sport)) {
- printk(KERN_ERR "MAD registration failed for %s-%d.\n",
+ pr_err("MAD registration failed for %s-%d.\n",
srpt_sdev_name(sdev), i);
goto err_ring;
}
@@ -3330,7 +3325,7 @@ free_dev:
kfree(sdev);
err:
sdev = NULL;
- printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
+ pr_info("%s(%s) failed.\n", __func__, device->name);
goto out;
}
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
sdev = ib_get_client_data(device, &srpt_client);
if (!sdev) {
- printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
- device->name);
+ pr_info("%s(%s): nothing to do.\n", __func__, device->name);
return;
}
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
if (!nacl) {
- printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
+ pr_err("Unable to allocate struct srpt_node_acl\n");
return NULL;
}
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
u8 i_port_id[16];
if (srpt_parse_i_port_id(i_port_id, name) < 0) {
- printk(KERN_ERR "invalid initiator port ID %s\n", name);
+ pr_err("invalid initiator port ID %s\n", name);
ret = -EINVAL;
goto err;
}
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
ret = kstrtoul(page, 0, &tmp);
if (ret < 0) {
- printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
+ pr_err("Unable to extract srpt_tpg_store_enable\n");
return -EINVAL;
}
if ((tmp != 0) && (tmp != 1)) {
- printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
+ pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
return -EINVAL;
}
if (tmp == 1)
@@ -3851,7 +3845,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
int res;
/* Initialize sport->port_wwn and sport->port_tpg_1 */
- res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn,
+ res = core_tpg_register(&srpt_template, &sport->port_wwn,
&sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL);
if (res)
return ERR_PTR(res);
@@ -3919,7 +3913,9 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
NULL,
};
-static struct target_core_fabric_ops srpt_template = {
+static const struct target_core_fabric_ops srpt_template = {
+ .module = THIS_MODULE,
+ .name = "srpt",
.get_fabric_name = srpt_get_fabric_name,
.get_fabric_proto_ident = srpt_get_fabric_proto_ident,
.tpg_get_wwn = srpt_get_fabric_wwn,
@@ -3964,6 +3960,10 @@ static struct target_core_fabric_ops srpt_template = {
.fabric_drop_np = NULL,
.fabric_make_nodeacl = srpt_make_nodeacl,
.fabric_drop_nodeacl = srpt_drop_nodeacl,
+
+ .tfc_wwn_attrs = srpt_wwn_attrs,
+ .tfc_tpg_base_attrs = srpt_tpg_attrs,
+ .tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs,
};
/**
@@ -3980,7 +3980,7 @@ static int __init srpt_init_module(void)
ret = -EINVAL;
if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
- printk(KERN_ERR "invalid value %d for kernel module parameter"
+ pr_err("invalid value %d for kernel module parameter"
" srp_max_req_size -- must be at least %d.\n",
srp_max_req_size, MIN_MAX_REQ_SIZE);
goto out;
@@ -3988,54 +3988,26 @@ static int __init srpt_init_module(void)
if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
|| srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
- printk(KERN_ERR "invalid value %d for kernel module parameter"
+ pr_err("invalid value %d for kernel module parameter"
" srpt_srq_size -- must be in the range [%d..%d].\n",
srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
goto out;
}
- srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
- if (IS_ERR(srpt_target)) {
- printk(KERN_ERR "couldn't register\n");
- ret = PTR_ERR(srpt_target);
+ ret = target_register_template(&srpt_template);
+ if (ret)
goto out;
- }
-
- srpt_target->tf_ops = srpt_template;
-
- /*
- * Set up default attribute lists.
- */
- srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
- srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
- srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
- srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
- srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
- srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
- srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
- srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
- srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-
- ret = target_fabric_configfs_register(srpt_target);
- if (ret < 0) {
- printk(KERN_ERR "couldn't register\n");
- goto out_free_target;
- }
ret = ib_register_client(&srpt_client);
if (ret) {
- printk(KERN_ERR "couldn't register IB client\n");
+ pr_err("couldn't register IB client\n");
goto out_unregister_target;
}
return 0;
out_unregister_target:
- target_fabric_configfs_deregister(srpt_target);
- srpt_target = NULL;
-out_free_target:
- if (srpt_target)
- target_fabric_configfs_free(srpt_target);
+ target_unregister_template(&srpt_template);
out:
return ret;
}
@@ -4043,8 +4015,7 @@ out:
static void __exit srpt_cleanup_module(void)
{
ib_unregister_client(&srpt_client);
- target_fabric_configfs_deregister(srpt_target);
- srpt_target = NULL;
+ target_unregister_template(&srpt_template);
}
module_init(srpt_init_module);