summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/sdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/sdma.c')
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c377
1 files changed, 368 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc05b349..fd39bcaa062d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
}
/**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ struct sdma_vl_map *m;
+ u8 vl;
+
+ if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+ return -EINVAL;
+
+ rcu_read_lock();
+ m = rcu_dereference(dd->sdma_map);
+ if (unlikely(!m)) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ vl = m->engine_to_vl[sde->this_idx];
+ rcu_read_unlock();
+
+ return vl;
+}
+
+/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
* @selector: a spreading factor
@@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}
+struct sdma_rht_map_elem {
+ u32 mask;
+ u8 ctr;
+ struct sdma_engine *sde[0];
+};
+
+struct sdma_rht_node {
+ unsigned long cpu_id;
+ struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+ struct rhash_head node;
+};
+
+#define NR_CPUS_HINT 192
+
+static const struct rhashtable_params sdma_rht_params = {
+ .nelem_hint = NR_CPUS_HINT,
+ .head_offset = offsetof(struct sdma_rht_node, node),
+ .key_offset = offsetof(struct sdma_rht_node, cpu_id),
+ .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .max_size = NR_CPUS,
+ .min_size = 8,
+ .automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - select sdma engine based on user setup
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns an sdma engine for a user sdma request.
+ * User defined sdma engine affinity setting is honored when applicable,
+ * otherwise system default sdma engine mapping is used. To ensure correct
+ * ordering, the mapping from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl)
+{
+ struct sdma_rht_node *rht_node;
+ struct sdma_engine *sde = NULL;
+ const struct cpumask *current_mask = tsk_cpus_allowed(current);
+ unsigned long cpu_id;
+
+ /*
+ * To ensure that always the same sdma engine(s) will be
+ * selected make sure the process is pinned to this CPU only.
+ */
+ if (cpumask_weight(current_mask) != 1)
+ goto out;
+
+ cpu_id = smp_processor_id();
+ rcu_read_lock();
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
+
+ if (rht_node && rht_node->map[vl]) {
+ struct sdma_rht_map_elem *map = rht_node->map[vl];
+
+ sde = map->sde[selector & map->mask];
+ }
+ rcu_read_unlock();
+
+ if (sde)
+ return sde;
+
+out:
+ return sdma_select_engine_vl(dd, selector, vl);
+}
+
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+ int i;
+
+ for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
+ map->sde[map->ctr + i] = map->sde[i];
+}
+
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+ struct sdma_engine *sde)
+{
+ unsigned int i, pow;
+
+ /* only need to check the first ctr entries for a match */
+ for (i = 0; i < map->ctr; i++) {
+ if (map->sde[i] == sde) {
+ memmove(&map->sde[i], &map->sde[i + 1],
+ (map->ctr - i - 1) * sizeof(map->sde[0]));
+ map->ctr--;
+ pow = roundup_pow_of_two(map->ctr ? : 1);
+ map->mask = pow - 1;
+ sdma_populate_sde_map(map);
+ break;
+ }
+ }
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ cpumask_var_t mask, new_mask;
+ unsigned long cpu;
+ int ret, vl, sz;
+
+ vl = sdma_engine_get_vl(sde);
+ if (unlikely(vl < 0))
+ return -EINVAL;
+
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ ret = cpulist_parse(buf, mask);
+ if (ret)
+ goto out_free;
+
+ if (!cpumask_subset(mask, cpu_online_mask)) {
+ dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ sz = sizeof(struct sdma_rht_map_elem) +
+ (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+ mutex_lock(&process_to_sde_mutex);
+
+ for_each_cpu(cpu, mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Check if we have this already mapped */
+ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+ cpumask_set_cpu(cpu, new_mask);
+ continue;
+ }
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (!rht_node) {
+ rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+ if (!rht_node) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+ if (!rht_node->map[vl]) {
+ kfree(rht_node);
+ ret = -ENOMEM;
+ goto out;
+ }
+ rht_node->cpu_id = cpu;
+ rht_node->map[vl]->mask = 0;
+ rht_node->map[vl]->ctr = 1;
+ rht_node->map[vl]->sde[0] = sde;
+
+ ret = rhashtable_insert_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ if (ret) {
+ kfree(rht_node->map[vl]);
+ kfree(rht_node);
+ dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+ cpu);
+ goto out;
+ }
+
+ } else {
+ int ctr, pow;
+
+ /* Add new user mappings */
+ if (!rht_node->map[vl])
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+ if (!rht_node->map[vl]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl]->ctr++;
+ ctr = rht_node->map[vl]->ctr;
+ rht_node->map[vl]->sde[ctr - 1] = sde;
+ pow = roundup_pow_of_two(ctr);
+ rht_node->map[vl]->mask = pow - 1;
+
+ /* Populate the sde map table */
+ sdma_populate_sde_map(rht_node->map[vl]);
+ }
+ cpumask_set_cpu(cpu, new_mask);
+ }
+
+ /* Clean up old mappings */
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Don't cleanup sdes that are set in the new mask */
+ if (cpumask_test_cpu(cpu, mask))
+ continue;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (rht_node) {
+ bool empty = true;
+ int i;
+
+ /* Remove mappings for old sde */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ if (rht_node->map[i])
+ sdma_cleanup_sde_map(rht_node->map[i],
+ sde);
+
+ /* Free empty hash table entries */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i])
+ continue;
+
+ if (rht_node->map[i]->ctr) {
+ empty = false;
+ break;
+ }
+ }
+
+ if (empty) {
+ ret = rhashtable_remove_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ WARN_ON(ret);
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+ }
+ }
+ }
+
+ cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+ mutex_unlock(&process_to_sde_mutex);
+out_free:
+ free_cpumask_var(mask);
+ free_cpumask_var(new_mask);
+ return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ mutex_lock(&process_to_sde_mutex);
+ if (cpumask_empty(&sde->cpu_mask))
+ snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+ else
+ cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
+ mutex_unlock(&process_to_sde_mutex);
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static void sdma_rht_free(void *ptr, void *arg)
+{
+ struct sdma_rht_node *rht_node = ptr;
+ int i;
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * This routine dumps the process to sde mappings per cpu
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+ struct hfi1_devdata *dd,
+ unsigned long cpuid)
+{
+ struct sdma_rht_node *rht_node;
+ int i, j;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ sdma_rht_params);
+ if (!rht_node)
+ return;
+
+ seq_printf(s, "cpu%3lu: ", cpuid);
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i] || !rht_node->map[i]->ctr)
+ continue;
+
+ seq_printf(s, " vl%d: [", i);
+
+ for (j = 0; j < rht_node->map[i]->ctr; j++) {
+ if (!rht_node->map[i]->sde[j])
+ continue;
+
+ if (j > 0)
+ seq_puts(s, ",");
+
+ seq_printf(s, " sdma%2d",
+ rht_node->map[i]->sde[j]->this_idx);
+ }
+ seq_puts(s, " ]");
+ }
+
+ seq_puts(s, "\n");
+}
+
/*
* Free the indicated map struct
*/
@@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;
+
+ if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ goto bail;
+
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
@@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
+ rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -2086,6 +2439,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2100,18 +2458,18 @@ nodesc:
* side locking.
*
* Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list)
+ struct list_head *tx_list, u32 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
unsigned long flags;
u16 tail = INVALID_TAIL;
- int count = 0;
+ u32 submit_count = 0, flush_count = 0, total_count;
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
@@ -2127,33 +2485,34 @@ retry:
}
list_del_init(&tx->list);
tail = submit_tx(sde, tx);
- count++;
+ submit_count++;
if (tail != INVALID_TAIL &&
- (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+ (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
sdma_update_tail(sde, tail);
tail = INVALID_TAIL;
}
}
update_tail:
+ total_count = submit_count + flush_count;
if (wait)
- iowait_sdma_add(wait, count);
+ iowait_sdma_add(wait, total_count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret == 0 ? count : ret;
+ *count_out = total_count;
+ return ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
- if (wait)
- iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
list_add_tail(&tx->list, &sde->flushlist);
+ flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;