diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/device.c')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 192 |
1 files changed, 188 insertions, 4 deletions
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 7db82b24302b..f25df5276c22 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -33,6 +33,7 @@ #include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/vmalloc.h> +#include <linux/math64.h> #include <rdma/ib_verbs.h> @@ -55,6 +56,15 @@ module_param(allow_db_coalescing_on_t5, int, 0644); MODULE_PARM_DESC(allow_db_coalescing_on_t5, "Allow DB Coalescing on T5 (default = 0)"); +int c4iw_wr_log = 0; +module_param(c4iw_wr_log, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log, "Enables logging of work request timing data."); + +int c4iw_wr_log_size_order = 12; +module_param(c4iw_wr_log_size_order, int, 0444); +MODULE_PARM_DESC(c4iw_wr_log_size_order, + "Number of entries (log2) in the work request timing log."); + struct uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; @@ -103,6 +113,117 @@ static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); } +void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe) +{ + struct wr_log_entry le; + int idx; + + if (!wq->rdev->wr_log) + return; + + idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) & + (wq->rdev->wr_log_size - 1); + le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]); + getnstimeofday(&le.poll_host_ts); + le.valid = 1; + le.cqe_sge_ts = CQE_TS(cqe); + if (SQ_TYPE(cqe)) { + le.qid = wq->sq.qid; + le.opcode = CQE_OPCODE(cqe); + le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts; + le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts; + le.wr_id = CQE_WRID_SQ_IDX(cqe); + } else { + le.qid = wq->rq.qid; + le.opcode = FW_RI_RECEIVE; + le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts; + le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts; + le.wr_id = CQE_WRID_MSN(cqe); + } + wq->rdev->wr_log[idx] = le; +} + +static int wr_log_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + struct timespec prev_ts = {0, 0}; + struct wr_log_entry *lep; + int prev_ts_set = 0; + int idx, end; + +#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000) + + idx = atomic_read(&dev->rdev.wr_log_idx) & + (dev->rdev.wr_log_size - 1); + end = idx - 1; + if (end < 0) + end = dev->rdev.wr_log_size - 1; + lep = &dev->rdev.wr_log[idx]; + while (idx != end) { + if (lep->valid) { + if (!prev_ts_set) { + prev_ts_set = 1; + prev_ts = lep->poll_host_ts; + } + seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode " + "%u %s 0x%x host_wr_delta sec %lu nsec %lu " + "post_sge_ts 0x%llx cqe_sge_ts 0x%llx " + "poll_sge_ts 0x%llx post_poll_delta_ns %llu " + "cqe_poll_delta_ns %llu\n", + idx, + timespec_sub(lep->poll_host_ts, + prev_ts).tv_sec, + timespec_sub(lep->poll_host_ts, + prev_ts).tv_nsec, + lep->qid, lep->opcode, + lep->opcode == FW_RI_RECEIVE ? + "msn" : "wrid", + lep->wr_id, + timespec_sub(lep->poll_host_ts, + lep->post_host_ts).tv_sec, + timespec_sub(lep->poll_host_ts, + lep->post_host_ts).tv_nsec, + lep->post_sge_ts, lep->cqe_sge_ts, + lep->poll_sge_ts, + ts2ns(lep->poll_sge_ts - lep->post_sge_ts), + ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts)); + prev_ts = lep->poll_host_ts; + } + idx++; + if (idx > (dev->rdev.wr_log_size - 1)) + idx = 0; + lep = &dev->rdev.wr_log[idx]; + } +#undef ts2ns + return 0; +} + +static int wr_log_open(struct inode *inode, struct file *file) +{ + return single_open(file, wr_log_show, inode->i_private); +} + +static ssize_t wr_log_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + int i; + + if (dev->rdev.wr_log) + for (i = 0; i < dev->rdev.wr_log_size; i++) + dev->rdev.wr_log[i].valid = 0; + return count; +} + +static const struct file_operations wr_log_debugfs_fops = { + .owner = THIS_MODULE, + .open = wr_log_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = wr_log_clear, +}; + static int dump_qp(int id, void *p, void *data) { struct c4iw_qp *qp = p; @@ -241,12 +362,32 @@ static int dump_stag(int id, void *p, void *data) struct c4iw_debugfs_data *stagd = data; int space; int cc; + struct fw_ri_tpte tpte; + int ret; space = stagd->bufsize - stagd->pos - 1; if (space == 0) return 1; - cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8); + ret = cxgb4_read_tpte(stagd->devp->rdev.lldi.ports[0], (u32)id<<8, + (__be32 *)&tpte); + if (ret) { + dev_err(&stagd->devp->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return ret; + } + cc = snprintf(stagd->buf + stagd->pos, space, + "stag: idx 0x%x valid %d key 0x%x state %d pdid %d " + "perm 0x%x ps %d len 0x%llx va 0x%llx\n", + (u32)id<<8, + G_FW_RI_TPTE_VALID(ntohl(tpte.valid_to_pdid)), + G_FW_RI_TPTE_STAGKEY(ntohl(tpte.valid_to_pdid)), + G_FW_RI_TPTE_STAGSTATE(ntohl(tpte.valid_to_pdid)), + G_FW_RI_TPTE_PDID(ntohl(tpte.valid_to_pdid)), + G_FW_RI_TPTE_PERM(ntohl(tpte.locread_to_qpid)), + G_FW_RI_TPTE_PS(ntohl(tpte.locread_to_qpid)), + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo), + ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo)); if (cc < space) stagd->pos += cc; return 0; @@ -259,7 +400,7 @@ static int stag_release(struct inode *inode, struct file *file) printk(KERN_INFO "%s null stagd?\n", __func__); return 0; } - kfree(stagd->buf); + vfree(stagd->buf); kfree(stagd); return 0; } @@ -282,8 +423,8 @@ static int stag_open(struct inode *inode, struct file *file) idr_for_each(&stagd->devp->mmidr, count_idrs, &count); spin_unlock_irq(&stagd->devp->lock); - stagd->bufsize = count * sizeof("0x12345678\n"); - stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL); + stagd->bufsize = count * 256; + stagd->buf = vmalloc(stagd->bufsize); if (!stagd->buf) { ret = -ENOMEM; goto err1; @@ -348,6 +489,7 @@ static int stats_show(struct seq_file *seq, void *v) dev->rdev.stats.act_ofld_conn_fails); seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); return 0; } @@ -583,6 +725,12 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + if (c4iw_wr_log) { + de = debugfs_create_file("wr_log", S_IWUSR, devp->debugfs_root, + (void *)devp, &wr_log_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + } return 0; } @@ -696,7 +844,20 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pr_err(MOD "error allocating status page\n"); goto err4; } + + if (c4iw_wr_log) { + rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) * + sizeof(*rdev->wr_log), GFP_KERNEL); + if (rdev->wr_log) { + rdev->wr_log_size = 1 << c4iw_wr_log_size_order; + atomic_set(&rdev->wr_log_idx, 0); + } else { + pr_err(MOD "error allocating wr_log. Logging disabled\n"); + } + } + rdev->status_page->db_off = 0; + return 0; err4: c4iw_rqtpool_destroy(rdev); @@ -710,6 +871,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + kfree(rdev->wr_log); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); @@ -768,6 +930,27 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } devp->rdev.lldi = *infop; + /* init various hw-queue params based on lld info */ + PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n", + __func__, devp->rdev.lldi.sge_ingpadboundary, + devp->rdev.lldi.sge_egrstatuspagesize); + + devp->rdev.hw_queue.t4_eq_status_entries = + devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1; + devp->rdev.hw_queue.t4_max_eq_size = 65520; + devp->rdev.hw_queue.t4_max_iq_size = 65520; + devp->rdev.hw_queue.t4_max_rq_size = 8192 - + devp->rdev.hw_queue.t4_eq_status_entries - 1; + devp->rdev.hw_queue.t4_max_sq_size = + devp->rdev.hw_queue.t4_max_eq_size - + devp->rdev.hw_queue.t4_eq_status_entries - 1; + devp->rdev.hw_queue.t4_max_qp_depth = + devp->rdev.hw_queue.t4_max_rq_size; + devp->rdev.hw_queue.t4_max_cq_depth = + devp->rdev.hw_queue.t4_max_iq_size - 2; + devp->rdev.hw_queue.t4_stat_len = + devp->rdev.lldi.sge_egrstatuspagesize; + /* * For T5 devices, we map all of BAR2 with WC. * For T4 devices with onchip qp mem, we map only that part @@ -818,6 +1001,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( |