29 files changed, 764 insertions, 623 deletions
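Two recurring changes in this series are easier to follow with a sketch in hand. First, the per-context open count (uctxt->cnt) and the active_slaves mask are replaced by a single in_use_ctxts bitmap (DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS) in hfi.h): opening a sub-context claims the first clear bit, and the last close (bitmap_empty()) triggers teardown of the device context. Below is a minimal stand-alone mimic of that lifecycle; it substitutes a plain uint64_t for the kernel bitmap API (DECLARE_BITMAP/__set_bit/__clear_bit/bitmap_empty/find_first_zero_bit, all visible in the hunks below) and takes HFI1_MAX_SHARED_CTXTS as 8 purely for illustration, so it compiles and runs in userspace.

#include <stdint.h>
#include <stdio.h>

#define HFI1_MAX_SHARED_CTXTS 8	/* illustrative; see hfi.h for the real value */

struct demo_ctxt {
	uint64_t in_use_ctxts;		/* bit N set => (sub)context N is open */
	unsigned int subctxt_cnt;	/* max sub-contexts allowed by the opener */
};

/* userspace stand-in for find_first_zero_bit() */
static unsigned int first_zero_bit(uint64_t map, unsigned int nbits)
{
	unsigned int i;

	for (i = 0; i < nbits; i++)
		if (!(map & (1ULL << i)))
			return i;
	return nbits;
}

/* open path, like find_sub_ctxt(): claim the first free slot or fail */
static int demo_open(struct demo_ctxt *c)
{
	unsigned int sub = first_zero_bit(c->in_use_ctxts, HFI1_MAX_SHARED_CTXTS);

	if (sub >= c->subctxt_cnt)
		return -1;			/* the driver returns -EBUSY */
	c->in_use_ctxts |= 1ULL << sub;		/* __set_bit() in the driver */
	return (int)sub;
}

/* close path, like hfi1_file_close(): drop our bit; last one out tears down */
static void demo_close(struct demo_ctxt *c, int sub)
{
	c->in_use_ctxts &= ~(1ULL << sub);	/* __clear_bit() */
	if (!c->in_use_ctxts)			/* bitmap_empty() */
		printf("last user gone: free the device context\n");
}

int main(void)
{
	struct demo_ctxt c = { .in_use_ctxts = 0, .subctxt_cnt = 2 };
	int base = demo_open(&c);	/* slot 0: the base context */
	int sub = demo_open(&c);	/* slot 1: a shared sub-context */

	printf("opened %d and %d; a third open returns %d\n",
	       base, sub, demo_open(&c));
	demo_close(&c, sub);
	demo_close(&c, base);		/* prints the teardown message */
	return 0;
}

Second, the ruc.c hunk factors the open-coded time-slice check in hfi1_do_send() into schedule_send_yield(): when the slice expires, the QP either requeues itself (a yield, so other QPs get to run) or, if it is running in the send thread and the workqueue is not congested, extends its slice and keeps sending. The sketch below is a hedged userspace analogue of that decision, with clock_gettime() standing in for jiffies and a plain flag standing in for workqueue_congested(); it forces an expired slice so the yield path is exercised deterministically.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

struct pkt_state {			/* stand-in for hfi1_pkt_state */
	long timeout;			/* end of the current slice (ms) */
	long timeout_int;		/* slice length (ms) */
	bool in_thread;			/* running from the send thread? */
	bool wq_congested;		/* fake workqueue_congested() */
};

/* true => stop; the driver would requeue via hfi1_schedule_send() */
static bool send_yield(struct pkt_state *ps)
{
	if (now_ms() > ps->timeout) {
		if (!ps->in_thread || ps->wq_congested)
			return true;	/* yield: let other QPs run */
		/* the kernel also cond_resched()es before continuing */
		ps->timeout = now_ms() + ps->timeout_int;
	}
	return false;
}

int main(void)
{
	struct pkt_state ps = { .timeout_int = 10, .in_thread = false };
	int pkts = 0;

	ps.timeout = now_ms() - 1;	/* force an expired slice */
	while (pkts < 1000) {
		pkts++;			/* "send" one packet */
		if (send_yield(&ps)) {
			printf("yielded after %d packet(s)\n", pkts);
			return 0;
		}
	}
	printf("sent all %d packets in one slice\n", pkts);
	return 0;
}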
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 39279fd630bc..5d6b1eeaa9a0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1055,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg); -static void set_partition_keys(struct hfi1_pportdata *); +static void set_partition_keys(struct hfi1_pportdata *ppd); static const char *link_state_name(u32 state); static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state); @@ -1068,9 +1068,9 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); -static void handle_temp_err(struct hfi1_devdata *); -static void dc_shutdown(struct hfi1_devdata *); -static void dc_start(struct hfi1_devdata *); +static void handle_temp_err(struct hfi1_devdata *dd); +static void dc_shutdown(struct hfi1_devdata *dd); +static void dc_start(struct hfi1_devdata *dd); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); @@ -10233,7 +10233,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (pstate == PLS_OFFLINE) { do_transition = 0; /* in right state */ do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xff) == PLS_OFFLINE) { + } else if ((pstate & 0xf0) == PLS_OFFLINE) { do_transition = 0; /* in an offline transient state */ do_wait = 1; /* ...wait for it to settle */ } else { @@ -12662,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) #define SET_STATIC_RATE_CONTROL_SMASK(r) \ (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) -int hfi1_init_ctxt(struct send_context *sc) +void hfi1_init_ctxt(struct send_context *sc) { if (sc) { struct hfi1_devdata *dd = sc->dd; @@ -12679,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc) write_kctxt_csr(dd, sc->hw_context, SEND_CTXT_CHECK_ENABLE, reg); } - return 0; } int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) @@ -14528,30 +14527,24 @@ done: return ret; } -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + if (!ctxt || !ctxt->sc) + return -EINVAL; + + if (ctxt->ctxt >= dd->num_rcv_contexts) + return -EINVAL; + + hw_ctxt = ctxt->sc->hw_context; + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); + + return 0; } /* diff --git 
a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b9dbf16d7703..cbe455d9ab8b 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -636,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } -u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32); +u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, + u32 dw_len); /* firmware.c */ #define SBUS_MASTER_BROADCAST 0xfd @@ -728,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd); void set_intr_state(struct hfi1_devdata *dd, u32 enable); void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths); -void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32); +void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, + u32 intr_adjust, u32 npkts); int stop_drain_data_vls(struct hfi1_devdata *dd); int open_fill_data_vls(struct hfi1_devdata *dd); u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns); @@ -1347,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); -int hfi1_init_ctxt(struct send_context *sc); +void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); @@ -1360,7 +1362,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 527895487175..a50870e455a3 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -85,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO); MODULE_PARM_DESC(cu, "Credit return units"); unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT; -static int hfi1_caps_set(const char *, const struct kernel_param *); -static int hfi1_caps_get(char *, const struct kernel_param *); +static int hfi1_caps_set(const char *val, const struct kernel_param *kp); +static int hfi1_caps_get(char *buffer, const struct kernel_param *kp); static const struct kernel_param_ops cap_ops = { .set = hfi1_caps_set, .get = hfi1_caps_get @@ -211,42 +211,6 @@ int hfi1_count_active_units(void) } /* - * Return count of all units, optionally return in arguments - * the number of usable (present) units, and the number of - * ports that are up. 
- */ -int hfi1_count_units(int *npresentp, int *nupp) -{ - int nunits = 0, npresent = 0, nup = 0; - struct hfi1_devdata *dd; - unsigned long flags; - int pidx; - struct hfi1_pportdata *ppd; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - - list_for_each_entry(dd, &hfi1_dev_list, list) { - nunits++; - if ((dd->flags & HFI1_PRESENT) && dd->kregbase) - npresent++; - for (pidx = 0; pidx < dd->num_pports; ++pidx) { - ppd = dd->pport + pidx; - if (ppd->lid && ppd->linkup) - nup++; - } - } - - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - if (npresentp) - *npresentp = npresent; - if (nupp) - *nupp = nup; - - return nunits; -} - -/* * Get address of eager buffer from it's index (allocated in chunks, not * contiguous). */ @@ -1325,7 +1289,7 @@ int hfi1_reset_device(int unit) if (dd->rcd) for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { - if (!dd->rcd[i] || !dd->rcd[i]->cnt) + if (!dd->rcd[i]) continue; spin_unlock_irqrestore(&dd->uctxt_lock, flags); ret = -EBUSY; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3d9bce4bfcc7..3158128d57e8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -49,6 +49,7 @@ #include <linux/vmalloc.h> #include <linux/io.h> #include <linux/sched/mm.h> +#include <linux/bitmap.h> #include <rdma/ib.h> @@ -70,30 +71,37 @@ /* * File operation functions */ -static int hfi1_file_open(struct inode *, struct file *); -static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); -static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); -static int hfi1_file_mmap(struct file *, struct vm_area_struct *); - -static u64 kvirt_to_phys(void *); -static int assign_ctxt(struct file *, struct hfi1_user_info *); -static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *); -static int user_init(struct file *); -static int get_ctxt_info(struct file *, void __user *, __u32); -static int get_base_info(struct file *, void __user *, __u32); -static int setup_ctxt(struct file *); -static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, int); -static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); -static int allocate_ctxt(struct file *, struct hfi1_devdata *, - struct hfi1_user_info *); -static unsigned int poll_urgent(struct file *, struct poll_table_struct *); -static unsigned int poll_next(struct file *, struct poll_table_struct *); -static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); -static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); -static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); -static int vma_fault(struct vm_fault *); +static int hfi1_file_open(struct inode *inode, struct file *fp); +static int hfi1_file_close(struct inode *inode, struct file *fp); +static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from); +static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt); +static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); + +static u64 kvirt_to_phys(void *addr); +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); +static int init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo); +static int init_user_ctxt(struct hfi1_filedata *fd); +static void user_init(struct hfi1_ctxtdata *uctxt); +static int get_ctxt_info(struct hfi1_filedata 
*fd, void __user *ubase, + __u32 len); +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len); +static int setup_base_ctxt(struct hfi1_filedata *fd); +static int setup_subctxt(struct hfi1_ctxtdata *uctxt); + +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo); +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, + struct hfi1_user_info *uinfo); +static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); +static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, + unsigned long events); +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey); +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, + int start_stop); +static int vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -173,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + return -EINVAL; + if (!atomic_inc_not_zero(&dd->user_refcount)) return -ENXIO; @@ -187,6 +198,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; mmgrab(fd->mm); + fd->dd = dd; fp->private_data = fd; } else { fp->private_data = NULL; @@ -229,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(uinfo))) return -EFAULT; - ret = assign_ctxt(fp, &uinfo); - if (ret < 0) - return ret; - ret = setup_ctxt(fp); - if (ret) - return ret; - ret = user_init(fp); + ret = assign_ctxt(fd, &uinfo); break; case HFI1_IOCTL_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_ctxt_info)); break; case HFI1_IOCTL_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long)arg, + ret = get_base_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_base_info)); break; case HFI1_IOCTL_CREDIT_UPD: @@ -256,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_setup(fp, &tinfo); + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); if (!ret) { /* * Copy the number of tidlist entries we used @@ -278,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + ret = hfi1_user_exp_rcv_clear(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -293,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -430,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long count = 0; ret = hfi1_user_sdma_process_request( - kiocb->ki_filp, (struct iovec *)(from->iov + done), + fd, (struct iovec *)(from->iov + done), dim, &count); if (ret) { reqs = ret; @@ -756,6 +762,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); + /* clean up rcv side */ + hfi1_user_exp_rcv_free(fdata); + /* * Clear any left over, unhandled events so the next 
process that * gets this context doesn't get confused. @@ -764,8 +773,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; - if (--uctxt->cnt) { - uctxt->active_slaves &= ~(1 << fdata->subctxt); + __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; } @@ -795,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_free(fdata); - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; uctxt->piowait_to = 0; @@ -836,127 +845,135 @@ static u64 kvirt_to_phys(void *addr) return paddr; } -static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { - int i_minor, ret = 0; + int ret; unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; - if (swmajor != HFI1_USER_SWMAJOR) { - ret = -ENODEV; - goto done; - } + if (swmajor != HFI1_USER_SWMAJOR) + return -ENODEV; swminor = uinfo->userversion & 0xffff; mutex_lock(&hfi1_mutex); - /* First, lets check if we need to setup a shared context? */ + /* + * Get a sub context if necessary. + * ret < 0 error, 0 no context, 1 sub-context found + */ + ret = 0; if (uinfo->subctxt_cnt) { - struct hfi1_filedata *fd = fp->private_data; - - ret = find_shared_ctxt(fp, uinfo); - if (ret < 0) - goto done_unlock; - if (ret) { + ret = find_sub_ctxt(fd, uinfo); + if (ret > 0) fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); - } } /* - * We execute the following block if we couldn't find a - * shared context or if context sharing is not required. + * Allocate a base context if context sharing is not required or we + * couldn't find a sub context. */ - if (!ret) { - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor); - } -done_unlock: + if (!ret) + ret = allocate_ctxt(fd, fd->dd, uinfo); + mutex_unlock(&hfi1_mutex); -done: + + /* Depending on the context type, do the appropriate init */ + if (ret > 0) { + /* + * sub-context info can only be set up after the base + * context has been completed. + */ + ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( + HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags)); + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + return -ENOMEM; + } + /* The only thing a sub context needs is the user_xxx stuff */ + if (!ret) + ret = init_user_ctxt(fd); + + if (ret) + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + } else if (!ret) { + ret = setup_base_ctxt(fd); + if (fd->uctxt->subctxt_cnt) { + /* If there is an error, set the failed bit. */ + if (ret) + set_bit(HFI1_CTXT_BASE_FAILED, + &fd->uctxt->event_flags); + /* + * Base context is done, notify anybody using a + * sub-context that is waiting for this completion + */ + clear_bit(HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags); + wake_up(&fd->uctxt->wait); + } + } + return ret; } -static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno) +/* + * The hfi1_mutex must be held when this function is called. It is + * necessary to ensure serialized access to the bitmask in_use_ctxts. 
+ */ +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo) { - struct hfi1_devdata *dd = NULL; - int devmax, npresent, nup; + int i; + struct hfi1_devdata *dd = fd->dd; + u16 subctxt; - devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) - return -ENXIO; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - if (!nup) - return -ENETDOWN; + /* Skip ctxts which are not yet open */ + if (!uctxt || + bitmap_empty(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS)) + continue; - dd = hfi1_lookup(devno); - if (!dd) - return -ENODEV; - else if (!dd->freectxts) - return -EBUSY; + /* Skip dynamically allocated kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) + continue; - return allocate_ctxt(fp, dd, uinfo); -} + /* Skip ctxt if it doesn't match the requested one */ + if (memcmp(uctxt->uuid, uinfo->uuid, + sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || + uctxt->subctxt_id != uinfo->subctxt_id || + uctxt->subctxt_cnt != uinfo->subctxt_cnt) + continue; -static int find_shared_ctxt(struct file *fp, - const struct hfi1_user_info *uinfo) -{ - int devmax, ndev, i; - int ret = 0; - struct hfi1_filedata *fd = fp->private_data; + /* Verify the sharing process matches the master */ + if (uctxt->userversion != uinfo->userversion) + return -EINVAL; - devmax = hfi1_count_units(NULL, NULL); + /* Find an unused context */ + subctxt = find_first_zero_bit(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS); + if (subctxt >= uctxt->subctxt_cnt) + return -EBUSY; - for (ndev = 0; ndev < devmax; ndev++) { - struct hfi1_devdata *dd = hfi1_lookup(ndev); + fd->uctxt = uctxt; + fd->subctxt = subctxt; + __set_bit(fd->subctxt, uctxt->in_use_ctxts); - if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) - continue; - for (i = dd->first_dyn_alloc_ctxt; - i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - - /* Skip ctxts which are not yet open */ - if (!uctxt || !uctxt->cnt) - continue; - - /* Skip dynamically allocted kernel contexts */ - if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) - continue; - - /* Skip ctxt if it doesn't match the requested one */ - if (memcmp(uctxt->uuid, uinfo->uuid, - sizeof(uctxt->uuid)) || - uctxt->jkey != generate_jkey(current_uid()) || - uctxt->subctxt_id != uinfo->subctxt_id || - uctxt->subctxt_cnt != uinfo->subctxt_cnt) - continue; - - /* Verify the sharing process matches the master */ - if (uctxt->userversion != uinfo->userversion || - uctxt->cnt >= uctxt->subctxt_cnt) { - ret = -EINVAL; - goto done; - } - fd->uctxt = uctxt; - fd->subctxt = uctxt->cnt++; - uctxt->active_slaves |= 1 << fd->subctxt; - ret = 1; - goto done; - } + return 1; } -done: - return ret; + return 0; } -static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt; - unsigned ctxt; + unsigned int ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -970,6 +987,14 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return -EIO; } + /* + * This check is sort of redundant to the next EBUSY error. It would + * also indicate an inconsistency in the driver if this value was + * zero, but there were still contexts available.
+ */ + if (!dd->freectxts) + return -EBUSY; + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) if (!dd->rcd[ctxt]) @@ -1013,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, goto ctxdata_free; /* - * Setup shared context resources if the user-level has requested - * shared contexts and this is the 'master' process. + * Setup sub context resources if the user-level has requested + * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper master. */ - if (uinfo->subctxt_cnt && !fd->subctxt) { + if (uinfo->subctxt_cnt) { ret = init_subctxts(uctxt, uinfo); /* * On error, we don't need to disable and de-allocate the @@ -1055,7 +1080,7 @@ ctxdata_free: static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { - unsigned num_subctxts; + u16 num_subctxts; num_subctxts = uinfo->subctxt_cnt; if (num_subctxts > HFI1_MAX_SHARED_CTXTS) @@ -1063,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; - uctxt->active_slaves = 1; uctxt->redirect_seq_cnt = 1; - set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); + set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); return 0; } @@ -1073,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, static int setup_subctxt(struct hfi1_ctxtdata *uctxt) { int ret = 0; - unsigned num_subctxts = uctxt->subctxt_cnt; + u16 num_subctxts = uctxt->subctxt_cnt; uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE); - if (!uctxt->subctxt_uregbase) { - ret = -ENOMEM; - goto bail; - } + if (!uctxt->subctxt_uregbase) + return -ENOMEM; + /* We can take the size of the RcvHdr Queue from the master */ uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * num_subctxts); @@ -1094,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) ret = -ENOMEM; goto bail_rhdr; } - goto bail; + + return 0; + bail_rhdr: vfree(uctxt->subctxt_rcvhdr_base); + uctxt->subctxt_rcvhdr_base = NULL; bail_ureg: vfree(uctxt->subctxt_uregbase); uctxt->subctxt_uregbase = NULL; -bail: + return ret; } -static int user_init(struct file *fp) +static void user_init(struct hfi1_ctxtdata *uctxt) { unsigned int rcvctrl_ops = 0; - struct hfi1_filedata *fd = fp->private_data; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - - /* make sure that the context has already been setup */ - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return -EFAULT; /* initialize poll variables... 
*/ uctxt->urgent = 0; @@ -1160,20 +1180,12 @@ static int user_init(struct file *fp) else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); - - /* Notify any waiting slaves */ - if (uctxt->subctxt_cnt) { - clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); - wake_up(&uctxt->wait); - } - - return 0; } -static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) +static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_ctxt_info cinfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret = 0; @@ -1211,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) return ret; } -static int setup_ctxt(struct file *fp) +static int init_user_ctxt(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + int ret; + + ret = hfi1_user_sdma_alloc_queues(uctxt, fd); + if (ret) + return ret; + + ret = hfi1_user_exp_rcv_init(fd); + + return ret; +} + +static int setup_base_ctxt(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - /* - * Context should be set up only once, including allocation and - * programming of eager buffers. This is done if context sharing - * is not requested or by the master process. - */ - if (!uctxt->subctxt_cnt || !fd->subctxt) { - ret = hfi1_init_ctxt(uctxt->sc); - if (ret) - goto done; + hfi1_init_ctxt(uctxt->sc); - /* Now allocate the RcvHdr queue and eager buffers. */ - ret = hfi1_create_rcvhdrq(dd, uctxt); - if (ret) - goto done; - ret = hfi1_setup_eagerbufs(uctxt); - if (ret) - goto done; - if (uctxt->subctxt_cnt && !fd->subctxt) { - ret = setup_subctxt(uctxt); - if (ret) - goto done; - } - } else { - ret = wait_event_interruptible(uctxt->wait, !test_bit( - HFI1_CTXT_MASTER_UNINIT, - &uctxt->event_flags)); - if (ret) - goto done; - } + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + return ret; - ret = hfi1_user_sdma_alloc_queues(uctxt, fp); + ret = hfi1_setup_eagerbufs(uctxt); if (ret) - goto done; - /* - * Expected receive has to be setup for all processes (including - * shared contexts). However, it has to be done after the master - * context has been fully configured as it depends on the - * eager/expected split of the RcvArray entries. - * Setting it up here ensures that the subcontexts will be waiting - * (due to the above wait_event_interruptible() until the master - * is setup. 
- */ - ret = hfi1_user_exp_rcv_init(fp); + goto setup_failed; + + /* If sub-contexts are enabled, do the appropriate setup */ + if (uctxt->subctxt_cnt) + ret = setup_subctxt(uctxt); if (ret) - goto done; + goto setup_failed; - set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); -done: + ret = hfi1_user_exp_rcv_grp_init(fd); + if (ret) + goto setup_failed; + + ret = init_user_ctxt(fd); + if (ret) + goto setup_failed; + + user_init(uctxt); + + return 0; + +setup_failed: + hfi1_free_ctxtdata(dd, uctxt); return ret; } -static int get_base_info(struct file *fp, void __user *ubase, __u32 len) +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_base_info binfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; ssize_t sz; unsigned offset; int ret = 0; - trace_hfi1_uctxtdata(uctxt->dd, uctxt); + trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt); memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; @@ -1443,7 +1451,7 @@ done: * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, int start_stop) { struct hfi1_devdata *dd = uctxt->dd; @@ -1478,7 +1486,7 @@ bail: * User process then performs actions appropriate to bit having been * set, if desired, and checks again in future. */ -static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long events) { int i; @@ -1499,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, return 0; } -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, - u16 pkey) +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) { int ret = -ENOENT, i, intable = 0; struct hfi1_pportdata *ppd = uctxt->ppd; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 14063bd30c2a..da322e6668cc 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -196,12 +196,6 @@ struct hfi1_ctxtdata { void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* - * Shared page for kernel to signal user processes that send buffers - * need disarming. The process should call HFI1_CMD_DISARM_BUFS - * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set. - */ - unsigned long *user_event_mask; /* when waiting for rcv or pioavail */ wait_queue_head_t wait; /* rcvhdrq size (for freeing) */ @@ -224,13 +218,12 @@ struct hfi1_ctxtdata { * (ignoring forks, dup, etc. for now) */ int cnt; + /* Device context index */ + unsigned ctxt; /* - * how much space to leave at start of eager TID entries for - * protocol use, on each TID + * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - /* instead of calculating it */ - unsigned ctxt; - /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. 
*/ u16 subctxt_id; @@ -288,10 +281,10 @@ struct hfi1_ctxtdata { void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); /* The version of the library which opened this ctxt */ u32 userversion; - /* Bitmask of active slaves */ - u32 active_slaves; /* Type of packets or conditions we want to poll for */ u16 poll_type; /* receive packet sequence counter */ @@ -1238,10 +1231,11 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; - unsigned subctxt; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; + u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; @@ -1263,27 +1257,27 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -int hfi1_init(struct hfi1_devdata *, int); -int hfi1_count_units(int *npresentp, int *nupp); +int hfi1_init(struct hfi1_devdata *dd, int reinit); int hfi1_count_active_units(void); -int hfi1_diag_add(struct hfi1_devdata *); -void hfi1_diag_remove(struct hfi1_devdata *); +int hfi1_diag_add(struct hfi1_devdata *dd); +void hfi1_diag_remove(struct hfi1_devdata *dd); void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup); void handle_user_interrupt(struct hfi1_ctxtdata *rcd); -int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *); -int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *); +int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); +int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int); -void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, - struct hfi1_devdata *, u8, u8); -void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); - -int handle_receive_interrupt(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, + int numa); +void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, + struct hfi1_devdata *dd, u8 hw_pidx, u8 port); +void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); + +int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); void set_all_slowpath(struct hfi1_devdata *dd); void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); @@ -1580,7 +1574,7 @@ bad: u32 lrh_max_header_bytes(struct hfi1_devdata *dd); int mtu_to_enum(u32 mtu, int default_if_bad); -u16 enum_to_mtu(int); +u16 enum_to_mtu(int mtu); static inline int valid_ib_mtu(unsigned int mtu) { return mtu == 256 || mtu == 512 || @@ -1594,15 +1588,15 @@ static inline int valid_opa_max_mtu(unsigned int mtu) (valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240); } -int set_mtu(struct hfi1_pportdata *); +int set_mtu(struct hfi1_pportdata *ppd); -int hfi1_set_lid(struct hfi1_pportdata *, u32, u8); -void hfi1_disable_after_error(struct hfi1_devdata *); -int hfi1_set_uevent_bits(struct hfi1_pportdata *, 
const int); -int hfi1_rcvbuf_validate(u32, u8, u16 *); +int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc); +void hfi1_disable_after_error(struct hfi1_devdata *dd); +int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit); +int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode); -int fm_get_table(struct hfi1_pportdata *, int, void *); -int fm_set_table(struct hfi1_pportdata *, int, void *); +int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t); +int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t); void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); @@ -1724,19 +1718,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) /* ctxt_flag bit offsets */ - /* context has been setup */ -#define HFI1_CTXT_SETUP_DONE 1 + /* base context has not finished initializing */ +#define HFI1_CTXT_BASE_UNINIT 1 + /* base context initialization failed */ +#define HFI1_CTXT_BASE_FAILED 2 /* waiting for a packet to arrive */ -#define HFI1_CTXT_WAITING_RCV 2 - /* master has not finished initializing */ -#define HFI1_CTXT_MASTER_UNINIT 4 +#define HFI1_CTXT_WAITING_RCV 3 /* waiting for an urgent packet to arrive */ -#define HFI1_CTXT_WAITING_URG 5 +#define HFI1_CTXT_WAITING_URG 4 /* free up any allocated data at closes */ -struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, - const struct pci_device_id *); -void hfi1_free_devdata(struct hfi1_devdata *); +struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, + const struct pci_device_id *ent); +void hfi1_free_devdata(struct hfi1_devdata *dd); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1811,23 +1805,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) extern const char ib_hfi1_version[]; -int hfi1_device_create(struct hfi1_devdata *); -void hfi1_device_remove(struct hfi1_devdata *); +int hfi1_device_create(struct hfi1_devdata *dd); +void hfi1_device_remove(struct hfi1_devdata *dd); int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, struct kobject *kobj); -int hfi1_verbs_register_sysfs(struct hfi1_devdata *); -void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *); +int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd); +void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd); /* Hook for sysfs read of QSFP */ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); -int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); -void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); +int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); +void hfi1_pcie_cleanup(struct pci_dev *pdev); +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); -int pcie_speeds(struct hfi1_devdata *); -void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *); -void hfi1_enable_intx(struct pci_dev *); +int pcie_speeds(struct hfi1_devdata *dd); +void request_msix(struct hfi1_devdata *dd, u32 *nent, + struct hfi1_msix_entry *entry); +void hfi1_enable_intx(struct pci_dev *pdev); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4d6b9f82efa3..4a11d4da4c92 100644
--- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -53,6 +53,7 @@ #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> +#include <linux/bitmap.h> #include <rdma/rdma_vt.h> #include "hfi.h" @@ -70,6 +71,7 @@ #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt +#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 /* * min buffers we want to have per context, after driver */ @@ -101,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers"); -static uint eager_buffer_size = (2 << 20); /* 2MB */ +static uint eager_buffer_size = (8 << 20); /* 8MB */ module_param(eager_buffer_size, uint, S_IRUGO); -MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB"); +MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB"); static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); @@ -117,7 +119,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16); +static inline u64 encode_rcv_header_entry_size(u16 size); static struct idr hfi1_unit_table; u32 hfi1_cpulist_count; @@ -175,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; } - ret = hfi1_init_ctxt(rcd->sc); - if (ret < 0) { - dd_dev_err(dd, - "Failed to setup kernel receive context, failing\n"); - ret = -EFAULT; - goto bail; - } + hfi1_init_ctxt(rcd->sc); } /* @@ -193,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) return 0; nomem: ret = -ENOMEM; -bail: + if (dd->rcd) { for (i = 0; i < dd->num_rcv_contexts; ++i) hfi1_free_ctxtdata(dd, dd->rcd[i]); @@ -227,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, INIT_LIST_HEAD(&rcd->qp_wait_list); rcd->ppd = ppd; rcd->dd = dd; - rcd->cnt = 1; + __set_bit(0, rcd->in_use_ctxts); rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; rcd->numa_id = numa; @@ -623,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd) alloc_workqueue( "hfi%d_%d", WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, - dd->num_sdma, + HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) goto wq_error; @@ -968,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.buffers); sc_free(rcd->sc); - vfree(rcd->user_event_mask); vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); @@ -1687,8 +1682,6 @@ bail_free: dd_dev_err(dd, "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", rcd->ctxt); - vfree(rcd->user_event_mask); - rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; @@ -1777,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", rcd->ctxt); + ret = -ENOMEM; goto bail_rcvegrbuf_phys; } @@ -1854,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) "ctxt%u: current Eager buffer size is invalid %u\n", rcd->ctxt, 
rcd->egrbufs.rcvtid_size); ret = -EINVAL; - goto bail; + goto bail_rcvegrbuf_phys; } for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { @@ -1862,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } - goto bail; + + return 0; bail_rcvegrbuf_phys: for (idx = 0; idx < rcd->egrbufs.alloced && @@ -1876,6 +1871,6 @@ bail_rcvegrbuf_phys: rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } -bail: + return ret; } diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 232014d46f79..ba265d0ae93b 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -47,6 +47,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/bitmap.h> #include "hfi.h" #include "common.h" @@ -189,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) unsigned long flags; spin_lock_irqsave(&dd->uctxt_lock, flags); - if (!rcd->cnt) + if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) goto done; if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4573e4c9f35c..650305cc0373 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp) void notify_qp_reset(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; clear_ahg(qp); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 75a729cd0c3d..069bdaf061ab 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, struct ib_header hdr; struct ib_other_headers *ohdr; unsigned long flags; - struct hfi1_qp_priv *priv = qp->priv; /* clear the defer count */ - priv->r_adefered = 0; + qp->r_adefered = 0; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->s_flags & RVT_S_RESP_PENDING) @@ -1604,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, static inline void rc_cancel_ack(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; if (list_empty(&qp->rspwait)) return; list_del_init(&qp->rspwait); @@ -2314,13 +2311,11 @@ send_last: qp->r_nak_state = 0; /* Send an ACK if requested or required. 
*/ if (psn & IB_BTH_REQ_ACK) { - struct hfi1_qp_priv *priv = qp->priv; - if (packet->numpkt == 0) { rc_cancel_ack(qp); goto send_ack; } - if (priv->r_adefered >= HFI1_PSN_CREDIT) { + if (qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2328,7 +2323,7 @@ send_last: rc_cancel_ack(qp); goto send_ack; } - priv->r_adefered++; + qp->r_adefered++; rc_defered_ack(rcd, qp); } return; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 891ba0a81bbd..3a17daba28a9 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -800,6 +800,43 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +/** + * schedule_send_yield - test for a yield required for QP send engine + * @timeout: Final time for timeout slice in jiffies + * @qp: a pointer to QP + * @ps: a pointer to a structure with commonly used values for + * the send engine progress + * + * This routine checks if the time slice for the QP has expired + * for RC QPs; if so, an additional work entry is queued. At this + * point, other QPs have an opportunity to be scheduled. It + * returns true if a yield is required, otherwise, false + * is returned. + */ +static bool schedule_send_yield(struct rvt_qp *qp, + struct hfi1_pkt_state *ps) +{ + if (unlikely(time_after(jiffies, ps->timeout))) { + if (!ps->in_thread || + workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, ps->flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, ps->flags); + this_cpu_inc(*ps->ppd->dd->send_schedule); + trace_hfi1_rc_expired_time_slice(qp, true); + return true; + } + + cond_resched(); + this_cpu_inc(*ps->ppd->dd->send_schedule); + ps->timeout = jiffies + ps->timeout_int; + } + + trace_hfi1_rc_expired_time_slice(qp, false); + return false; +} + void hfi1_do_send_from_rvt(struct rvt_qp *qp) { hfi1_do_send(qp, false); @@ -827,13 +864,13 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); - unsigned long timeout; - unsigned long timeout_int; - int cpu; ps.dev = to_idev(qp->ibqp.device); ps.ibp = to_iport(qp->ibqp.device, qp->port_num); ps.ppd = ppd_from_ibp(ps.ibp); + ps.in_thread = in_thread; + + trace_hfi1_rc_do_send(qp, in_thread); switch (qp->ibqp.qp_type) { case IB_QPT_RC: @@ -844,7 +881,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) return; } make_req = hfi1_make_rc_req; - timeout_int = (qp->timeout_jiffies); + ps.timeout_int = qp->timeout_jiffies; break; case IB_QPT_UC: if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & @@ -854,11 +891,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) return; } make_req = hfi1_make_uc_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; break; default: make_req = hfi1_make_ud_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; } spin_lock_irqsave(&qp->s_lock, ps.flags); @@ -871,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) qp->s_flags |= RVT_S_BUSY; - timeout = jiffies + (timeout_int) / 8; - cpu = priv->s_sde ? priv->s_sde->cpu : + ps.timeout_int = ps.timeout_int / 8; + ps.timeout = jiffies + ps.timeout_int; + ps.cpu = priv->s_sde ?
priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + /* ensure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); do { @@ -889,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) /* Record that s_ahg is empty. */ qp->s_hdrwords = 0; /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (!in_thread || - workqueue_congested( - cpu, - ps.ppd->hfi1_wq)) { - spin_lock_irqsave( - &qp->s_lock, - ps.flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore( - &qp->s_lock, - ps.flags); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - return; - } - cond_resched(); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - timeout = jiffies + (timeout_int) / 8; - } + if (schedule_send_yield(qp, &ps)) + return; + spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 26ae789e47cf..4eb4cc798035 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -57,12 +57,14 @@ #define UCTXT_FMT \ "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \ - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" + "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u" TRACE_EVENT(hfi1_uctxtdata, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), - TP_ARGS(dd, uctxt), + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt, + unsigned int subctxt), + TP_ARGS(dd, uctxt, subctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(unsigned int, ctxt) + __field(unsigned int, subctxt) __field(u32, credits) __field(u64, hw_free) __field(void __iomem *, piobase) @@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) __field(u64, rcvegr_dma) + __field(unsigned int, subctxt_cnt) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; + __entry->subctxt = subctxt; __entry->credits = uctxt->sc->credits; __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; @@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; + __entry->subctxt_cnt = uctxt->subctxt_cnt; ), - TP_printk("[%s] ctxt %u " UCTXT_FMT, + TP_printk("[%s] ctxt %u:%u " UCTXT_FMT, __get_str(dev), __entry->ctxt, + __entry->subctxt, __entry->credits, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_dma + __entry->rcvegr_dma, + __entry->subctxt_cnt ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 2c9ac57657d3..c59809a7f121 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -676,6 +676,40 @@ TRACE_EVENT( ) ); +DECLARE_EVENT_CLASS( + hfi1_do_send_template, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(bool, flag) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->flag = flag; + ), + TP_printk( + "[%s] qpn %x flag %d", + __get_str(dev), + __entry->qpn, + __entry->flag + ) +); + +DEFINE_EVENT( + hfi1_do_send_template, hfi1_rc_do_send, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + +DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 35c6e7ec8ad6..a8f0aa4722f6 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -53,7 +53,7 @@ struct tid_group { struct list_head list; - unsigned base; + u32 base; u8 size; u8 used; u8 map; @@ -82,20 +82,25 @@ struct tid_pageset { (unsigned long)(len) - 1) & PAGE_MASK) - \ ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) -static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct hfi1_filedata *); -static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); -static int set_rcvarray_entry(struct file *, unsigned long, u32, - struct tid_group *, struct page **, unsigned); -static int tid_rb_insert(void *, struct mmu_rb_node *); +static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, + struct exp_tid_set *set, + struct hfi1_filedata *fd); +static u32 find_phys_blocks(struct page **pages, unsigned npages, + struct tid_pageset *list); +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, + u32 rcventry, struct tid_group *grp, + struct page **pages, unsigned npages); +static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *, struct mmu_rb_node *); -static int tid_rb_invalidate(void *, struct mmu_rb_node *); -static int program_rcvarray(struct file *, unsigned long, struct tid_group *, - struct tid_pageset *, unsigned, u16, struct page **, - u32 *, unsigned *, unsigned *); -static int unprogram_rcvarray(struct file *, u32, struct tid_group **); +static void tid_rb_remove(void *arg, struct mmu_rb_node *node); +static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, + struct tid_group *grp, struct tid_pageset *sets, + unsigned start, u16 count, struct page **pages, + u32 *tidlist, unsigned *tididx, unsigned *pmapped); +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, + struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static struct mmu_rb_ops tid_rb_ops = { @@ -149,52 +154,60 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } +int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = fd->dd; + u32 tidbase; + u32 i; + struct tid_group *grp, *gptr; + + exp_tid_group_init(&uctxt->tid_group_list); + exp_tid_group_init(&uctxt->tid_used_list); + exp_tid_group_init(&uctxt->tid_full_list); + + tidbase = uctxt->expected_base; + for (i = 0; i < uctxt->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto grp_failed; + + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &uctxt->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; + +grp_failed: + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + + return -ENOMEM; +} + /* * Initialize context and file private data needed for Expected * receive caching. 
This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. */ -int hfi1_user_exp_rcv_init(struct file *fp) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - unsigned tidbase; - int i, ret = 0; + int ret = 0; spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - if (!uctxt->subctxt_cnt || !fd->subctxt) { - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - struct tid_group *grp; - - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) { - /* - * If we fail here, the groups already - * allocated will be freed by the close - * call. - */ - ret = -ENOMEM; - goto done; - } - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - } - fd->entry_to_rb = kcalloc(uctxt->expected_count, - sizeof(struct rb_node *), - GFP_KERNEL); + sizeof(struct rb_node *), + GFP_KERNEL); if (!fd->entry_to_rb) return -ENOMEM; @@ -204,8 +217,9 @@ int hfi1_user_exp_rcv_init(struct file *fp) sizeof(*fd->invalid_tids), GFP_KERNEL); if (!fd->invalid_tids) { - ret = -ENOMEM; - goto done; + kfree(fd->entry_to_rb); + fd->entry_to_rb = NULL; + return -ENOMEM; } /* @@ -248,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp) fd->tid_limit = uctxt->expected_count; } spin_unlock(&fd->tid_lock); -done: + return ret; } -int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_group *grp, *gptr; - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return 0; + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + hfi1_clear_tids(uctxt); +} + +void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + /* * The notifier would have been removed when the process'es mm * was freed. */ - if (fd->handler) + if (fd->handler) { hfi1_mmu_rb_unregister(fd->handler); - - kfree(fd->invalid_tids); - - if (!uctxt->cnt) { + } else { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); } + kfree(fd->invalid_tids); + fd->invalid_tids = NULL; + kfree(fd->entry_to_rb); - return 0; + fd->entry_to_rb = NULL; } /* @@ -351,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * can fit into the group. If the group becomes fully * used, move it to tid_full_list. 
*/ -int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0, need_group = 0, pinned; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, @@ -451,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) struct tid_group *grp = tid_group_pop(&uctxt->tid_group_list); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, dd->rcv_entries.group_size, pages, tidlist, &tididx, &mapped); /* @@ -497,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) unsigned use = min_t(unsigned, pageset_count - pageidx, grp->size - grp->used); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, use, pages, tidlist, &tididx, &mapped); if (ret < 0) { @@ -547,7 +564,7 @@ nomem: * everything done so far so we don't leak resources. */ tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fp, tinfo); + hfi1_user_exp_rcv_clear(fd, tinfo); tinfo->tidlist = 0; ret = -EFAULT; goto bail; @@ -571,10 +588,10 @@ bail: return ret > 0 ? 0 : ret; } -int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; u32 *tidinfo; unsigned tididx; @@ -589,7 +606,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) mutex_lock(&uctxt->exp_lock); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -606,9 +623,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) return ret; } -int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * @@ -723,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, /** * program_rcvarray() - program an RcvArray group with receive buffers - * @fp: file pointer + * @fd: filedata pointer * @vaddr: starting user virtual address * @grp: RcvArray group * @sets: array of struct tid_pageset holding information on physically @@ -748,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or * number of RcvArray entries programmed. 
@@ -723,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
 
 /**
  * program_rcvarray() - program an RcvArray group with receive buffers
- * @fp: file pointer
+ * @fd: filedata pointer
  * @vaddr: starting user virtual address
  * @grp: RcvArray group
  * @sets: array of struct tid_pageset holding information on physically
@@ -748,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
  * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
  * number of RcvArray entries programmed.
  */
-static int program_rcvarray(struct file *fp, unsigned long vaddr,
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
 			    struct tid_group *grp,
 			    struct tid_pageset *sets,
 			    unsigned start, u16 count, struct page **pages,
 			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	u16 idx;
@@ -795,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
 			npages = sets[setidx].count;
 			pageidx = sets[setidx].idx;
 
-			ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+			ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
 						 rcventry, grp, pages + pageidx,
 						 npages);
 			if (ret)
@@ -817,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
 	return idx;
 }
 
-static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
 			      u32 rcventry, struct tid_group *grp,
 			      struct page **pages, unsigned npages)
 {
 	int ret;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct tid_rb_node *node;
 	struct hfi1_devdata *dd = uctxt->dd;
@@ -876,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
 	return 0;
 }
 
-static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
 			      struct tid_group **grp)
 {
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd = uctxt->dd;
 	struct tid_rb_node *node;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..5250c897298d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,7 +1,7 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -70,10 +70,15 @@
 		(tid) |= EXP_TID_SET(field, (value));			\
 	} while (0)
 
-int hfi1_user_exp_rcv_init(struct file *);
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
-int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+			    struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+			      struct hfi1_tid_info *tinfo);
 
 #endif /* _HFI1_USER_EXP_RCV_H */
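Every prototype in the header now takes struct hfi1_filedata directly, so the private_data dereference happens once at the file-operations boundary instead of inside every helper. A compressed illustration of that calling convention (all types here are stand-ins, not driver structures):

#include <stdio.h>

struct file_sketch { void *private_data; };
struct filedata_sketch { int ctxt; };

static int exp_rcv_setup_sketch(struct filedata_sketch *fd)
{
        return fd->ctxt;        /* helpers see only per-open state */
}

static int ioctl_sketch(struct file_sketch *fp)
{
        struct filedata_sketch *fd = fp->private_data; /* resolved once */

        return exp_rcv_setup_sketch(fd);
}

int main(void)
{
        struct filedata_sketch fd = { .ctxt = 3 };
        struct file_sketch fp = { .private_data = &fd };

        printf("ctxt %d\n", ioctl_sketch(&fp));
        return 0;
}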
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0749689d7643..d55339f5d73b 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 /* KDETH OM multipliers and switch over point */
 #define KDETH_OM_SMALL     4
+#define KDETH_OM_SMALL_SHIFT     2
 #define KDETH_OM_LARGE     64
+#define KDETH_OM_LARGE_SHIFT     6
 #define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 
 /* Tx request flag bits */
@@ -153,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 /* SDMA request flag bits */
 #define SDMA_REQ_FOR_THREAD 1
 #define SDMA_REQ_SEND_DONE  2
-#define SDMA_REQ_HAVE_AHG   3
-#define SDMA_REQ_HAS_ERROR  4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAS_ERROR  3
+#define SDMA_REQ_DONE_ERROR 4
 
 #define SDMA_PKT_Q_INACTIVE BIT(0)
 #define SDMA_PKT_Q_ACTIVE   BIT(1)
@@ -214,7 +215,7 @@ struct user_sdma_request {
 	 * each request will need it's own engine pointer.
 	 */
 	struct sdma_engine *sde;
-	u8 ahg_idx;
+	s8 ahg_idx;
 	u32 ahg[9];
 	/*
 	 * KDETH.Offset (Eager) field
@@ -229,12 +230,6 @@ struct user_sdma_request {
 	 */
 	u32 tidoffset;
 	/*
-	 * KDETH.OM
-	 * Remember this because the header template always sets it
-	 * to 0.
-	 */
-	u8 omfactor;
-	/*
 	 * We copy the iovs for this request (based on
 	 * info.iovcnt). These are only the data vectors
 	 */
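KDETH_OM_SMALL and KDETH_OM_LARGE are powers of two (4 and 64), so later hunks can replace a divide by the OM factor with a right shift; the new *_SHIFT constants are simply their base-2 logarithms. The equivalence, checkable in isolation:

#include <assert.h>

#define KDETH_OM_SMALL        4
#define KDETH_OM_SMALL_SHIFT  2
#define KDETH_OM_LARGE       64
#define KDETH_OM_LARGE_SHIFT  6

int main(void)
{
        unsigned int tidoffset = 0x12340;

        /* x / 4 == x >> 2 and x / 64 == x >> 6 for unsigned x */
        assert(tidoffset / KDETH_OM_SMALL ==
               tidoffset >> KDETH_OM_SMALL_SHIFT);
        assert(tidoffset / KDETH_OM_LARGE ==
               tidoffset >> KDETH_OM_LARGE_SHIFT);
        return 0;
}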
@@ -284,39 +279,43 @@ struct user_sdma_txreq {
 	hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
 		 (pq)->subctxt, ##__VA_ARGS__)
 
-static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
-static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int);
-static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
-static void user_sdma_free_request(struct user_sdma_request *, bool);
-static int pin_vector_pages(struct user_sdma_request *,
-			    struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
-			       unsigned);
-static int check_header_template(struct user_sdma_request *,
-				 struct hfi1_pkt_header *, u32, u32);
-static int set_txreq_header(struct user_sdma_request *,
-			    struct user_sdma_txreq *, u32);
-static int set_txreq_header_ahg(struct user_sdma_request *,
-				struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
-				  struct hfi1_user_sdma_comp_q *,
-				  u16, enum hfi1_sdma_comp_state, int);
-static inline u32 set_pkt_bth_psn(__be32, u8, u32);
+static int user_sdma_send_pkts(struct user_sdma_request *req,
+			       unsigned maxpkts);
+static int num_user_pages(const struct iovec *iov);
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
+static int pin_vector_pages(struct user_sdma_request *req,
+			    struct user_sdma_iovec *iovec);
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+			       unsigned start, unsigned npages);
+static int check_header_template(struct user_sdma_request *req,
+				 struct hfi1_pkt_header *hdr, u32 lrhlen,
+				 u32 datalen);
+static int set_txreq_header(struct user_sdma_request *req,
+			    struct user_sdma_txreq *tx, u32 datalen);
+static int set_txreq_header_ahg(struct user_sdma_request *req,
+				struct user_sdma_txreq *tx, u32 len);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+				  struct hfi1_user_sdma_comp_q *cq,
+				  u16 idx, enum hfi1_sdma_comp_state state,
+				  int ret);
+static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
 static int defer_packet_queue(
-	struct sdma_engine *,
-	struct iowait *,
-	struct sdma_txreq *,
-	unsigned seq);
-static void activate_packet_queue(struct iowait *, int);
-static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(void *, struct mmu_rb_node *);
+	struct sdma_engine *sde,
+	struct iowait *wait,
+	struct sdma_txreq *txreq,
+	unsigned int seq);
+static void activate_packet_queue(struct iowait *wait, int reason);
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+			   unsigned long len);
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
 static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
 			 void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *);
-static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
 
 static struct mmu_rb_ops sdma_rb_ops = {
 	.filter = sdma_rb_filter,
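The hunks that follow rework hfi1_user_sdma_alloc_queues(): argument checks return directly, and each later allocation failure jumps to a label that unwinds exactly the allocations made before it, newest first. A compilable miniature of that ladder (names hypothetical):

#include <stdlib.h>

struct pktq_sketch {
        void *reqs;
        void *comps;
};

static int alloc_queues_sketch(struct pktq_sketch *pq)
{
        pq->reqs = calloc(16, 32);
        if (!pq->reqs)
                return -1;

        pq->comps = calloc(16, 16);
        if (!pq->comps)
                goto free_reqs; /* unwind only what already exists */

        return 0;

free_reqs:
        free(pq->reqs);
        pq->reqs = NULL;
        return -1;
}

int main(void)
{
        struct pktq_sketch pq = { 0 };

        return alloc_queues_sketch(&pq);
}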
@@ -372,45 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj)
 	memset(tx, 0, sizeof(*tx));
 }
 
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+				struct hfi1_filedata *fd)
 {
-	struct hfi1_filedata *fd;
-	int ret = 0;
+	int ret = -ENOMEM;
 	char buf[64];
 	struct hfi1_devdata *dd;
 	struct hfi1_user_sdma_comp_q *cq;
 	struct hfi1_user_sdma_pkt_q *pq;
 	unsigned long flags;
 
-	if (!uctxt || !fp) {
-		ret = -EBADF;
-		goto done;
-	}
-
-	fd = fp->private_data;
+	if (!uctxt || !fd)
+		return -EBADF;
 
-	if (!hfi1_sdma_comp_ring_size) {
-		ret = -EINVAL;
-		goto done;
-	}
+	if (!hfi1_sdma_comp_ring_size)
+		return -EINVAL;
 
 	dd = uctxt->dd;
 
 	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
 	if (!pq)
-		goto pq_nomem;
-
-	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
-			   sizeof(*pq->reqs),
-			   GFP_KERNEL);
-	if (!pq->reqs)
-		goto pq_reqs_nomem;
-
-	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
-				 sizeof(*pq->req_in_use),
-				 GFP_KERNEL);
-	if (!pq->req_in_use)
-		goto pq_reqs_no_in_use;
+		return -ENOMEM;
 
 	INIT_LIST_HEAD(&pq->list);
 	pq->dd = dd;
@@ -426,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
 		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
+
+	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+			   sizeof(*pq->reqs),
+			   GFP_KERNEL);
+	if (!pq->reqs)
+		goto pq_reqs_nomem;
+
+	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+				 sizeof(*pq->req_in_use),
+				 GFP_KERNEL);
+	if (!pq->req_in_use)
+		goto pq_reqs_no_in_use;
+
 	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
 		 fd->subctxt);
 	pq->txreq_cache = kmem_cache_create(buf,
-			       sizeof(struct user_sdma_txreq),
+					    sizeof(struct user_sdma_txreq),
 					    L1_CACHE_BYTES,
 					    SLAB_HWCACHE_ALIGN,
 					    sdma_kmem_cache_ctor);
@@ -438,7 +432,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 			   uctxt->ctxt);
 		goto pq_txreq_nomem;
 	}
-	fd->pq = pq;
+
 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq)
 		goto cq_nomem;
@@ -449,20 +443,25 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 		goto cq_comps_nomem;
 
 	cq->nentries = hfi1_sdma_comp_ring_size;
-	fd->cq = cq;
 
 	ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
 				   &pq->handler);
 	if (ret) {
 		dd_dev_err(dd, "Failed to register with MMU %d", ret);
-		goto done;
+		goto pq_mmu_fail;
 	}
 
+	fd->pq = pq;
+	fd->cq = cq;
+
 	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 	list_add(&pq->list, &uctxt->sdma_queues);
 	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
-	goto done;
+
+	return 0;
+
+pq_mmu_fail:
+	vfree(cq->comps);
 cq_comps_nomem:
 	kfree(cq);
 cq_nomem:
@@ -473,10 +472,7 @@ pq_reqs_no_in_use:
 	kfree(pq->reqs);
 pq_reqs_nomem:
 	kfree(pq);
-	fd->pq = NULL;
-pq_nomem:
-	ret = -ENOMEM;
-done:
+
 	return ret;
 }
 
@@ -536,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid)
 	return mapping[hash];
 }
 
-int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
-				   unsigned long dim, unsigned long *count)
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+				   struct iovec *iovec, unsigned long dim,
+				   unsigned long *count)
 {
 	int ret = 0, i;
-	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
 	struct hfi1_user_sdma_comp_q *cq = fd->cq;
@@ -616,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	req->pq = pq;
 	req->cq = cq;
 	req->status = -1;
+	req->ahg_idx = -1;
 	INIT_LIST_HEAD(&req->txps);
 
 	memcpy(&req->info, &info, sizeof(info));
@@ -766,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	}
 
 	/* We don't need an AHG entry if the request contains only one packet */
-	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
-		int ahg = sdma_ahg_alloc(req->sde);
-
-		if (likely(ahg >= 0)) {
-			req->ahg_idx = (u8)ahg;
-			set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
-		}
-	}
+	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
+		req->ahg_idx = sdma_ahg_alloc(req->sde);
 
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
 	atomic_inc(&pq->n_reqs);
@@ -991,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		}
 	}
 
-	if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
+	if (req->ahg_idx >= 0) {
 		if (!req->seqnum) {
 			u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
 			u32 lrhlen = get_lrh_len(req->hdr,
@@ -1121,7 +1112,7 @@ dosend:
 		 * happen due to the sequential manner in which
 		 * descriptors are processed.
 		 */
-		if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+		if (req->ahg_idx >= 0)
 			sdma_ahg_free(req->sde, req->ahg_idx);
 	}
 	return ret;
@@ -1323,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 {
 	struct hfi1_user_sdma_pkt_q *pq = req->pq;
 	struct hfi1_pkt_header *hdr = &tx->hdr;
+	u8 omfactor; /* KDETH.OM */
 	u16 pbclen;
 	int ret;
 	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1400,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req,
 		}
 		tidval = req->tids[req->tididx];
 	}
-	req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
-		KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+	omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+		KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+		KDETH_OM_SMALL_SHIFT;
 	/* Set KDETH.TIDCtrl based on value for this TID. */
 	KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
 		  EXP_TID_GET(tidval, CTRL));
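With ahg_idx widened to a signed s8 and initialized to -1 above, the separate HAVE_AHG flag bit becomes redundant: the index itself encodes "no entry held", and any value >= 0 means one was allocated. A small sketch of that sentinel convention (the allocator is a stand-in):

#include <stdio.h>

static int sdma_ahg_alloc_sketch(void)
{
        return 3;       /* or a negative errno when the pool is empty */
}

int main(void)
{
        signed char ahg_idx = -1;       /* s8 sentinel: no AHG entry */

        ahg_idx = (signed char)sdma_ahg_alloc_sketch();
        if (ahg_idx >= 0)
                printf("using AHG entry %d\n", ahg_idx);
        else
                printf("falling back to full headers\n");
        return 0;
}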
@@ -1416,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req,
 	 * transfer.
 	 */
 	SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
-		 req->tidoffset, req->tidoffset / req->omfactor,
-		 req->omfactor != KDETH_OM_SMALL);
+		 req->tidoffset, req->tidoffset >> omfactor,
+		 omfactor != KDETH_OM_SMALL_SHIFT);
 	KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
-		  req->tidoffset / req->omfactor);
+		  req->tidoffset >> omfactor);
 	KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-		  req->omfactor != KDETH_OM_SMALL);
+		  omfactor != KDETH_OM_SMALL_SHIFT);
 done:
 	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1433,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 				struct user_sdma_txreq *tx, u32 len)
 {
 	int diff = 0;
+	u8 omfactor; /* KDETH.OM */
 	struct hfi1_user_sdma_pkt_q *pq = req->pq;
 	struct hfi1_pkt_header *hdr = &req->hdr;
 	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
@@ -1484,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 			}
 			tidval = req->tids[req->tididx];
 		}
-		req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+		omfactor = ((EXP_TID_GET(tidval, LEN) *
 				  PAGE_SIZE) >=
-				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
-				 KDETH_OM_SMALL;
+				 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+				 KDETH_OM_SMALL_SHIFT;
 		/* KDETH.OM and KDETH.OFFSET (TID) */
 		AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
-			       ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
-				((req->tidoffset / req->omfactor) & 0x7fff)));
+			       ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+				((req->tidoffset >> omfactor)
+				 & 0x7fff)));
 		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
 		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
 				  (EXP_TID_GET(tidval, IDX) & 0x3ff));
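The AHG_HEADER_SET call above packs two things into one 16-bit header field: bit 15 flags whether the large OM is in effect, and bits 0-14 carry tidoffset scaled down to OM units. A standalone check of that packing (values invented):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        unsigned int omfactor = 6;      /* KDETH_OM_LARGE_SHIFT */
        uint32_t tidoffset = 0x40000;   /* 256 KiB into the transfer */
        uint16_t field;

        /* OM flag in bit 15, offset in OM units in bits 0-14 */
        field = (uint16_t)((!!(omfactor - 2)) << 15 |
                           ((tidoffset >> omfactor) & 0x7fff));
        assert(field == 0x9000);        /* flag set, 0x1000 units */
        return 0;
}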
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 39001714f551..e5b10aefe212 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,5 +1,7 @@
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -56,7 +58,7 @@ extern uint extended_psn;
 struct hfi1_user_sdma_pkt_q {
 	struct list_head list;
 	unsigned ctxt;
-	unsigned subctxt;
+	u16 subctxt;
 	u16 n_max_reqs;
 	atomic_t n_reqs;
 	u16 reqidx;
@@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q {
 	struct hfi1_sdma_comp_entry *comps;
 };
 
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
-int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
-				   unsigned long *);
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+				struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+				   struct iovec *iovec, unsigned long dim,
+				   unsigned long *count);
+
+#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 52ff275caf54..cd635d0c1d3b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -125,7 +125,6 @@ struct hfi1_qp_priv {
 	struct sdma_engine *s_sde;                  /* current sde */
 	struct send_context *s_sendcontext;         /* current sendcontext */
 	u8 s_sc;                                    /* SC[0..4] for next packet */
-	u8 r_adefered;                              /* number of acks defered */
 	struct iowait s_iowait;
 	struct rvt_qp *owner;
 };
@@ -140,6 +139,10 @@ struct hfi1_pkt_state {
 	struct hfi1_pportdata *ppd;
 	struct verbs_txreq *s_txreq;
 	unsigned long flags;
+	unsigned long timeout;
+	unsigned long timeout_int;
+	int cpu;
+	bool in_thread;
 };
 
 #define HFI1_PSN_CREDIT 16
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 392f4d57f3e3..b601c2929f8f 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -67,9 +67,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
 	unsigned int rcvctrl_ops = 0;
 	int ret;
 
-	ret = hfi1_init_ctxt(uctxt->sc);
-	if (ret)
-		goto done;
+	hfi1_init_ctxt(uctxt->sc);
 
 	uctxt->do_interrupt = &handle_receive_interrupt;
 
@@ -82,8 +80,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
 	if (ret)
 		goto done;
 
-	set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
-
 	if (uctxt->rcvhdrtail_kvaddr)
 		clear_rcvhdrtail(uctxt);
 
@@ -209,7 +205,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
 	uctxt->event_flags = 0;
 
 	hfi1_clear_tids(uctxt);
-	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+	hfi1_clear_ctxt_pkey(dd, uctxt);
 
 	hfi1_stats.sps_ctxts--;
 	hfi1_free_ctxtdata(dd, uctxt);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9f3ba320ce70..d45772da0963 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3530,6 +3530,26 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	return num_counters;
 }
 
+static struct net_device*
+mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
+			  u8 port_num,
+			  enum rdma_netdev_t type,
+			  const char *name,
+			  unsigned char name_assign_type,
+			  void (*setup)(struct net_device *))
+{
+	if (type != RDMA_NETDEV_IPOIB)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
+				      name, setup);
+}
+
+static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+{
+	return mlx5_rdma_netdev_free(netdev);
+}
+
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_ib_dev *dev;
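mlx5_ib_alloc_rdma_netdev() above answers for exactly one netdev type and signals anything else with ERR_PTR(-EOPNOTSUPP), which lets upper layers probe for support cleanly. A userspace-shaped sketch of the same gate (the enum and return convention are stand-ins for the kernel's ERR_PTR scheme):

#include <errno.h>
#include <stdio.h>

enum netdev_type_sketch { TYPE_IPOIB_SKETCH, TYPE_OTHER_SKETCH };

static int alloc_rdma_netdev_sketch(enum netdev_type_sketch type)
{
        if (type != TYPE_IPOIB_SKETCH)
                return -EOPNOTSUPP;     /* kernel: ERR_PTR(-EOPNOTSUPP) */
        return 0;                       /* hand off to the vendor allocator */
}

int main(void)
{
        printf("ipoib: %d, other: %d\n",
               alloc_rdma_netdev_sketch(TYPE_IPOIB_SKETCH),
               alloc_rdma_netdev_sketch(TYPE_OTHER_SKETCH));
        return 0;
}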
@@ -3660,6 +3680,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
+	dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+	dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
 		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ced15c4446bd..e37cc89987e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -368,7 +368,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
 			((void *)(uintptr_t)iova) : addr;
 
 		if (crcp)
-			crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+			*crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
 					*crcp, src, length);
 
 		memcpy(dest, src, length);
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 13ed2cc6eaa2..1b596fbbe251 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -114,7 +114,6 @@ enum rxe_device_param {
 	RXE_MAX_UCONTEXT		= 512,
 
 	RXE_NUM_PORT			= 1,
-	RXE_NUM_COMP_VECTORS		= 1,
 
 	RXE_MIN_QP_INDEX		= 16,
 	RXE_MAX_QP_INDEX		= 0x00020000,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 299b0f8423f2..83d709e74dfb 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1239,7 +1239,7 @@ int rxe_register_device(struct rxe_dev *rxe)
 	dev->owner = THIS_MODULE;
 	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
-	dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
+	dev->num_comp_vectors = num_possible_cpus();
 	dev->dev.parent = rxe_dma_device(rxe);
 	dev->local_dma_lkey = 0;
 	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
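The rxe_mr.c hunk above fixes a real bug: the running CRC was computed into a local named crc that was never read again, so the caller's checksum behind *crcp stayed stale. The corrected shape, as a small sketch with a toy checksum standing in for rxe_crc32():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* toy stand-in for rxe_crc32(); only the calling convention matters */
static uint32_t crc32_sketch(uint32_t seed, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len--)
                seed = (seed << 1) ^ *p++;
        return seed;
}

static void copy_with_crc(void *dst, const void *src, size_t len,
                          uint32_t *crcp)
{
        if (crcp)
                *crcp = crc32_sketch(*crcp, src, len); /* not a dead local */
        memcpy(dst, src, len);
}

int main(void)
{
        char dst[4];
        uint32_t crc = 0;

        copy_with_crc(dst, "abcd", 4, &crc);
        printf("crc %u\n", crc);        /* reflects the copied bytes */
        return 0;
}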
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 379c02fb4181..874b24366e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
 	return -EOPNOTSUPP;
 }
 
+/* Return lane speed in unit of 1e6 bit/sec */
+static inline int ib_speed_enum_to_int(int speed)
+{
+	switch (speed) {
+	case IB_SPEED_SDR:
+		return SPEED_2500;
+	case IB_SPEED_DDR:
+		return SPEED_5000;
+	case IB_SPEED_QDR:
+	case IB_SPEED_FDR10:
+		return SPEED_10000;
+	case IB_SPEED_FDR:
+		return SPEED_14000;
+	case IB_SPEED_EDR:
+		return SPEED_25000;
+	}
+
+	return SPEED_UNKNOWN;
+}
+
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(netdev);
+	struct ib_port_attr attr;
+	int ret, speed, width;
+
+	if (!netif_carrier_ok(netdev)) {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		return 0;
+	}
+
+	ret = ib_query_port(priv->ca, priv->port, &attr);
+	if (ret < 0)
+		return -EINVAL;
+
+	speed = ib_speed_enum_to_int(attr.active_speed);
+	width = ib_width_enum_to_int(attr.active_width);
+
+	if (speed < 0 || width < 0)
+		return -EINVAL;
+
+	/* Except the following are set, the other members of
+	 * the struct ethtool_link_settings are initialized to
+	 * zero in the function __ethtool_get_link_ksettings.
+	 */
+	cmd->base.speed = speed * width;
+	cmd->base.duplex = DUPLEX_FULL;
+
+	cmd->base.phy_address = 0xFF;
+
+	cmd->base.autoneg = AUTONEG_ENABLE;
+	cmd->base.port = PORT_OTHER;
+
+	return 0;
+}
+
 static const struct ethtool_ops ipoib_ethtool_ops = {
+	.get_link_ksettings = ipoib_get_link_ksettings,
 	.get_drvinfo		= ipoib_get_drvinfo,
 	.get_coalesce		= ipoib_get_coalesce,
 	.set_coalesce		= ipoib_set_coalesce,
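ipoib_get_link_ksettings() above multiplies per-lane speed by link width, both already in ethtool's 1e6 bit/s convention, so an EDR 4x link reports 100000. A worked instance of the arithmetic (values assumed):

#include <stdio.h>

int main(void)
{
        int lane_mbps = 25000;  /* IB_SPEED_EDR per the mapping above */
        int width = 4;          /* ib_width_enum_to_int() for a 4x link */

        /* ethtool speed is per-lane speed times lane count, in Mb/s */
        printf("link speed: %d Mb/s\n", lane_mbps * width);
        return 0;
}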
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index a84b652f9b54..fc52d742b7f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -35,6 +35,6 @@ config MLX5_CORE_EN_DCB
 config MLX5_CORE_IPOIB
 	bool "Mellanox Technologies ConnectX-4 IPoIB offloads support"
 	depends on MLX5_CORE_EN
-	default y
+	default n
 	---help---
 	  MLX5 IPoIB offloads & acceleration support.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index 3c84e36af018..019c230da498 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "en.h"
 #include "ipoib.h"
@@ -359,10 +360,10 @@ unlock:
 	return 0;
 }
 
-#ifdef notusedyet
 /* IPoIB RDMA netdev callbacks */
 static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
-			      union ib_gid *gid, u16 lid, int set_qkey)
+			      union ib_gid *gid, u16 lid, int set_qkey,
+			      u32 qkey)
 {
 	struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
 	struct mlx5_core_dev *mdev = epriv->mdev;
@@ -375,6 +376,12 @@ static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
 		mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
 			       ipriv->qp.qpn, gid->raw);
 
+	if (set_qkey) {
+		mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
+			      netdev->name, qkey);
+		ipriv->qkey = qkey;
+	}
+
 	return err;
 }
 
@@ -397,15 +404,15 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
 }
 
 static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
-		      struct ib_ah *address, u32 dqpn, u32 dqkey)
+		      struct ib_ah *address, u32 dqpn)
 {
 	struct mlx5e_priv *epriv = mlx5i_epriv(dev);
 	struct mlx5e_txqsq *sq   = epriv->txq2sq[skb_get_queue_mapping(skb)];
 	struct mlx5_ib_ah *mah   = to_mah(address);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
 
-	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey);
+	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
 }
-#endif
 
 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
@@ -414,22 +421,23 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
 		mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	return 0;
 }
 
-static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-						 struct ib_device *ibdev,
-						 const char *name,
-						 void (*setup)(struct net_device *))
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *))
 {
 	const struct mlx5e_profile *profile = &mlx5i_nic_profile;
 	int nch = profile->max_nch(mdev);
 	struct net_device *netdev;
 	struct mlx5i_priv *ipriv;
 	struct mlx5e_priv *epriv;
+	struct rdma_netdev *rn;
 	int err;
 
 	if (mlx5i_check_required_hca_cap(mdev)) {
@@ -464,13 +472,13 @@ static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	mlx5e_attach_netdev(epriv);
 	netif_carrier_off(netdev);
 
-	/* TODO: set rdma_netdev func pointers
-	 * rn = &ipriv->rn;
-	 * rn->hca = ibdev;
-	 * rn->send = mlx5i_xmit;
-	 * rn->attach_mcast = mlx5i_attach_mcast;
-	 * rn->detach_mcast = mlx5i_detach_mcast;
-	 */
+	/* set rdma_netdev func pointers */
+	rn = &ipriv->rn;
+	rn->hca = ibdev;
+	rn->send = mlx5i_xmit;
+	rn->attach_mcast = mlx5i_attach_mcast;
+	rn->detach_mcast = mlx5i_detach_mcast;
+
 	return netdev;
 
 err_free_netdev:
@@ -482,7 +490,7 @@ free_mdev_resources:
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
 
-static void mlx5_rdma_netdev_free(struct net_device *netdev)
+void mlx5_rdma_netdev_free(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 	const struct mlx5e_profile *profile = priv->profile;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
index bae0a5cbc8ad..213191a78464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h
@@ -40,7 +40,9 @@
 
 /* ipoib rdma netdev's private data structure */
 struct mlx5i_priv {
+	struct rdma_netdev rn; /* keep this first */
 	struct mlx5_core_qp qp;
+	u32    qkey;
 	char  *mlx5e_priv[0];
 };
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 18fc65b84b79..bcdf739ee41a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1097,6 +1097,25 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
+#ifndef CONFIG_MLX5_CORE_IPOIB
+static inline
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *))
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
+#else
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+					  struct ib_device *ibdev,
+					  const char *name,
+					  void (*setup)(struct net_device *));
+void mlx5_rdma_netdev_free(struct net_device *netdev);
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+
 struct mlx5_profile {
 	u64	mask;
 	u8	log_max_qp;
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1d8141a88d3c..be6472e5b06b 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -324,6 +324,7 @@ struct rvt_qp {
 	u8 r_state;             /* opcode of last packet received */
 	u8 r_flags;
 	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+	u8 r_adefered;          /* defered ack count */
 
 	struct list_head rspwait;       /* link for waiting to respond */
 
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 5f4ea28eabe4..d179d7767f51 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1494,6 +1494,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_2500		2500
 #define SPEED_5000		5000
 #define SPEED_10000		10000
+#define SPEED_14000		14000
 #define SPEED_20000		20000
 #define SPEED_25000		25000
 #define SPEED_40000		40000
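The driver.h hunk above keeps mlx5_rdma_netdev_alloc() callable even when MLX5_CORE_IPOIB is compiled out: the header substitutes inline stubs that fail with -EOPNOTSUPP instead of breaking the build. The idiom, reduced to a compilable sketch (the config symbol and function are hypothetical):

#include <errno.h>
#include <stdio.h>

#ifdef FEATURE_SKETCH                   /* compare CONFIG_MLX5_CORE_IPOIB */
int feature_alloc_sketch(void);         /* real implementation elsewhere */
#else
static inline int feature_alloc_sketch(void)
{
        return -EOPNOTSUPP;             /* kernel: ERR_PTR(-EOPNOTSUPP) */
}
#endif

int main(void)
{
        if (feature_alloc_sketch() == -EOPNOTSUPP)
                printf("feature compiled out; caller degrades gracefully\n");
        return 0;
}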